Patchwork [3/3] Loop flattening on loop-SSA.

login
register
mail settings
Submitter Sebastian Pop
Date Nov. 16, 2010, 9:45 p.m.
Message ID <AANLkTikDXOfD9OodZ2Hf2n5DQ=emxT9wG0Z=rv6eUbok@mail.gmail.com>
Download mbox | patch
Permalink /patch/71459/
State New
Headers show

Comments

Sebastian Pop - Nov. 16, 2010, 9:45 p.m.
Hi Richi,

Combined patch is 0001-Loop-flattening-on-loop-SSA.patch
the other patches are the separate fixes as asked in this review.

I am currently testing this on amd64-linux.

Sebastian

On Fri, Nov 5, 2010 at 07:51, Richard Guenther <rguenther@suse.de> wrote:
> On Wed, 3 Nov 2010, Sebastian Pop wrote:
>
>> 2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
>>
>>       * Makefile.in (OBJS-common): Add tree-loop-flattening.o.
>>       (tree-loop-flattening.o): New.
>>       * common.opt (ftree-loop-flatten): New.
>>       * dbgcnt.def (lflat): New.
>>       * params.def (PARAM_LFLAT_MAX_NB_BBS): New.
>>       * passes.c (init_optimization_passes): Add new passes
>>       pass_flatten_loops and pass_if_conversion after loop vectorization
>>       and before pass_slp_vectorize.
>>       * timevar.def (TV_TREE_LOOP_FLATTENING): New.
>>       * tree-loop-flattening.c: New.
>>       * tree-pass.h (pass_flatten_loops): Declared.
>>       * tree-flow.h (gate_tree_if_conversion): Declared.
>>       (tree_if_conversion): Declared.
>>       * tree-if-conv.c (tree_if_conversion): Not static anymore.
>>       (gate_tree_if_conversion): Same.
>
> Comments inline.
>
> What extra testing apart from the 4 testcases did this new pass get?
> Do we pass bootstrap with it enabled?  Did you check if we regress
> in SPEC 2k6 when it is enabled?
>
>>       * gcc.dg/tree-ssa/flat-loop-1.c: New.
>>       * gcc.dg/tree-ssa/flat-loop-2.c: New.
>>       * gcc.dg/tree-ssa/flat-loop-3.c: New.
>>       * gcc.dg/tree-ssa/flat-loop-4.c: New.
>> ---
>>  gcc/ChangeLog                               |   18 +
>>  gcc/Makefile.in                             |    4 +
>>  gcc/common.opt                              |    4 +
>>  gcc/dbgcnt.def                              |    1 +
>>  gcc/params.def                              |    7 +
>>  gcc/passes.c                                |    1 +
>>  gcc/testsuite/ChangeLog                     |    7 +
>>  gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c |   28 ++
>>  gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c |   39 ++
>>  gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c |   19 +
>>  gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c |   23 +
>>  gcc/timevar.def                             |    1 +
>>  gcc/tree-flow.h                             |    4 +
>>  gcc/tree-if-conv.c                          |    4 +-
>>  gcc/tree-loop-flattening.c                  |  630 +++++++++++++++++++++++++++
>>  gcc/tree-pass.h                             |    1 +
>>  16 files changed, 789 insertions(+), 2 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c
>>  create mode 100644 gcc/tree-loop-flattening.c
>>
>> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
>> index 3ceb7b6..f312b27 100644
>> --- a/gcc/ChangeLog
>> +++ b/gcc/ChangeLog
>> @@ -1,5 +1,23 @@
>>  2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
>>
>> +     * Makefile.in (OBJS-common): Add tree-loop-flattening.o.
>> +     (tree-loop-flattening.o): New.
>> +     * common.opt (ftree-loop-flatten): New.
>> +     * dbgcnt.def (lflat): New.
>> +     * params.def (PARAM_LFLAT_MAX_NB_BBS): New.
>> +     * passes.c (init_optimization_passes): Add new passes
>> +     pass_flatten_loops and pass_if_conversion after loop vectorization
>> +     and before pass_slp_vectorize.
>> +     * timevar.def (TV_TREE_LOOP_FLATTENING): New.
>> +     * tree-loop-flattening.c: New.
>> +     * tree-pass.h (pass_flatten_loops): Declared.
>> +     * tree-flow.h (gate_tree_if_conversion): Declared.
>> +     (tree_if_conversion): Declared.
>> +     * tree-if-conv.c (tree_if_conversion): Not static anymore.
>> +     (gate_tree_if_conversion): Same.
>> +
>> +2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
>> +
>>       * tree-if-conv.c (if_convertible_loop_p_1): Do not call
>>       compute_data_dependences_for_loop.
>>       (if_convertible_loop_p): Do not free refs and ddrs.
>> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
>> index 898e962..55b67f4 100644
>> --- a/gcc/Makefile.in
>> +++ b/gcc/Makefile.in
>> @@ -1368,6 +1368,7 @@ OBJS-common = \
>>       tree-into-ssa.o \
>>       tree-iterator.o \
>>       tree-loop-distribution.o \
>> +     tree-loop-flattening.o \
>>       tree-loop-linear.o \
>>       tree-nested.o \
>>       tree-nrv.o \
>> @@ -2773,6 +2774,9 @@ tree-loop-distribution.o: tree-loop-distribution.c $(CONFIG_H) $(SYSTEM_H) coret
>>     $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
>>     $(TREE_PASS_H) $(TREE_DATA_REF_H) $(EXPR_H) \
>>     langhooks.h $(TREE_VECTORIZER_H)
>> +tree-loop-flattening.o: tree-loop-flattening.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
>> +   $(TM_H) $(OPTABS_H) $(TREE_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \
>> +   $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(TREE_PASS_H) $(DBGCNT_H)
>>  tree-parloops.o: tree-parloops.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
>>     $(TREE_FLOW_H) $(TREE_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) \
>>     $(DIAGNOSTIC_H) $(TREE_PASS_H) langhooks.h gt-tree-parloops.h \
>> diff --git a/gcc/common.opt b/gcc/common.opt
>> index 8fe796f..c969979 100644
>> --- a/gcc/common.opt
>> +++ b/gcc/common.opt
>> @@ -1632,6 +1632,10 @@ ftree-loop-distribute-patterns
>>  Common Report Var(flag_tree_loop_distribute_patterns) Optimization
>>  Enable loop distribution for patterns transformed into a library call
>>
>> +ftree-loop-flatten
>> +Common Report Var(flag_tree_loop_flattening) Optimization
>> +Enable loop flattening on trees
>> +
>>  ftree-loop-im
>>  Common Report Var(flag_tree_loop_im) Init(1) Optimization
>>  Enable loop invariant motion on trees
>> diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
>> index 0492d66..0ef9a72 100644
>> --- a/gcc/dbgcnt.def
>> +++ b/gcc/dbgcnt.def
>> @@ -166,6 +166,7 @@ DEBUG_COUNTER (if_conversion_tree)
>>  DEBUG_COUNTER (if_after_combine)
>>  DEBUG_COUNTER (if_after_reload)
>>  DEBUG_COUNTER (local_alloc_for_sched)
>> +DEBUG_COUNTER (lflat)
>>  DEBUG_COUNTER (postreload_cse)
>>  DEBUG_COUNTER (pre)
>>  DEBUG_COUNTER (pre_insn)
>> diff --git a/gcc/params.def b/gcc/params.def
>> index 49a6185..3fffc35 100644
>> --- a/gcc/params.def
>> +++ b/gcc/params.def
>> @@ -788,6 +788,13 @@ DEFPARAM (PARAM_GRAPHITE_MAX_BBS_PER_FUNCTION,
>>         "maximum number of basic blocks per function to be analyzed by Graphite",
>>         100, 0, 0)
>>
>> +/* Maximal number of basic blocks in a loop to be flattened.  */
>> +
>> +DEFPARAM (PARAM_LFLAT_MAX_NB_BBS,
>> +       "lflat-max-nb-bbs",
>> +       "maximum number of basic blocks in a loop to be flattened",
>> +       100, 0, 0)
>> +
>>  /* Avoid doing loop invariant motion on very large loops.  */
>>
>>  DEFPARAM (PARAM_LOOP_INVARIANT_MAX_BBS_IN_LOOP,
>> diff --git a/gcc/passes.c b/gcc/passes.c
>> index 1308ce9..22110a4 100644
>> --- a/gcc/passes.c
>> +++ b/gcc/passes.c
>> @@ -917,6 +917,7 @@ init_optimization_passes (void)
>>         NEXT_PASS (pass_parallelize_loops);
>>         NEXT_PASS (pass_loop_prefetch);
>>         NEXT_PASS (pass_iv_optimize);
>> +       NEXT_PASS (pass_flatten_loops);
>>         NEXT_PASS (pass_tree_loop_done);
>>       }
>>        NEXT_PASS (pass_cse_reciprocals);
>> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
>> index 4233f86..2b3b93e 100644
>> --- a/gcc/testsuite/ChangeLog
>> +++ b/gcc/testsuite/ChangeLog
>> @@ -1,5 +1,12 @@
>>  2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
>>
>> +     * gcc.dg/tree-ssa/flat-loop-1.c: New.
>> +     * gcc.dg/tree-ssa/flat-loop-2.c: New.
>> +     * gcc.dg/tree-ssa/flat-loop-3.c: New.
>> +     * gcc.dg/tree-ssa/flat-loop-4.c: New.
>> +
>> +2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
>> +
>>       PR tree-optimization/46029
>>       * g++.dg/tree-ssa/ifc-pr46029.C: New.
>>       * gcc.dg/tree-ssa/ifc-8.c: New.
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c
>> new file mode 100644
>> index 0000000..bee8a2b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c
>> @@ -0,0 +1,28 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -ftree-loop-flatten" } */
>> +
>> +struct stack_segment
>> +{
>> +  struct dynamic_allocation_blocks *dynamic_allocation;
>> +};
>> +struct dynamic_allocation_blocks
>> +{
>> +  struct dynamic_allocation_blocks *next;
>> +};
>> +static struct dynamic_allocation_blocks *
>> +merge_dynamic_blocks (struct dynamic_allocation_blocks *a,
>> +                   struct dynamic_allocation_blocks *b)
>> +{
>> +  struct dynamic_allocation_blocks **pp;
>> +  for (pp = &a->next; *pp != ((void *)0); pp = &(*pp)->next)
>> +    *pp = b;
>> +  return a;
>> +}
>> +__morestack_release_segments (struct stack_segment **pp, int free_dynamic)
>> +{
>> +  struct dynamic_allocation_blocks *ret;
>> +  struct stack_segment *pss;
>> +  pss = *pp;
>> +  while (pss != ((void *)0))
>> +    ret = merge_dynamic_blocks (pss->dynamic_allocation, ret);
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
>> new file mode 100644
>> index 0000000..a7287fb
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
>> @@ -0,0 +1,39 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -ftree-loop-flatten" } */
>> +
>> +struct stack_segment
>> +{
>> +  struct stack_segment *next;
>> +  struct dynamic_allocation_blocks *dynamic_allocation;
>> +};
>> +struct dynamic_allocation_blocks
>> +{
>> +  struct dynamic_allocation_blocks *next;
>> +};
>> +static struct dynamic_allocation_blocks *
>> +merge_dynamic_blocks (struct dynamic_allocation_blocks *a,
>> +        struct dynamic_allocation_blocks *b)
>> +{
>> +  struct dynamic_allocation_blocks **pp;
>> +  if (b == ((void *)0))
>> +  for (pp = &a->next; *pp != ((void *)0); pp = &(*pp)->next)
>> +    ;
>> +  return a;
>> +}
>> +__morestack_release_segments (struct stack_segment **pp, int free_dynamic)
>> +{
>> +  struct dynamic_allocation_blocks *ret;
>> +  struct stack_segment *pss;
>> +  while (pss != ((void *)0))
>> +    {
>> +      struct stack_segment *next;
>> +      next = pss->next;
>> + {
>> +   if (free_dynamic)
>> +     {
>> +       ret = merge_dynamic_blocks (pss->dynamic_allocation, ret);
>> +     }
>> + }
>> +      pss = next;
>> +    }
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
>> new file mode 100644
>> index 0000000..d3d66ab
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -ftree-loop-flatten" } */
>> +
>> +
>> +int
>> +split_directories (const char *name, int *ptr_num_dirs)
>> +{
>> +  int num_dirs = 0;
>> +  char **dirs;
>> +  const char *p, *q;
>> +  int ch;
>> +  while ((ch = *p++) != '\0')
>> +    {
>> +   num_dirs++;
>> +   while (((*p) == '/'))
>> +     p++;
>> +    }
>> +  return (dirs[num_dirs - 1] == ((void *)0));
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c
>> new file mode 100644
>> index 0000000..8e551ac
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c
>> @@ -0,0 +1,23 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -ftree-loop-flatten" } */
>> +
>> +void
>> +formatted_backspace (int common, char *s)
>> +{
>> +  int base;
>> +  int n;
>> +  do
>> +    {
>> +      if (sseek (s, base, 0) < 0)
>> +     goto io_error;
>> +
>> +      while (n > 0)
>> +     {
>> +          n--;
>> +       base += n + 1;
>> +     }
>> +    }
>> +  while (base != 0);
>> + io_error:
>> +  generate_error (common, 0, ((void *)0));
>> +}
>
> The testcases seem to origin from ICEs found during development.  There
> is a lack of functional tests, please consider coming up with some,
> eventually testing for enabled extra optimizations.
>
>
>> diff --git a/gcc/timevar.def b/gcc/timevar.def
>> index 86e2999..89ff8e8 100644
>> --- a/gcc/timevar.def
>> +++ b/gcc/timevar.def
>> @@ -152,6 +152,7 @@ DEFTIMEVAR (TV_GRAPHITE_DATA_DEPS    , "Graphite data dep analysis")
>>  DEFTIMEVAR (TV_GRAPHITE_CODE_GEN     , "Graphite code generation")
>>  DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
>>  DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution")
>> +DEFTIMEVAR (TV_TREE_LOOP_FLATTENING  , "tree loop flattening")
>>  DEFTIMEVAR (TV_CHECK_DATA_DEPS       , "tree check data dependences")
>>  DEFTIMEVAR (TV_TREE_PREFETCH      , "tree prefetching")
>>  DEFTIMEVAR (TV_TREE_LOOP_IVOPTS           , "tree iv optimization")
>> diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h
>> index c2702dc..e1ee69f 100644
>> --- a/gcc/tree-flow.h
>> +++ b/gcc/tree-flow.h
>> @@ -730,6 +730,10 @@ bool contains_abnormal_ssa_name_p (tree);
>>  bool stmt_dominates_stmt_p (gimple, gimple);
>>  void mark_virtual_ops_for_renaming (gimple);
>>
>> +/* In tree-if-conv.c */
>> +bool gate_tree_if_conversion (void);
>> +bool tree_if_conversion (struct loop *, tree *);
>> +
>
> Why'd you need to export the gate?  I guess if-conversion should
> happen unconditionally for loops that are flattened as I see it is
> really part of the flattening transformation?
>
>>  /* In tree-ssa-dce.c */
>>  void mark_virtual_phi_result_for_renaming (gimple);
>>
>> diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
>> index 5b941af..3c30abb 100644
>> --- a/gcc/tree-if-conv.c
>> +++ b/gcc/tree-if-conv.c
>> @@ -1599,7 +1599,7 @@ combine_blocks (struct loop *loop, tree *scratch_pad)
>>  /* If-convert LOOP when it is legal.  For the moment this pass has no
>>     profitability analysis.  Returns true when something changed.  */
>>
>> -static bool
>> +bool
>>  tree_if_conversion (struct loop *loop, tree *scratch_pad)
>>  {
>>    bool changed = false;
>> @@ -1662,7 +1662,7 @@ main_tree_if_conversion (void)
>>
>>  /* Returns true when the if-conversion pass is enabled.  */
>>
>> -static bool
>> +bool
>>  gate_tree_if_conversion (void)
>>  {
>>    return ((flag_tree_vectorize && flag_tree_loop_if_convert != 0)
>> diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
>> new file mode 100644
>> index 0000000..4bc8768
>> --- /dev/null
>> +++ b/gcc/tree-loop-flattening.c
>> @@ -0,0 +1,630 @@
>> +/* Loop flattening.
>> +   Copyright (C) 2010 Free Software Foundation, Inc.
>> +   Contributed by Sebastian Pop <sebastian.pop@amd.com>.
>> +
>> +This file is part of GCC.
>> +
>> +GCC is free software; you can redistribute it and/or modify
>> +it under the terms of the GNU General Public License as published by
>> +the Free Software Foundation; either version 3, or (at your option)
>> +any later version.
>> +
>> +GCC is distributed in the hope that it will be useful,
>> +but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> +GNU General Public License for more details.
>> +
>> +You should have received a copy of the GNU General Public License
>> +along with GCC; see the file COPYING3.  If not see
>> +<http://www.gnu.org/licenses/>.  */
>> +
>> +#include "config.h"
>> +#include "system.h"
>> +#include "coretypes.h"
>> +#include "tm.h"
>> +#include "ggc.h"
>> +#include "tree.h"
>> +#include "rtl.h"
>> +#include "output.h"
>> +#include "basic-block.h"
>> +#include "diagnostic.h"
>> +#include "tree-flow.h"
>> +#include "toplev.h"
>> +#include "tree-dump.h"
>> +#include "timevar.h"
>> +#include "cfgloop.h"
>> +#include "tree-pass.h"
>> +#include "gimple.h"
>> +#include "params.h"
>> +#include "dbgcnt.h"
>> +
>> +/* This loop flattening pass transforms backward pointing edges into
>> +   forward pointing edges.
>> +
>> +   The back-edge removal transformation was described in the 1983
>> +   paper by Allen J. R., Ken Kennedy, Carrie Porterfield, and Joe
>> +   Warren: "Conversion of control dependence to data dependence"
>> +   available from http://doi.acm.org/10.1145/567067.567085
>> +
>> +   The back-edge removal algorithm was presented in that paper as part
>> +   of the if-conversion algorithm for backward pointing edges.  In
>> +   this section we will first provide a description of this technique
>> +   adapted for the Gimple-SSA form, followed by an example, and a
>> +   discussion of the differences with the higher level loop flattening
>> +   transformation.
>> +
>> +   The back-edge removal algorithm transforms control dependences into
>> +   data dependences by using a boolean variable.  The values taken by
>> +   the boolean variable control the execution path of the forward
>> +   edges created in order to use the back-edge of an outer loop.
>> +
>> +   The first step of the algorithm detects a surrounding loop and all
>> +   the back-edges of the loop body: these back-edges can be inner
>> +   loops or strongly connected components of the CFG that cannot be
>> +   reduced to natural loops.
>> +
>> +   Each back-edge is removed by redirecting the target of the
>> +   back-edge to the latch basic block of the surrounding loop.  A
>> +   boolean variable is created in the latch.  It is cleared when the
>> +   redirected back-edge is taken and it is set to true for any other
>> +   paths leading to the latch.
>> +
>> +   The header basic block of the surrounding loop is split before its
>> +   statements and a new condition is added based on the control
>> +   variable: when the control variable is set to true, the execution
>> +   proceeds as normal to the basic block that contains the statements
>> +   of the header; when the control variable is cleared, meaning that
>> +   the back-edge has been taken, the execution proceeds to the point
>> +   where the redirected back-edge was pointing.
>> +
>> +   The last step updates the SSA form after all the back-edges have
>> +   been redirected to the latch, and the new edges from the header to
>> +   the destination of back-edges have been created.
>> +
>> +   Another description of loop flattening in a very Fortran specific
>> +   way is in the 1992 paper by Reinhard von Hanxleden and Ken Kennedy:
>> +   "Relaxing SIMD Control Flow Constraints using Loop Transformations"
>> +   available from
>> +   http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033 */
>> +
>> +/* Keep the loop structure for LOOP and remove all the loop structures
>> +   under LOOP.  */
>> +
>> +static void
>> +cancel_subloops (loop_p loop)
>> +{
>> +  int i;
>> +  loop_p li;
>> +  VEC (loop_p, heap) *lv = VEC_alloc (loop_p, heap, 3);
>> +
>> +  for (li = loop->inner; li; li = li->next)
>> +    VEC_safe_push (loop_p, heap, lv, li);
>> +
>> +  FOR_EACH_VEC_ELT (loop_p, lv, i, li)
>> +    cancel_loop_tree (li);
>> +
>> +  VEC_free (loop_p, heap, lv);
>> +}
>
> This function should be in cfgloop.c and implemented in simpler
> form, like
>
> void
> cancel_subloops (struct loop *loop)
> {
>  while (loop->inner)
>    cancel_loop_tree (loop->inner);
> }
>
> simply following the cancel_loop_tree example.
>
>> +/* Before creating other phi nodes in LOOP->header for the control
>> +   flags, update the phi nodes of LOOP->header and add the necessary
>> +   phi nodes in the LOOP->latch that now contains several paths on
>> +   which the values are not updated.  PRED_E is the single edge that
>> +   was pointing to the LOOP->latch basic block before inner back-edges
>> +   were redirected to the LOOP->latch.  */
>> +
>> +static void
>> +update_loop_phi_nodes (loop_p loop, edge pred_e)
>> +{
>> +  gimple_stmt_iterator gsi;
>> +
>> +  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
>> +    {
>> +      edge e;
>> +      edge_iterator ei;
>> +      gimple phi = gsi_stmt (gsi);
>> +      tree back_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
>> +      tree res = gimple_phi_result (phi);
>> +      tree var = SSA_NAME_VAR (res);
>> +
>> +      phi = create_phi_node (var, loop->latch);
>> +      create_new_def_for (gimple_phi_result (phi), phi,
>> +                       gimple_phi_result_ptr (phi));
>
> Using create_new_def_for looks very suspicios.  create_phi_node
> will already create a new SSA name for you for the result, so
> it doesn't make any sense to fiddle with the SSA updaters machinery, does
> it?
>
>> +      FOR_EACH_EDGE (e, ei, loop->latch->preds)
>> +     add_phi_arg (phi, (e == pred_e ? back_arg : res),
>> +                  e, UNKNOWN_LOCATION);
>> +
>> +      res = gimple_phi_result (phi);
>> +      add_phi_arg (gsi_stmt (gsi), res, loop_latch_edge (loop),
>> +                UNKNOWN_LOCATION);
>> +    }
>> +}
>> +
>> +/* Creates a control flag for the FORWARDED_EDGE that represents the
>> +   back-edge that has been forwarded to the latch basic block of LOOP.
>> +   INNER_BODY is the basic block to which the back-edge was pointing
>> +   before redirection.  This function creates a boolean control flag
>> +   that is cleared when the FORWARDED_EDGE is taken and set for all
>> +   the other paths.  This function adds the corresponding phi nodes in
>> +   LOOP->latch and LOOP->header, and finally adds an edge from
>> +   LOOP->header to the INNER_BODY guarded by the control flag.  */
>> +
>> +static void
>> +create_control_flag (edge forwarded_edge, loop_p loop, basic_block inner_body)
>> +{
>> +  edge e, preheader;
>> +  edge outer_latch_e = loop_latch_edge (loop);
>> +  const char *name = "_flat_";
>> +  tree var = create_tmp_var (boolean_type_node, name);
>
> create_tmp_reg
>
>> +  tree res;
>> +  gimple phi, cond_stmt;
>> +  gimple_stmt_iterator gsi;
>> +  edge_iterator ei;
>> +
>> +  /* Adds a control variable for the redirected FORWARDED_EDGE.  */
>> +  add_referenced_var (var);
>> +  phi = create_phi_node (var, forwarded_edge->dest);
>> +  create_new_def_for (gimple_phi_result (phi), phi,
>> +                   gimple_phi_result_ptr (phi));
>
> Likewise.
>
>> +  FOR_EACH_EDGE (e, ei, outer_latch_e->src->preds)
>> +    add_phi_arg (phi, (e == forwarded_edge
>> +                    ? boolean_false_node
>> +                    : boolean_true_node),
>> +              e, UNKNOWN_LOCATION);
>> +  res = gimple_phi_result (phi);
>> +
>> +  /* Add a phi node in LOOP->header for the control variable.  */
>> +  phi = create_phi_node (var, loop->header);
>> +  create_new_def_for (gimple_phi_result (phi), phi,
>> +                   gimple_phi_result_ptr (phi));
>
> Again.
>
>> +  preheader = loop_preheader_edge (loop);
>> +  FOR_EACH_EDGE (e, ei, loop->header->preds)
>> +    add_phi_arg (phi, (e == preheader
>> +                    ? boolean_true_node
>> +                    : res),
>> +              e, UNKNOWN_LOCATION);
>> +  res = gimple_phi_result (phi);
>> +
>> +  /* Split LOOP->header to insert the control variable condition.  */
>> +  e = split_block_after_labels (loop->header);
>> +  e->flags = EDGE_TRUE_VALUE;
>> +  e = make_edge (loop->header, inner_body, EDGE_FALSE_VALUE);
>> +  cond_stmt = gimple_build_cond (EQ_EXPR, res, boolean_true_node,
>> +                              NULL_TREE, NULL_TREE);
>> +  gsi = gsi_last_bb (loop->header);
>> +  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
>> +}
>> +
>> +/* Adds phi nodes to the LOOP->header and LOOP->latch for the ssa_name
>> +   NAME.  ARG is the argument of the latch phi node set for the
>> +   FORWARDED_EDGE, and all the other edges merged by the latch phi
>> +   node are set to the result of the LOOP->header phi node.  The latch
>> +   edge of the LOOP->header phi node is set to the result of the
>> +   LOOP->latch phi node, and the other argument is set to an arbitrary
>> +   valid value defined before the loop (note that this initial value
>> +   is never used in the loop).  Returns the LOOP->header phi result.  */
>> +
>> +static tree
>> +add_header_and_latch_phis (loop_p loop, tree name, edge forwarded_edge,
>> +                        tree arg)
>> +{
>> +  edge e;
>> +  edge_iterator ei;
>> +  tree res, zero, var = SSA_NAME_VAR (name);
>> +  gimple loop_phi = create_phi_node (var, loop->header);
>> +  gimple latch_phi = create_phi_node (var, loop->latch);
>> +
>> +  create_new_def_for (gimple_phi_result (loop_phi), loop_phi,
>> +                   gimple_phi_result_ptr (loop_phi));
>> +  create_new_def_for (gimple_phi_result (latch_phi), latch_phi,
>> +                   gimple_phi_result_ptr (latch_phi));
>
> Likewise.
>
>> +  /* The value set to ZERO will never be used in the loop, however we
>> +     have to construct something meaningful for virtual SSA_NAMEs.  */
>> +  if (TREE_CODE (arg) != SSA_NAME)
>> +    zero = arg;
>> +  else if (is_gimple_reg (arg))
>> +    zero = fold_convert (TREE_TYPE (arg), integer_zero_node);
>> +  else
>> +    zero = gimple_default_def (cfun, SSA_NAME_VAR (arg));
>
> That looks bogus.  It will create overlapping life-ranges
> for virtual operands - just make sure you'll rename the VOPs
> and use gimple_vop (cfun) for the fallback.  You shoudl also
> use build_zero_cst instead of fold_convert.
>
> Thus,
>
>  mark_sym_for_renaming (gimple_vop (cfun));
>
>> +  res = gimple_phi_result (latch_phi);
>> +  FOR_EACH_EDGE (e, ei, loop->header->preds)
>> +    add_phi_arg (loop_phi, (e == loop_latch_edge (loop) ? res : zero),
>> +              e, UNKNOWN_LOCATION);
>> +
>> +  res = gimple_phi_result (loop_phi);
>> +  FOR_EACH_EDGE (e, ei, loop->latch->preds)
>> +    add_phi_arg (latch_phi, (e == forwarded_edge ? arg : res),
>> +              e, UNKNOWN_LOCATION);
>> +
>> +  return res;
>> +}
>> +
>> +/* Creates phi nodes for each inductive definition, i.e., loop phi
>> +   nodes.  For each induction phi node in the old loop header, i.e.,
>> +   in the single_succ (INNER_BODY), insert a phi node in the
>> +   LOOP->latch that takes the updated value of the induction on the
>> +   FORWARDED_EDGE, and maintains the same value as in the phi node of
>> +   the LOOP->header for all the other possible paths reaching
>> +   LOOP->latch.  This function has to be called after all the
>> +   back-edges have been redirected.  */
>> +
>> +static void
>> +update_inner_induction_phi_nodes (edge forwarded_edge, loop_p loop,
>> +                               basic_block inner_body)
>> +{
>> +  gimple_stmt_iterator gsi;
>> +
>> +  for (gsi = gsi_start_phis (single_succ (inner_body));
>> +       !gsi_end_p (gsi); gsi_next (&gsi))
>> +    {
>> +      gimple old_loop_phi = gsi_stmt (gsi);
>> +      tree back_arg = PHI_ARG_DEF_FROM_EDGE (old_loop_phi,
>> +                                          single_succ_edge (inner_body));
>> +      tree res = gimple_phi_result (old_loop_phi);
>> +
>> +      res = add_header_and_latch_phis (loop, res, forwarded_edge, back_arg);
>> +      add_phi_arg (old_loop_phi, res, single_succ_edge (inner_body),
>> +                UNKNOWN_LOCATION);
>> +    }
>> +}
>> +
>> +/* Renames all the uses of OLD_NAME with NEW_NAME (except the phi
>> +   nodes of DEF_BB) in all the basic blocks dominated by DEF_BB and in
>> +   the arguments of all the phi nodes originating in a basic block
>> +   that is dominated by DEF_BB.  */
>> +
>> +static void
>> +rename_dominated_uses (loop_p loop, tree old_name, tree new_name,
>> +                    basic_block def_bb)
>> +{
>> +  imm_use_iterator uit;
>> +  gimple stmt;
>> +  use_operand_p use_p;
>> +  ssa_op_iter op_iter;
>> +
>> +  FOR_EACH_IMM_USE_STMT (stmt, uit, old_name)
>> +    {
>> +      enum gimple_code code = gimple_code (stmt);
>> +      basic_block use_bb = gimple_bb (stmt);
>> +      edge_iterator ei;
>> +      edge e;
>> +
>> +      if (code == GIMPLE_PHI)
>> +     {
>> +       FOR_EACH_EDGE (e, ei, use_bb->preds)
>> +         if (PHI_ARG_DEF_FROM_EDGE (stmt, e) == old_name
>> +             && dominated_by_p (CDI_DOMINATORS, e->src, def_bb)
>> +             && use_bb != def_bb)
>> +           replace_exp (gimple_phi_arg_imm_use_ptr (stmt, e->dest_idx),
>> +                        new_name);
>
>  SET_USE (gimple_phi_arg_imm_use_ptr (stmt, e->dest_idx), new_name);
>
>> +     }
>> +      else
>> +     {
>> +       if (!dominated_by_p (CDI_DOMINATORS, use_bb, def_bb))
>> +         continue;
>> +
>> +       if (use_bb->loop_father == loop)
>> +         {
>> +           FOR_EACH_SSA_USE_OPERAND (use_p, stmt, op_iter, SSA_OP_ALL_USES)
>> +             if (USE_FROM_PTR (use_p) == old_name)
>> +               replace_exp (use_p, new_name);
>> +         }
>> +       else
>> +         /* Virtual operands are not translated into loop closed
>> +            SSA form, and thus they may occur in the rest of
>> +            the program without a loop close vphi node.  */
>
> But you are updating all uses again.
>
>  You should simply use
>
>        FOR_EACH_IMM_USE_ON_STMT (use_p, uit)
>          SET_USE (use_p, new_name);
>
>> +         FOR_EACH_SSA_USE_OPERAND (use_p, stmt, op_iter, SSA_OP_ALL_USES)
>> +           if (USE_FROM_PTR (use_p) == old_name)
>> +             replace_exp (use_p, new_name);
>> +     }
>> +    }
>> +}
>> +
>> +/* Helper function for add_missing_phi_nodes_1.  Adds to LOOP all the
>> +   missing phi nodes for NAME and updates the arguments of the
>> +   LATCH_PHI node.  LOOP_PHI node is the inductive definition of NAME
>> +   in LOOP->header.  */
>> +
>> +static void
>> +add_missing_phi_nodes_2 (loop_p loop, tree name, tree old_name,
>> +                      VEC (gimple, heap) *phis)
>> +{
>> +  unsigned i;
>> +  basic_block bb, dom_bb = gimple_bb (SSA_NAME_DEF_STMT (name));
>> +  VEC (basic_block, heap) *dom_bbs = get_all_dominated_blocks (CDI_DOMINATORS,
>> +                                                            dom_bb);
>> +
>> +  FOR_EACH_VEC_ELT (basic_block, dom_bbs, i, bb)
>> +    {
>> +      edge e;
>> +      edge_iterator ei;
>> +
>> +      if (bb == loop->latch
>> +       || bb->loop_father != loop)
>> +     continue;
>
> dom_bbs may be very large, it's much better to iterate over the
> loop bbs and do a dominator check.  Or iterate over dominator sons
> with first_dom_son (), next_dom_son () and recurse, bailing out when
> you're running out of the loop.
>
>> +      FOR_EACH_EDGE (e, ei, bb->succs)
>> +     {
>> +       gimple phi = VEC_index (gimple, phis, e->dest->index);
>> +
>> +       if (phi)
>> +         add_phi_arg (phi, name, e, UNKNOWN_LOCATION);
>> +
>> +       else if (!single_pred_p (e->dest)
>> +                && !dominated_by_p (CDI_DOMINATORS, e->dest, dom_bb)
>> +                && e->dest->loop_father == loop)
>> +       {
>> +         tree var = SSA_NAME_VAR (name);
>> +
>> +         phi = create_phi_node (var, e->dest);
>> +         create_new_def_for (gimple_phi_result (phi), phi,
>> +                             gimple_phi_result_ptr (phi));
>
> Again.
>
>> +         VEC_replace (gimple, phis, e->dest->index, phi);
>> +         add_phi_arg (phi, name, e, UNKNOWN_LOCATION);
>> +         rename_dominated_uses (loop, old_name, gimple_phi_result (phi),
>> +                                e->dest);
>> +         add_missing_phi_nodes_2 (loop, gimple_phi_result (phi), old_name,
>> +                                  phis);
>> +       }
>> +     }
>> +    }
>
> You leak dom_bbs.
>
>> +}
>> +
>> +/* Helper function for add_missing_phi_nodes.  For all the definitions
>> +   of DEF_STMT add the missing phi nodes in LOOP.  */
>> +
>> +static void
>> +add_missing_phi_nodes_1 (loop_p loop, gimple def_stmt)
>> +{
>> +  def_operand_p def_p;
>> +  ssa_op_iter op_iter;
>> +  basic_block bb = gimple_bb (def_stmt);
>> +
>> +  FOR_EACH_PHI_OR_STMT_DEF (def_p, def_stmt, op_iter, SSA_OP_DEF|SSA_OP_VDEF)
>> +    {
>> +      edge e;
>> +      edge_iterator ei;
>> +      tree res, zero, var;
>> +      gimple loop_phi, latch_phi, use_stmt;
>> +      imm_use_iterator uit;
>> +      tree name = DEF_FROM_PTR (def_p);
>> +      bool needs_update = false;
>> +      VEC (gimple, heap) *phis;
>> +      int i;
>> +
>> +      FOR_EACH_IMM_USE_STMT (use_stmt, uit, name)
>> +     {
>> +       basic_block use_bb = gimple_bb (use_stmt);
>> +
>> +       if (!dominated_by_p (CDI_DOMINATORS, bb, use_bb))
>> +         {
>> +           needs_update = true;
>> +           BREAK_FROM_IMM_USE_STMT (uit);
>> +         }
>> +     }
>> +
>> +      if (!needs_update)
>> +     continue;
>> +
>> +      var = SSA_NAME_VAR (name);
>> +      loop_phi = create_phi_node (var, loop->header);
>> +      latch_phi = create_phi_node (var, loop->latch);
>> +
>> +      create_new_def_for (gimple_phi_result (loop_phi), loop_phi,
>> +                       gimple_phi_result_ptr (loop_phi));
>> +      create_new_def_for (gimple_phi_result (latch_phi), latch_phi,
>> +                       gimple_phi_result_ptr (latch_phi));
>
> Again.
>
>> +      /* The value set to ZERO will never be used in the loop, however we
>> +      have to construct something meaningful for virtual SSA_NAMEs.  */
>> +      if (is_gimple_reg (name))
>> +     zero = fold_convert (TREE_TYPE (name), integer_zero_node);
>> +      else
>> +     zero = gimple_default_def (cfun, SSA_NAME_VAR (name));
>> +
>> +      res = gimple_phi_result (latch_phi);
>> +      FOR_EACH_EDGE (e, ei, loop->header->preds)
>> +     add_phi_arg (loop_phi, (e == loop_latch_edge (loop) ? res : zero),
>> +                  e, UNKNOWN_LOCATION);
>> +
>> +      res = gimple_phi_result (loop_phi);
>> +      FOR_EACH_EDGE (e, ei, loop->latch->preds)
>> +     add_phi_arg (latch_phi, res, e, UNKNOWN_LOCATION);
>> +
>> +      phis = VEC_alloc (gimple, heap, n_basic_blocks);
>> +      for (i = 0; i < n_basic_blocks; i++)
>> +     VEC_quick_push (gimple, phis, NULL);
>> +
>> +      VEC_replace (gimple, phis, loop->latch->index, latch_phi);
>> +      VEC_replace (gimple, phis, loop->header->index, loop_phi);
>> +      add_missing_phi_nodes_2 (loop, name, name, phis);
>> +
>> +      for (i = 0; i < n_basic_blocks; i++)
>> +     {
>> +       gimple phi = VEC_index (gimple, phis, i);
>> +
>> +       if (!phi)
>> +         continue;
>> +
>> +       FOR_EACH_EDGE (e, ei, BASIC_BLOCK (i)->preds)
>> +         if (!PHI_ARG_DEF_FROM_EDGE (phi, e))
>> +           add_phi_arg (phi, res, e, UNKNOWN_LOCATION);
>> +     }
>> +
>> +      VEC_free (gimple, heap, phis);
>> +    }
>> +}
>> +
>> +/* Walks over the code of LOOP and adds the missing phi nodes at
>> +   control flow junctions.  When a variable is defined in an outer
>> +   loop and used in an inner loop, the definition dominates the use.
>> +   After the loop flattening, the inner loop body is directly
>> +   reachable from the LOOP->header by using the added edge guarded by
>> +   the boolean flag that controls the execution of the back-edge that
>> +   was eliminated.  In this case, the use is not dominated by the
>> +   definition, and this function adds the missing phi nodes.  */
>> +
>> +static void
>> +add_missing_phi_nodes (loop_p loop)
>> +{
>> +  gimple_stmt_iterator gsi;
>> +  int i, n = loop->num_nodes;
>> +  basic_block *bbs = get_loop_body (loop);
>
> So you can even pass this down to add_missing_phi_nodes_2.  Or
> even use get_loop_body_in_dom_order and thus only need to walk
> adjacent blocks in that array.
>
>> +  for (i = 0; i < n; i++)
>> +    {
>> +      basic_block bb = bbs[i];
>> +
>> +      /* LOOP->header dominates all the blocks of the loop body, and
>> +      so we don't have to look at the missing phi nodes for the
>> +      definitions of LOOP->header.  */
>> +      if (bb == loop->header)
>> +     continue;
>> +
>> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> +     if (!gimple_nop_p (gsi_stmt (gsi)))
>> +       add_missing_phi_nodes_1 (loop, gsi_stmt (gsi));
>> +
>> +      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> +     add_missing_phi_nodes_1 (loop, gsi_stmt (gsi));
>> +    }
>> +
>> +  free (bbs);
>> +}
>> +
>> +/* Removes all the back-edges of LOOP except its own back-edge.
>> +   SCRATCH_PAD is used in if-conversion.  */
>> +
>> +static unsigned
>> +flatten_loop (loop_p loop, tree *scratch_pad)
>> +{
>> +  int i, n = loop->num_nodes;
>> +  basic_block *bbs;
>> +  VEC (edge, heap) *back_edges;
>> +  VEC (basic_block, heap) *loop_body;
>> +  edge_iterator ei;
>> +  edge e, pred_e;
>> +  unsigned max_nb_basic_blocks = PARAM_VALUE (PARAM_LFLAT_MAX_NB_BBS);;
>> +
>> +  if (loop->num_nodes > max_nb_basic_blocks
>> +      || !single_exit (loop)
>> +      || !dbg_cnt (lflat))
>> +    return 0;
>> +
>> +  mark_dfs_back_edges ();
>> +  bbs = get_loop_body (loop);
>> +
>> +  back_edges = VEC_alloc (edge, heap, 3);
>> +  loop_body = VEC_alloc (basic_block, heap, 3);
>> +
>> +  for (i = 0; i < n; i++)
>> +    FOR_EACH_EDGE (e, ei, bbs[i]->succs)
>> +      if (e->flags & EDGE_DFS_BACK
>> +       && e->src != loop->latch)
>> +     VEC_safe_push (edge, heap, back_edges, e);
>> +
>> +  free (bbs);
>> +
>> +  /* Early return and do not modify the code when there are no back
>> +     edges.  */
>> +  if (VEC_empty (edge, back_edges))
>> +    return 0;
>> +
>> +  cancel_subloops (loop);
>> +
>> +  /* Split the latch edge to make sure that the latch basic block does
>> +     not contain code.  */
>> +  loop->latch = split_edge (loop_latch_edge (loop));
>> +  pred_e = single_pred_edge (loop->latch);
>> +
>> +  FOR_EACH_VEC_ELT (edge, back_edges, i, e)
>> +    {
>> +      basic_block dest = split_edge (e);
>> +
>> +      /* Redirect BACK_EDGE to LOOP->latch.  */
>> +      redirect_edge_and_branch_force (e, loop->latch);
>> +
>> +      /* Save the basic block where it was pointing.  */
>> +      VEC_safe_push (basic_block, heap, loop_body, dest);
>> +    }
>> +
>> +  update_loop_phi_nodes (loop, pred_e);
>> +
>> +  FOR_EACH_VEC_ELT (edge, back_edges, i, e)
>> +    create_control_flag (e, loop, VEC_index (basic_block, loop_body, i));
>> +
>> +  FOR_EACH_VEC_ELT (edge, back_edges, i, e)
>> +    update_inner_induction_phi_nodes (e, loop, VEC_index (basic_block,
>> +                                                       loop_body, i));
>> +
>> +  free_dominance_info (CDI_DOMINATORS);
>> +  calculate_dominance_info (CDI_DOMINATORS);
>> +  add_missing_phi_nodes (loop);
>> +
>> +  /* If we redirected some back-edges, split the latch edge to create
>> +     an empty LOOP->latch.  */
>> +  if (!single_pred_p (loop->latch))
>> +    loop->latch = split_edge (loop_latch_edge (loop));
>> +
>> +  if (gate_tree_if_conversion ())
>> +    tree_if_conversion (loop, scratch_pad);
>
> You are leaking VECs.  As mentioned above testing the gate isn't
> necessary here.
>
>> +  return TODO_update_ssa | TODO_verify_ssa;
>
> These TODOs belong in the pass structure.
>
>> +}
>> +
>> +/* Flattens all the loops of the current function.  */
>> +
>> +static unsigned int
>> +tree_loop_flattening (void)
>> +{
>> +  unsigned todo = 0;
>> +  loop_p loop;
>> +  loop_iterator li;
>> +  tree scratch_pad = NULL_TREE;
>> +
>> +  if (number_of_loops () <= 1)
>> +    return 0;
>> +
>> +  FOR_EACH_LOOP (li, loop, 0)
>> +    todo |= flatten_loop (loop, &scratch_pad);
>
> So we might end up recursively flattening loops (or not, as this
> walk is in undefined order).  I'd say you want LI_ONLY_INNERMOST here,
> or do you really want to flatten all loop trees up to the number
> of basic blocks specified in the parm?  I guess not.
>
> I think the pass misses a cost model and I'm still not sure when
> or if it will be profitable to do this at all (as said, no
> functional testcases).  What's the immediate benefit for GCC 4.6?
>
>> +#ifdef ENABLE_CHECKING
>> +  verify_dominators (CDI_DOMINATORS);
>> +  verify_flow_info ();
>> +#endif
>> +
>> +  cleanup_tree_cfg ();
>> +  return todo;
>
> return TODO_cleanup_cfg, but only if you flattened a loop.  So
> return TODO_cleanup_cfg from flatten_loop instead.
>
> Richard.
>
>> +}
>> +
>> +static bool
>> +gate_tree_loop_flattening (void)
>> +{
>> +  return flag_tree_loop_flattening != 0;
>> +}
>> +
>> +struct gimple_opt_pass pass_flatten_loops =
>> +{
>> + {
>> +  GIMPLE_PASS,
>> +  "lflat",                           /* name */
>> +  gate_tree_loop_flattening,         /* gate */
>> +  tree_loop_flattening,                      /* execute */
>> +  NULL,                                      /* sub */
>> +  NULL,                                      /* next */
>> +  0,                                 /* static_pass_number */
>> +  TV_TREE_LOOP_FLATTENING,           /* tv_id */
>> +  PROP_cfg | PROP_ssa,                       /* properties_required */
>> +  0,                                 /* properties_provided */
>> +  0,                                 /* properties_destroyed */
>> +  0,                                 /* todo_flags_start */
>> +  TODO_dump_func
>> +    | TODO_update_ssa
>> +    | TODO_ggc_collect                       /* todo_flags_finish */
>> + }
>> +};
>> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
>> index a87a770..e2f257f 100644
>> --- a/gcc/tree-pass.h
>> +++ b/gcc/tree-pass.h
>> @@ -374,6 +374,7 @@ extern struct gimple_opt_pass pass_graphite;
>>  extern struct gimple_opt_pass pass_graphite_transforms;
>>  extern struct gimple_opt_pass pass_if_conversion;
>>  extern struct gimple_opt_pass pass_loop_distribution;
>> +extern struct gimple_opt_pass pass_flatten_loops;
>>  extern struct gimple_opt_pass pass_vectorize;
>>  extern struct gimple_opt_pass pass_slp_vectorize;
>>  extern struct gimple_opt_pass pass_complete_unroll;
>>
>
> --
> Richard Guenther <rguenther@suse.de>
> Novell / SUSE Labs
> SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 - GF: Markus Rex
>

Patch

From 0e4f1d41f599fbbb5d338945f1b30c6ad8c37618 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Tue, 16 Nov 2010 15:40:28 -0600
Subject: [PATCH 12/12] Fix pass TODOs.

---
 gcc/tree-loop-flattening.c |    8 ++++----
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
index 3613dc9..cd49be1 100644
--- a/gcc/tree-loop-flattening.c
+++ b/gcc/tree-loop-flattening.c
@@ -530,7 +530,7 @@  flatten_loop (loop_p loop, tree *scratch_pad)
   VEC_free (edge, heap, back_edges);
   VEC_free (basic_block, heap, loop_body);
 
-  return TODO_update_ssa | TODO_verify_ssa;
+  return TODO_cleanup_cfg;
 }
 
 /* Flattens all the loops of the current function.  */
@@ -554,7 +554,6 @@  tree_loop_flattening (void)
   verify_flow_info ();
 #endif
 
-  cleanup_tree_cfg ();
   return todo;
 }
 
@@ -580,7 +579,8 @@  struct gimple_opt_pass pass_flatten_loops =
   0,					/* properties_destroyed */
   0,					/* todo_flags_start */
   TODO_dump_func
-    | TODO_update_ssa
-    | TODO_ggc_collect			/* todo_flags_finish */
+  | TODO_verify_ssa
+  | TODO_update_ssa
+  | TODO_ggc_collect			/* todo_flags_finish */
  }
 };
-- 
1.7.0.4