From patchwork Thu Oct 28 22:58:17 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Sebastian Pop X-Patchwork-Id: 69507 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 935C1B70D1 for ; Fri, 29 Oct 2010 09:59:08 +1100 (EST) Received: (qmail 6686 invoked by alias); 28 Oct 2010 22:59:06 -0000 Received: (qmail 6658 invoked by uid 22791); 28 Oct 2010 22:59:03 -0000 X-SWARE-Spam-Status: No, hits=-1.8 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE, TW_CF, TW_DB, TW_TM, TW_VP, T_TO_NO_BRKTS_FREEMAIL X-Spam-Check-By: sourceware.org Received: from mail-gx0-f175.google.com (HELO mail-gx0-f175.google.com) (209.85.161.175) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Thu, 28 Oct 2010 22:58:53 +0000 Received: by mail-gx0-f175.google.com with SMTP id 26so235906gxk.20 for ; Thu, 28 Oct 2010 15:58:53 -0700 (PDT) Received: by 10.151.105.6 with SMTP id h6mr21096565ybm.354.1288306732583; Thu, 28 Oct 2010 15:58:52 -0700 (PDT) Received: from napoca ([76.244.77.0]) by mx.google.com with ESMTPS id q36sm1166290ybk.18.2010.10.28.15.58.46 (version=TLSv1/SSLv3 cipher=RC4-MD5); Thu, 28 Oct 2010 15:58:50 -0700 (PDT) Received: by napoca (sSMTP sendmail emulation); Thu, 28 Oct 2010 17:58:44 -0500 From: Sebastian Pop To: gcc-patches@gcc.gnu.org Cc: rguenther@suse.de, Sebastian Pop Subject: [PATCH 1/6] Loop flattening on loop-SSA. Date: Thu, 28 Oct 2010 17:58:17 -0500 Message-Id: <1288306702-5543-2-git-send-email-sebpop@gmail.com> In-Reply-To: <1288306702-5543-1-git-send-email-sebpop@gmail.com> References: <1288306702-5543-1-git-send-email-sebpop@gmail.com> X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org 2010-10-20 Sebastian Pop * Makefile.in (OBJS-common): Add tree-loop-flattening.o. (tree-loop-flattening.o): New. * common.opt (ftree-loop-flatten): New. * dbgcnt.def (lflat): New. * params.def (PARAM_LFLAT_MAX_NB_BBS): New. * passes.c (init_optimization_passes): Add new passes pass_flatten_loops and pass_if_conversion after loop vectorization and before pass_slp_vectorize. * timevar.def (TV_TREE_LOOP_FLATTENING): New. * tree-loop-flattening.c: New. * tree-pass.h (pass_flatten_loops): Declared. * gcc.dg/tree-ssa/flat-loop-1.c: New. * gcc.dg/tree-ssa/flat-loop-2.c: New. * gcc.dg/tree-ssa/flat-loop-3.c: New. * gcc.dg/tree-ssa/flat-loop-4.c: New. --- gcc/ChangeLog | 14 + gcc/Makefile.in | 4 + gcc/common.opt | 4 + gcc/dbgcnt.def | 1 + gcc/params.def | 7 + gcc/passes.c | 2 + gcc/testsuite/ChangeLog | 7 + gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c | 28 ++ gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c | 39 ++ gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c | 19 + gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c | 23 + gcc/timevar.def | 1 + gcc/tree-loop-flattening.c | 625 +++++++++++++++++++++++++++ gcc/tree-pass.h | 1 + 14 files changed, 775 insertions(+), 0 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c create mode 100644 gcc/tree-loop-flattening.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index beed454..a0148d2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2010-10-20 Sebastian Pop + + * Makefile.in (OBJS-common): Add tree-loop-flattening.o. + (tree-loop-flattening.o): New. + * common.opt (ftree-loop-flatten): New. + * dbgcnt.def (lflat): New. + * params.def (PARAM_LFLAT_MAX_NB_BBS): New. + * passes.c (init_optimization_passes): Add new passes + pass_flatten_loops and pass_if_conversion after loop vectorization + and before pass_slp_vectorize. + * timevar.def (TV_TREE_LOOP_FLATTENING): New. + * tree-loop-flattening.c: New. + * tree-pass.h (pass_flatten_loops): Declared. + 2010-10-20 Nathan Froyd * ifcvt.c (noce_emit_cmove): If both of the values are SUBREGs, try diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 898e962..55b67f4 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1368,6 +1368,7 @@ OBJS-common = \ tree-into-ssa.o \ tree-iterator.o \ tree-loop-distribution.o \ + tree-loop-flattening.o \ tree-loop-linear.o \ tree-nested.o \ tree-nrv.o \ @@ -2773,6 +2774,9 @@ tree-loop-distribution.o: tree-loop-distribution.c $(CONFIG_H) $(SYSTEM_H) coret $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ $(TREE_PASS_H) $(TREE_DATA_REF_H) $(EXPR_H) \ langhooks.h $(TREE_VECTORIZER_H) +tree-loop-flattening.o: tree-loop-flattening.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(TM_H) $(OPTABS_H) $(TREE_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \ + $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(TREE_PASS_H) $(DBGCNT_H) tree-parloops.o: tree-parloops.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_FLOW_H) $(TREE_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) \ $(DIAGNOSTIC_H) $(TREE_PASS_H) langhooks.h gt-tree-parloops.h \ diff --git a/gcc/common.opt b/gcc/common.opt index 8fe796f..c969979 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1632,6 +1632,10 @@ ftree-loop-distribute-patterns Common Report Var(flag_tree_loop_distribute_patterns) Optimization Enable loop distribution for patterns transformed into a library call +ftree-loop-flatten +Common Report Var(flag_tree_loop_flattening) Optimization +Enable loop flattening on trees + ftree-loop-im Common Report Var(flag_tree_loop_im) Init(1) Optimization Enable loop invariant motion on trees diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def index 0492d66..0ef9a72 100644 --- a/gcc/dbgcnt.def +++ b/gcc/dbgcnt.def @@ -166,6 +166,7 @@ DEBUG_COUNTER (if_conversion_tree) DEBUG_COUNTER (if_after_combine) DEBUG_COUNTER (if_after_reload) DEBUG_COUNTER (local_alloc_for_sched) +DEBUG_COUNTER (lflat) DEBUG_COUNTER (postreload_cse) DEBUG_COUNTER (pre) DEBUG_COUNTER (pre_insn) diff --git a/gcc/params.def b/gcc/params.def index 49a6185..3fffc35 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -788,6 +788,13 @@ DEFPARAM (PARAM_GRAPHITE_MAX_BBS_PER_FUNCTION, "maximum number of basic blocks per function to be analyzed by Graphite", 100, 0, 0) +/* Maximal number of basic blocks in a loop to be flattened. */ + +DEFPARAM (PARAM_LFLAT_MAX_NB_BBS, + "lflat-max-nb-bbs", + "maximum number of basic blocks in a loop to be flattened", + 100, 0, 0) + /* Avoid doing loop invariant motion on very large loops. */ DEFPARAM (PARAM_LOOP_INVARIANT_MAX_BBS_IN_LOOP, diff --git a/gcc/passes.c b/gcc/passes.c index 1308ce9..4b778bc 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -913,6 +913,8 @@ init_optimization_passes (void) } NEXT_PASS (pass_predcom); NEXT_PASS (pass_complete_unroll); + NEXT_PASS (pass_flatten_loops); + NEXT_PASS (pass_if_conversion); NEXT_PASS (pass_slp_vectorize); NEXT_PASS (pass_parallelize_loops); NEXT_PASS (pass_loop_prefetch); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9d9c543..8ab520e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2010-10-20 Sebastian Pop + + * gcc.dg/tree-ssa/flat-loop-1.c: New. + * gcc.dg/tree-ssa/flat-loop-2.c: New. + * gcc.dg/tree-ssa/flat-loop-3.c: New. + * gcc.dg/tree-ssa/flat-loop-4.c: New. + 2010-10-20 Rainer Orth PR c++/46024 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c new file mode 100644 index 0000000..bee8a2b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-loop-flatten" } */ + +struct stack_segment +{ + struct dynamic_allocation_blocks *dynamic_allocation; +}; +struct dynamic_allocation_blocks +{ + struct dynamic_allocation_blocks *next; +}; +static struct dynamic_allocation_blocks * +merge_dynamic_blocks (struct dynamic_allocation_blocks *a, + struct dynamic_allocation_blocks *b) +{ + struct dynamic_allocation_blocks **pp; + for (pp = &a->next; *pp != ((void *)0); pp = &(*pp)->next) + *pp = b; + return a; +} +__morestack_release_segments (struct stack_segment **pp, int free_dynamic) +{ + struct dynamic_allocation_blocks *ret; + struct stack_segment *pss; + pss = *pp; + while (pss != ((void *)0)) + ret = merge_dynamic_blocks (pss->dynamic_allocation, ret); +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c new file mode 100644 index 0000000..a7287fb --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-loop-flatten" } */ + +struct stack_segment +{ + struct stack_segment *next; + struct dynamic_allocation_blocks *dynamic_allocation; +}; +struct dynamic_allocation_blocks +{ + struct dynamic_allocation_blocks *next; +}; +static struct dynamic_allocation_blocks * +merge_dynamic_blocks (struct dynamic_allocation_blocks *a, + struct dynamic_allocation_blocks *b) +{ + struct dynamic_allocation_blocks **pp; + if (b == ((void *)0)) + for (pp = &a->next; *pp != ((void *)0); pp = &(*pp)->next) + ; + return a; +} +__morestack_release_segments (struct stack_segment **pp, int free_dynamic) +{ + struct dynamic_allocation_blocks *ret; + struct stack_segment *pss; + while (pss != ((void *)0)) + { + struct stack_segment *next; + next = pss->next; + { + if (free_dynamic) + { + ret = merge_dynamic_blocks (pss->dynamic_allocation, ret); + } + } + pss = next; + } +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c new file mode 100644 index 0000000..d3d66ab --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-loop-flatten" } */ + + +int +split_directories (const char *name, int *ptr_num_dirs) +{ + int num_dirs = 0; + char **dirs; + const char *p, *q; + int ch; + while ((ch = *p++) != '\0') + { + num_dirs++; + while (((*p) == '/')) + p++; + } + return (dirs[num_dirs - 1] == ((void *)0)); +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c new file mode 100644 index 0000000..8e551ac --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-loop-flatten" } */ + +void +formatted_backspace (int common, char *s) +{ + int base; + int n; + do + { + if (sseek (s, base, 0) < 0) + goto io_error; + + while (n > 0) + { + n--; + base += n + 1; + } + } + while (base != 0); + io_error: + generate_error (common, 0, ((void *)0)); +} diff --git a/gcc/timevar.def b/gcc/timevar.def index 86e2999..89ff8e8 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -152,6 +152,7 @@ DEFTIMEVAR (TV_GRAPHITE_DATA_DEPS , "Graphite data dep analysis") DEFTIMEVAR (TV_GRAPHITE_CODE_GEN , "Graphite code generation") DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear") DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution") +DEFTIMEVAR (TV_TREE_LOOP_FLATTENING , "tree loop flattening") DEFTIMEVAR (TV_CHECK_DATA_DEPS , "tree check data dependences") DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching") DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization") diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c new file mode 100644 index 0000000..826e7e8 --- /dev/null +++ b/gcc/tree-loop-flattening.c @@ -0,0 +1,625 @@ +/* Loop flattening. + Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by Sebastian Pop . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "ggc.h" +#include "tree.h" +#include "rtl.h" +#include "output.h" +#include "basic-block.h" +#include "diagnostic.h" +#include "tree-flow.h" +#include "toplev.h" +#include "tree-dump.h" +#include "timevar.h" +#include "cfgloop.h" +#include "tree-pass.h" +#include "gimple.h" +#include "params.h" +#include "dbgcnt.h" + +/* This loop flattening pass transforms backward pointing edges into + forward pointing edges. + + The back-edge removal transformation was described in the 1983 + paper by Allen J. R., Ken Kennedy, Carrie Porterfield, and Joe + Warren: "Conversion of control dependence to data dependence" + available from http://doi.acm.org/10.1145/567067.567085 + + The back-edge removal algorithm was presented in that paper as part + of the if-conversion algorithm for backward pointing edges. In + this section we will first provide a description of this technique + adapted for the Gimple-SSA form, followed by an example, and a + discussion of the differences with the higher level loop flattening + transformation. + + The back-edge removal algorithm transforms control dependences into + data dependences by using a boolean variable. The values taken by + the boolean variable control the execution path of the forward + edges created in order to use the back-edge of an outer loop. + + The first step of the algorithm detects a surrounding loop and all + the back-edges of the loop body: these back-edges can be inner + loops or strongly connected components of the CFG that cannot be + reduced to natural loops. + + Each back-edge is removed by redirecting the target of the + back-edge to the latch basic block of the surrounding loop. A + boolean variable is created in the latch. It is cleared when the + redirected back-edge is taken and it is set to true for any other + paths leading to the latch. + + The header basic block of the surrounding loop is split before its + statements and a new condition is added based on the control + variable: when the control variable is set to true, the execution + proceeds as normal to the basic block that contains the statements + of the header; when the control variable is cleared, meaning that + the back-edge has been taken, the execution proceeds to the point + where the redirected back-edge was pointing. + + The last step updates the SSA form after all the back-edges have + been redirected to the latch, and the new edges from the header to + the destination of back-edges have been created. + + Another description of loop flattening in a very Fortran specific + way is in the 1992 paper by Reinhard von Hanxleden and Ken Kennedy: + "Relaxing SIMD Control Flow Constraints using Loop Transformations" + available from + http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033 */ + +/* Keep the loop structure for LOOP and remove all the loop structures + under LOOP. */ + +static void +cancel_subloops (loop_p loop) +{ + int i; + loop_p li; + VEC (loop_p, heap) *lv = VEC_alloc (loop_p, heap, 3); + + for (li = loop->inner; li; li = li->next) + VEC_safe_push (loop_p, heap, lv, li); + + FOR_EACH_VEC_ELT (loop_p, lv, i, li) + cancel_loop_tree (li); + + VEC_free (loop_p, heap, lv); +} + +/* Before creating other phi nodes in LOOP->header for the control + flags, update the phi nodes of LOOP->header and add the necessary + phi nodes in the LOOP->latch that now contains several paths on + which the values are not updated. PRED_E is the single edge that + was pointing to the LOOP->latch basic block before inner back-edges + were redirected to the LOOP->latch. */ + +static void +update_loop_phi_nodes (loop_p loop, edge pred_e) +{ + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) + { + edge e; + edge_iterator ei; + gimple phi = gsi_stmt (gsi); + tree back_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); + tree res = gimple_phi_result (phi); + tree var = SSA_NAME_VAR (res); + + phi = create_phi_node (var, loop->latch); + create_new_def_for (gimple_phi_result (phi), phi, + gimple_phi_result_ptr (phi)); + + FOR_EACH_EDGE (e, ei, loop->latch->preds) + add_phi_arg (phi, (e == pred_e ? back_arg : res), + e, UNKNOWN_LOCATION); + + res = gimple_phi_result (phi); + add_phi_arg (gsi_stmt (gsi), res, loop_latch_edge (loop), + UNKNOWN_LOCATION); + } +} + +/* Creates a control flag for the FORWARDED_EDGE that represents the + back-edge that has been forwarded to the latch basic block of LOOP. + INNER_BODY is the basic block to which the back-edge was pointing + before redirection. This function creates a boolean control flag + that is cleared when the FORWARDED_EDGE is taken and set for all + the other paths. This function adds the corresponding phi nodes in + LOOP->latch and LOOP->header, and finally adds an edge from + LOOP->header to the INNER_BODY guarded by the control flag. */ + +static void +create_control_flag (edge forwarded_edge, loop_p loop, basic_block inner_body) +{ + edge e, preheader; + edge outer_latch_e = loop_latch_edge (loop); + const char *name = "_flat_"; + tree var = create_tmp_var (boolean_type_node, name); + tree res; + gimple phi, cond_stmt; + gimple_stmt_iterator gsi; + edge_iterator ei; + + /* Adds a control variable for the redirected FORWARDED_EDGE. */ + add_referenced_var (var); + phi = create_phi_node (var, forwarded_edge->dest); + create_new_def_for (gimple_phi_result (phi), phi, + gimple_phi_result_ptr (phi)); + + FOR_EACH_EDGE (e, ei, outer_latch_e->src->preds) + add_phi_arg (phi, (e == forwarded_edge + ? boolean_false_node + : boolean_true_node), + e, UNKNOWN_LOCATION); + res = gimple_phi_result (phi); + + /* Add a phi node in LOOP->header for the control variable. */ + phi = create_phi_node (var, loop->header); + create_new_def_for (gimple_phi_result (phi), phi, + gimple_phi_result_ptr (phi)); + + preheader = loop_preheader_edge (loop); + FOR_EACH_EDGE (e, ei, loop->header->preds) + add_phi_arg (phi, (e == preheader + ? boolean_true_node + : res), + e, UNKNOWN_LOCATION); + res = gimple_phi_result (phi); + + /* Split LOOP->header to insert the control variable condition. */ + e = split_block_after_labels (loop->header); + e->flags = EDGE_TRUE_VALUE; + e = make_edge (loop->header, inner_body, EDGE_FALSE_VALUE); + cond_stmt = gimple_build_cond (EQ_EXPR, res, boolean_true_node, + NULL_TREE, NULL_TREE); + gsi = gsi_last_bb (loop->header); + gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); +} + +/* Adds phi nodes to the LOOP->header and LOOP->latch for the ssa_name + NAME. ARG is the argument of the latch phi node set for the + FORWARDED_EDGE, and all the other edges merged by the latch phi + node are set to the result of the LOOP->header phi node. The latch + edge of the LOOP->header phi node is set to the result of the + LOOP->latch phi node, and the other argument is set to an arbitrary + valid value defined before the loop (note that this initial value + is never used in the loop). Returns the LOOP->header phi result. */ + +static tree +add_header_and_latch_phis (loop_p loop, tree name, edge forwarded_edge, + tree arg) +{ + edge e; + edge_iterator ei; + tree res, zero, var = SSA_NAME_VAR (name); + gimple loop_phi = create_phi_node (var, loop->header); + gimple latch_phi = create_phi_node (var, loop->latch); + + create_new_def_for (gimple_phi_result (loop_phi), loop_phi, + gimple_phi_result_ptr (loop_phi)); + create_new_def_for (gimple_phi_result (latch_phi), latch_phi, + gimple_phi_result_ptr (latch_phi)); + + /* The value set to ZERO will never be used in the loop, however we + have to construct something meaningful for virtual SSA_NAMEs. */ + if (TREE_CODE (arg) != SSA_NAME) + zero = arg; + else if (is_gimple_reg (arg)) + zero = fold_convert (TREE_TYPE (arg), integer_zero_node); + else + zero = gimple_default_def (cfun, SSA_NAME_VAR (arg)); + + res = gimple_phi_result (latch_phi); + FOR_EACH_EDGE (e, ei, loop->header->preds) + add_phi_arg (loop_phi, (e == loop_latch_edge (loop) ? res : zero), + e, UNKNOWN_LOCATION); + + res = gimple_phi_result (loop_phi); + FOR_EACH_EDGE (e, ei, loop->latch->preds) + add_phi_arg (latch_phi, (e == forwarded_edge ? arg : res), + e, UNKNOWN_LOCATION); + + return res; +} + +/* Creates phi nodes for each inductive definition, i.e., loop phi + nodes. For each induction phi node in the old loop header, i.e., + in the single_succ (INNER_BODY), insert a phi node in the + LOOP->latch that takes the updated value of the induction on the + FORWARDED_EDGE, and maintains the same value as in the phi node of + the LOOP->header for all the other possible paths reaching + LOOP->latch. This function has to be called after all the + back-edges have been redirected. */ + +static void +update_inner_induction_phi_nodes (edge forwarded_edge, loop_p loop, + basic_block inner_body) +{ + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_phis (single_succ (inner_body)); + !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple old_loop_phi = gsi_stmt (gsi); + tree back_arg = PHI_ARG_DEF_FROM_EDGE (old_loop_phi, + single_succ_edge (inner_body)); + tree res = gimple_phi_result (old_loop_phi); + + res = add_header_and_latch_phis (loop, res, forwarded_edge, back_arg); + add_phi_arg (old_loop_phi, res, single_succ_edge (inner_body), + UNKNOWN_LOCATION); + } +} + +/* Renames all the uses of OLD_NAME with NEW_NAME (except the phi + nodes of DEF_BB) in all the basic blocks dominated by DEF_BB and in + the arguments of all the phi nodes originating in a basic block + that is dominated by DEF_BB. */ + +static void +rename_dominated_uses (loop_p loop, tree old_name, tree new_name, + basic_block def_bb) +{ + imm_use_iterator uit; + gimple stmt; + use_operand_p use_p; + ssa_op_iter op_iter; + + FOR_EACH_IMM_USE_STMT (stmt, uit, old_name) + { + enum gimple_code code = gimple_code (stmt); + basic_block use_bb = gimple_bb (stmt); + edge_iterator ei; + edge e; + + if (code == GIMPLE_PHI) + { + FOR_EACH_EDGE (e, ei, use_bb->preds) + if (PHI_ARG_DEF_FROM_EDGE (stmt, e) == old_name + && dominated_by_p (CDI_DOMINATORS, e->src, def_bb) + && use_bb != def_bb) + replace_exp (gimple_phi_arg_imm_use_ptr (stmt, e->dest_idx), + new_name); + } + else + { + if (!dominated_by_p (CDI_DOMINATORS, use_bb, def_bb)) + continue; + + if (use_bb->loop_father == loop) + { + FOR_EACH_SSA_USE_OPERAND (use_p, stmt, op_iter, SSA_OP_ALL_USES) + if (USE_FROM_PTR (use_p) == old_name) + replace_exp (use_p, new_name); + } + else + /* Virtual operands are not translated into loop closed + SSA form, and thus they may occur in the rest of + the program without a loop close vphi node. */ + FOR_EACH_SSA_USE_OPERAND (use_p, stmt, op_iter, SSA_OP_ALL_USES) + if (USE_FROM_PTR (use_p) == old_name) + replace_exp (use_p, new_name); + } + } +} + +/* Helper function for add_missing_phi_nodes_1. Adds to LOOP all the + missing phi nodes for NAME and updates the arguments of the + LATCH_PHI node. LOOP_PHI node is the inductive definition of NAME + in LOOP->header. */ + +static void +add_missing_phi_nodes_2 (loop_p loop, tree name, tree old_name, + VEC (gimple, heap) *phis) +{ + unsigned i; + basic_block bb, dom_bb = gimple_bb (SSA_NAME_DEF_STMT (name)); + VEC (basic_block, heap) *dom_bbs = get_all_dominated_blocks (CDI_DOMINATORS, + dom_bb); + + FOR_EACH_VEC_ELT (basic_block, dom_bbs, i, bb) + { + edge e; + edge_iterator ei; + + if (bb == loop->latch + || bb->loop_father != loop) + continue; + + FOR_EACH_EDGE (e, ei, bb->succs) + { + gimple phi = VEC_index (gimple, phis, e->dest->index); + + if (phi) + add_phi_arg (phi, name, e, UNKNOWN_LOCATION); + + else if (!single_pred_p (e->dest) + && !dominated_by_p (CDI_DOMINATORS, e->dest, dom_bb) + && e->dest->loop_father == loop) + { + tree var = SSA_NAME_VAR (name); + + phi = create_phi_node (var, e->dest); + create_new_def_for (gimple_phi_result (phi), phi, + gimple_phi_result_ptr (phi)); + VEC_replace (gimple, phis, e->dest->index, phi); + add_phi_arg (phi, name, e, UNKNOWN_LOCATION); + rename_dominated_uses (loop, old_name, gimple_phi_result (phi), + e->dest); + add_missing_phi_nodes_2 (loop, gimple_phi_result (phi), old_name, + phis); + } + } + } +} + +/* Helper function for add_missing_phi_nodes. For all the definitions + of DEF_STMT add the missing phi nodes in LOOP. */ + +static void +add_missing_phi_nodes_1 (loop_p loop, gimple def_stmt) +{ + def_operand_p def_p; + ssa_op_iter op_iter; + basic_block bb = gimple_bb (def_stmt); + + FOR_EACH_PHI_OR_STMT_DEF (def_p, def_stmt, op_iter, SSA_OP_DEF|SSA_OP_VDEF) + { + edge e; + edge_iterator ei; + tree res, zero, var; + gimple loop_phi, latch_phi, use_stmt; + imm_use_iterator uit; + tree name = DEF_FROM_PTR (def_p); + bool needs_update = false; + VEC (gimple, heap) *phis; + int i; + + FOR_EACH_IMM_USE_STMT (use_stmt, uit, name) + { + basic_block use_bb = gimple_bb (use_stmt); + + if (!dominated_by_p (CDI_DOMINATORS, bb, use_bb)) + { + needs_update = true; + BREAK_FROM_IMM_USE_STMT (uit); + } + } + + if (!needs_update) + continue; + + var = SSA_NAME_VAR (name); + loop_phi = create_phi_node (var, loop->header); + latch_phi = create_phi_node (var, loop->latch); + + create_new_def_for (gimple_phi_result (loop_phi), loop_phi, + gimple_phi_result_ptr (loop_phi)); + create_new_def_for (gimple_phi_result (latch_phi), latch_phi, + gimple_phi_result_ptr (latch_phi)); + + /* The value set to ZERO will never be used in the loop, however we + have to construct something meaningful for virtual SSA_NAMEs. */ + if (is_gimple_reg (name)) + zero = fold_convert (TREE_TYPE (name), integer_zero_node); + else + zero = gimple_default_def (cfun, SSA_NAME_VAR (name)); + + res = gimple_phi_result (latch_phi); + FOR_EACH_EDGE (e, ei, loop->header->preds) + add_phi_arg (loop_phi, (e == loop_latch_edge (loop) ? res : zero), + e, UNKNOWN_LOCATION); + + res = gimple_phi_result (loop_phi); + FOR_EACH_EDGE (e, ei, loop->latch->preds) + add_phi_arg (latch_phi, res, e, UNKNOWN_LOCATION); + + phis = VEC_alloc (gimple, heap, n_basic_blocks); + for (i = 0; i < n_basic_blocks; i++) + VEC_quick_push (gimple, phis, NULL); + + VEC_replace (gimple, phis, loop->latch->index, latch_phi); + VEC_replace (gimple, phis, loop->header->index, loop_phi); + add_missing_phi_nodes_2 (loop, name, name, phis); + + for (i = 0; i < n_basic_blocks; i++) + { + gimple phi = VEC_index (gimple, phis, i); + + if (!phi) + continue; + + FOR_EACH_EDGE (e, ei, BASIC_BLOCK (i)->preds) + if (!PHI_ARG_DEF_FROM_EDGE (phi, e)) + add_phi_arg (phi, res, e, UNKNOWN_LOCATION); + } + + VEC_free (gimple, heap, phis); + } +} + +/* Walks over the code of LOOP and adds the missing phi nodes at + control flow junctions. When a variable is defined in an outer + loop and used in an inner loop, the definition dominates the use. + After the loop flattening, the inner loop body is directly + reachable from the LOOP->header by using the added edge guarded by + the boolean flag that controls the execution of the back-edge that + was eliminated. In this case, the use is not dominated by the + definition, and this function adds the missing phi nodes. */ + +static void +add_missing_phi_nodes (loop_p loop) +{ + gimple_stmt_iterator gsi; + int i, n = loop->num_nodes; + basic_block *bbs = get_loop_body (loop); + + for (i = 0; i < n; i++) + { + basic_block bb = bbs[i]; + + /* LOOP->header dominates all the blocks of the loop body, and + so we don't have to look at the missing phi nodes for the + definitions of LOOP->header. */ + if (bb == loop->header) + continue; + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + if (!gimple_nop_p (gsi_stmt (gsi))) + add_missing_phi_nodes_1 (loop, gsi_stmt (gsi)); + + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + add_missing_phi_nodes_1 (loop, gsi_stmt (gsi)); + } + + free (bbs); +} + +/* Removes all the back-edges of LOOP except its own back-edge. */ + +static unsigned +flatten_loop (loop_p loop) +{ + int i, n = loop->num_nodes; + basic_block *bbs; + VEC (edge, heap) *back_edges; + VEC (basic_block, heap) *loop_body; + edge_iterator ei; + edge e, pred_e; + unsigned max_nb_basic_blocks = PARAM_VALUE (PARAM_LFLAT_MAX_NB_BBS);; + + if (loop->num_nodes > max_nb_basic_blocks + || !single_exit (loop) + || !dbg_cnt (lflat)) + return 0; + + mark_dfs_back_edges (); + bbs = get_loop_body (loop); + + back_edges = VEC_alloc (edge, heap, 3); + loop_body = VEC_alloc (basic_block, heap, 3); + + for (i = 0; i < n; i++) + FOR_EACH_EDGE (e, ei, bbs[i]->succs) + if (e->flags & EDGE_DFS_BACK + && e->src != loop->latch) + VEC_safe_push (edge, heap, back_edges, e); + + free (bbs); + + /* Early return and do not modify the code when there are no back + edges. */ + if (VEC_empty (edge, back_edges)) + return 0; + + cancel_subloops (loop); + + /* Split the latch edge to make sure that the latch basic block does + not contain code. */ + loop->latch = split_edge (loop_latch_edge (loop)); + pred_e = single_pred_edge (loop->latch); + + FOR_EACH_VEC_ELT (edge, back_edges, i, e) + { + basic_block dest = split_edge (e); + + /* Redirect BACK_EDGE to LOOP->latch. */ + redirect_edge_and_branch_force (e, loop->latch); + + /* Save the basic block where it was pointing. */ + VEC_safe_push (basic_block, heap, loop_body, dest); + } + + update_loop_phi_nodes (loop, pred_e); + + FOR_EACH_VEC_ELT (edge, back_edges, i, e) + create_control_flag (e, loop, VEC_index (basic_block, loop_body, i)); + + FOR_EACH_VEC_ELT (edge, back_edges, i, e) + update_inner_induction_phi_nodes (e, loop, VEC_index (basic_block, + loop_body, i)); + + free_dominance_info (CDI_DOMINATORS); + calculate_dominance_info (CDI_DOMINATORS); + add_missing_phi_nodes (loop); + + /* If we redirected some back-edges, split the latch edge to create + an empty LOOP->latch. */ + if (!single_pred_p (loop->latch)) + loop->latch = split_edge (loop_latch_edge (loop)); + + return TODO_update_ssa | TODO_verify_ssa; +} + +/* Flattens all the loops of the current function. */ + +static unsigned int +tree_loop_flattening (void) +{ + unsigned todo = 0; + loop_p loop; + loop_iterator li; + + if (number_of_loops () <= 1) + return 0; + + FOR_EACH_LOOP (li, loop, 0) + todo |= flatten_loop (loop); + +#ifdef ENABLE_CHECKING + verify_dominators (CDI_DOMINATORS); + verify_flow_info (); +#endif + + cleanup_tree_cfg (); + return todo; +} + +static bool +gate_tree_loop_flattening (void) +{ + return flag_tree_loop_flattening != 0; +} + +struct gimple_opt_pass pass_flatten_loops = +{ + { + GIMPLE_PASS, + "lflat", /* name */ + gate_tree_loop_flattening, /* gate */ + tree_loop_flattening, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_TREE_LOOP_FLATTENING, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func + | TODO_update_ssa + | TODO_ggc_collect /* todo_flags_finish */ + } +}; diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index a87a770..e2f257f 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -374,6 +374,7 @@ extern struct gimple_opt_pass pass_graphite; extern struct gimple_opt_pass pass_graphite_transforms; extern struct gimple_opt_pass pass_if_conversion; extern struct gimple_opt_pass pass_loop_distribution; +extern struct gimple_opt_pass pass_flatten_loops; extern struct gimple_opt_pass pass_vectorize; extern struct gimple_opt_pass pass_slp_vectorize; extern struct gimple_opt_pass pass_complete_unroll;