From patchwork Thu Oct 11 16:21:09 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Hubicka X-Patchwork-Id: 190954 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 57A482C078C for ; Fri, 12 Oct 2012 03:21:30 +1100 (EST) Comment: DKIM? See http://www.dkim.org DKIM-Signature: v=1; a=rsa-sha1; c=relaxed/relaxed; d=gcc.gnu.org; s=default; x=1350577291; h=Comment: DomainKey-Signature:Received:Received:Received:Received:Date: From:To:Subject:Message-ID:MIME-Version:Content-Type: Content-Disposition:User-Agent:Mailing-List:Precedence:List-Id: List-Unsubscribe:List-Archive:List-Post:List-Help:Sender: Delivered-To; bh=TgGUNyfpbaChDtAikSbcIbVev+E=; b=G8lvpdhbLPA/HBB 5GL0t+SctgeyNiDy8mOlATDivirKXqul/EuNob2UPh0pGeIJ3uSu/k18LSDf6Shr uyS0nUypPsi9JNeLEEkWVA9PJzT5Av8CFsG0UMM8im60ayC1lejYHlYYh3yrdS2O thNza7L6MhwHu6DwmxZy8EsV2EnQ= Comment: DomainKeys? See http://antispam.yahoo.com/domainkeys DomainKey-Signature: a=rsa-sha1; q=dns; c=nofws; s=default; d=gcc.gnu.org; h=Received:Received:X-SWARE-Spam-Status:X-Spam-Check-By:Received:Received:Date:From:To:Subject:Message-ID:MIME-Version:Content-Type:Content-Disposition:User-Agent:Mailing-List:Precedence:List-Id:List-Unsubscribe:List-Archive:List-Post:List-Help:Sender:Delivered-To; b=pJZRl0o5fQ387al7Mwncw346E+unCqiAp3F569p9794t+fPkhu7DjxHQGBo6Q2 ovN2zXHT2fJj6x1qbTHduu0HLQQAzlDWYUY0CajsF2Jr+xSg94iBtTqA9TW8aq6d x8FNEN1KpbW0X8i6bzQEi9EuaR3/MiRS3+Xu1mB+pIKPg=; Received: (qmail 3952 invoked by alias); 11 Oct 2012 16:21:21 -0000 Received: (qmail 3942 invoked by uid 22791); 11 Oct 2012 16:21:18 -0000 X-SWARE-Spam-Status: No, hits=-4.8 required=5.0 tests=AWL, BAYES_00, KHOP_RCVD_UNTRUST, RCVD_IN_DNSWL_LOW, RCVD_IN_HOSTKARMA_W, RCVD_IN_HOSTKARMA_WL, RP_MATCHES_RCVD, TW_CF, TW_TM, T_FRT_BELOW2 X-Spam-Check-By: sourceware.org Received: from nikam.ms.mff.cuni.cz (HELO nikam.ms.mff.cuni.cz) (195.113.20.16) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Thu, 11 Oct 2012 16:21:10 +0000 Received: by nikam.ms.mff.cuni.cz (Postfix, from userid 16202) id 54F5354371D; Thu, 11 Oct 2012 18:21:09 +0200 (CEST) Date: Thu, 11 Oct 2012 18:21:09 +0200 From: Jan Hubicka To: gcc-patches@gcc.gnu.org, rguenther@suse.de Subject: Make try_unroll_loop_completely to use loop bounds recorded Message-ID: <20121011162109.GC32179@kam.mff.cuni.cz> MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.20 (2009-06-14) Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Hi, while looking into RTL loop peeling micopmilation I found that we now do a lot of RTL loop peeling for loops of the form int a[2]; test(int c) { int i; for (i=0;isrc && stmt == last_stmt (exit->src)) + if (exit && body[i] == exit->src && stmt == last_stmt (exit->src)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Exit condition will be eliminated.\n"); @@ -326,13 +326,43 @@ try_unroll_loop_completely (struct loop unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns; gimple cond; struct loop_size size; + bool n_unroll_found = false; + HOST_WIDE_INT maxiter; + basic_block latch; + edge latch_edge; + location_t locus; + int flags; + bool irred_invalidated = false; + gimple stmt; + gimple_stmt_iterator gsi; + edge other_edge = NULL, last_exit; + int num = loop->num; + bool last_iteration_updated = false; + + /* See if we proved number of iterations to be low constant. */ + if (host_integerp (niter, 1)) + { + n_unroll = tree_low_cst (niter, 1); + n_unroll_found = true; + } - if (loop->inner) + /* See if we can improve our estimate by using recorded loop bounds. */ + maxiter = max_loop_iterations_int (loop); + if (maxiter >= 0 + && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll)) + { + n_unroll = maxiter; + n_unroll_found = true; + } + + if (!n_unroll_found) return false; - if (!host_integerp (niter, 1)) + /* We unroll only inner loops, because we do not consider it profitable + otheriwse. We still can cancel loopback edge of not rolling loop; + this is always a good idea. */ + if (loop->inner && n_unroll) return false; - n_unroll = tree_low_cst (niter, 1); max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); if (n_unroll > max_unroll) @@ -340,6 +370,10 @@ try_unroll_loop_completely (struct loop if (n_unroll) { + sbitmap wont_exit; + edge e; + unsigned i; + VEC (edge, heap) *to_remove = NULL; if (ul == UL_SINGLE_ITER) return false; @@ -372,14 +408,6 @@ try_unroll_loop_completely (struct loop fprintf (dump_file, "Not unrolling loop %d.\n", loop->num); return false; } - } - - if (n_unroll) - { - sbitmap wont_exit; - edge e; - unsigned i; - VEC (edge, heap) *to_remove = NULL; initialize_original_copy_tables (); wont_exit = sbitmap_alloc (n_unroll + 1); @@ -408,24 +436,143 @@ try_unroll_loop_completely (struct loop free_original_copy_tables (); } - cond = last_stmt (exit->src); - if (exit->flags & EDGE_TRUE_VALUE) - gimple_cond_make_true (cond); - else - gimple_cond_make_false (cond); - update_stmt (cond); + /* After complette unrolling we still may get rid of the conditional + on exit in the last copy even if we have no idea what it does. + This is quite common case for loops of form + + int a[5]; + for (i=0;isrc->loop_father != loop + || !(last_exit->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) + last_exit = NULL; + else + { + if (last_exit->src->flags & BB_IRREDUCIBLE_LOOP) + last_exit = NULL; + } + } + + /* If loop has only one exit edge, we can remove the conditional from + the last copy of the loop. + TODO: We should account this update into cost model. */ + if (last_exit) + { + cond = last_stmt (last_exit->src); + if (last_exit->flags & EDGE_TRUE_VALUE) + gimple_cond_make_true (cond); + else + gimple_cond_make_false (cond); + update_stmt (cond); + other_edge = EDGE_SUCC (last_exit->src, 0); + if (other_edge == last_exit) + other_edge = EDGE_SUCC (last_exit->src, 1); + last_iteration_updated = true; + } + + /* Now destroy the loop. First try to do so by cancelling the + patch from exit conditional if we identified good candidate. + + TODO: We should update the loop profile for the fact that + the last iteration no longer executes. */ + if (!other_edge || !remove_path (other_edge)) + { + /* We did not manage to cancel the loop by removing the patch. + The loop latch remains reachable even if it will never be reached + at runtime. We must redirect it to somewhere, so create basic + block containg __builtin_unreachable call for this reason. */ + latch = loop->latch; + latch_edge = loop_latch_edge (loop); + flags = latch_edge->flags; + locus = latch_edge->goto_locus; + + /* Unloop destroys the latch edge. */ + unloop (loop, &irred_invalidated); + if (irred_invalidated) + mark_irreducible_loops (); + + /* Create new basic block for the latch edge destination and wire + it in. */ + stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0); + latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags); + gsi = gsi_start_bb (latch_edge->dest); + gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); + latch_edge->dest->loop_father = current_loops->tree_root; + set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); + latch_edge->probability = 0; + latch_edge->count = 0; + latch_edge->flags |= flags; + latch_edge->goto_locus = locus; + } if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num); + { + if (!n_unroll) + fprintf (dump_file, "Turned loop %d to non-loop; it never loops.%s\n", num, + last_iteration_updated + ? " Last iteration exit edge was proved true." : ""); + else + { + fprintf (dump_file, "Unrolled loop %d completely " + "(duplicated %i times).%s%s\n", num, (int)n_unroll, + exit + ? " Exit condition of peeled iterations was eliminated." : "", + last_iteration_updated + ? " Last iteration exit edge was proved true." : ""); + } + } - return true; + return true; } /* Adds a canonical induction variable to LOOP if suitable. CREATE_IV is true if we may create a new iv. UL determines which loops we are allowed to completely unroll. If TRY_EVAL is true, we try to determine the number of iterations of a loop by direct evaluation. - Returns true if cfg is changed. */ + Returns true if cfg is changed. + + IRREDUCIBLE_LOOPS_FOUND is used to bookkeep if we discovered irreducible + loops. This is used in a special case of the exit condition analysis. */ static bool canonicalize_loop_induction_variables (struct loop *loop, @@ -455,22 +602,34 @@ canonicalize_loop_induction_variables (s || TREE_CODE (niter) != INTEGER_CST)) niter = find_loop_niter_by_eval (loop, &exit); - if (chrec_contains_undetermined (niter) - || TREE_CODE (niter) != INTEGER_CST) - return false; + if (TREE_CODE (niter) != INTEGER_CST) + exit = NULL; } - if (dump_file && (dump_flags & TDF_DETAILS)) + /* We work exceptionally hard here to estimate the bound + by find_loop_niter_by_eval. Be sure to keep it for future. */ + if (niter && TREE_CODE (niter) == INTEGER_CST) + record_niter_bound (loop, tree_to_double_int (niter), false, true); + + if (dump_file && (dump_flags & TDF_DETAILS) + && TREE_CODE (niter) == INTEGER_CST) { fprintf (dump_file, "Loop %d iterates ", loop->num); print_generic_expr (dump_file, niter, TDF_SLIM); fprintf (dump_file, " times.\n"); } + if (dump_file && (dump_flags & TDF_DETAILS) + && max_loop_iterations_int (loop) >= 0) + { + fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num, + (int)max_loop_iterations_int (loop)); + } if (try_unroll_loop_completely (loop, exit, niter, ul)) return true; - if (create_iv) + if (create_iv + && niter && !chrec_contains_undetermined (niter)) create_canonical_iv (loop, exit, niter); return false; @@ -483,11 +642,14 @@ unsigned int canonicalize_induction_variables (void) { loop_iterator li; - struct loop *loop; + struct loop *loop, *next; bool changed = false; - FOR_EACH_LOOP (li, loop, 0) + for (fel_init (&li, &next, LI_ONLY_INNERMOST); next;) { + loop = next; + fel_next (&li, &next); + changed |= canonicalize_loop_induction_variables (loop, true, UL_SINGLE_ITER, true); @@ -591,14 +753,18 @@ tree_unroll_loops_completely (bool may_i bool changed; enum unroll_level ul; int iteration = 0; + struct loop *next; do { changed = false; - FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST) + for (fel_init (&li, &next, 0); next;) { - struct loop *loop_father = loop_outer (loop); + struct loop *loop_father = loop_outer (next); + + loop = next; + fel_next (&li, &next); if (may_increase_size && optimize_loop_for_speed_p (loop) /* Unroll outermost loops only if asked to do so or they do Index: fortran/f95-lang.c =================================================================== --- fortran/f95-lang.c (revision 192360) +++ fortran/f95-lang.c (working copy) @@ -908,6 +908,11 @@ gfc_init_builtin_functions (void) gfc_define_builtin ("__builtin_expect", ftype, BUILT_IN_EXPECT, "__builtin_expect", ATTR_CONST_NOTHROW_LEAF_LIST); + ftype = build_function_type_list (void_type_node, NULL_TREE); + + gfc_define_builtin ("__builtin_unreachable", ftype, BUILT_IN_EXPECT, + "__builtin_unreachable", ATTR_NORETURN_NOTHROW_LEAF_LIST); + ftype = build_function_type_list (void_type_node, pvoid_type_node, NULL_TREE); gfc_define_builtin ("__builtin_free", ftype, BUILT_IN_FREE, Index: cfgloop.h =================================================================== --- cfgloop.h (revision 192360) +++ cfgloop.h (working copy) @@ -319,7 +319,8 @@ extern struct loop *loopify (edge, edge, struct loop * loop_version (struct loop *, void *, basic_block *, unsigned, unsigned, unsigned, bool); extern bool remove_path (edge); -void scale_loop_frequencies (struct loop *, int, int); +extern void scale_loop_frequencies (struct loop *, int, int); +extern void unloop (struct loop *, bool *); /* Induction variable analysis. */