Enable loop peeling at -O3

Message ID	20160530152633.GA96777@kam.mff.cuni.cz
State	New
Headers	show Return-Path: <gcc-patches-return-428608-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:cc:subject:message-id:references:mime-version :content-type:in-reply-to; q=dns; s=default; b=O32xSjEmskOoXYurD 8IW9M7JfefEcnrgkF5udIhL+utAiRxvod4xYYawslhH2jTjYfKvVeikX0TXz++L9 hfIVh1t04f+rLkhMI3Da1qgG9Xg9+BIQNNJQxAnXN6YaKA7IELUecxKv45sHSQQo s9OD91xW7oBc58wIvQMvQgbuTE= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org Date: Mon, 30 May 2016 17:26:33 +0200 From: Jan Hubicka <hubicka@ucw.cz> To: Jan Hubicka <hubicka@ucw.cz> Cc: Richard Biener <rguenther@suse.de>, Sandra Loosemore <sandra@codesourcery.com>, gcc-patches@gcc.gnu.org Subject: Re: Enable loop peeling at -O3 Message-ID: <20160530152633.GA96777@kam.mff.cuni.cz> References: <20160527131928.GE44464@kam.mff.cuni.cz> <57486D96.8090508@codesourcery.com> <20160528150444.GB5812@kam.mff.cuni.cz> <alpine.LSU.2.11.1605301123000.1493@t29.fhfr.qr> <20160530110740.GC2770@kam.mff.cuni.cz> <alpine.LSU.2.11.1605301327270.1493@t29.fhfr.qr> <20160530113921.GD2770@kam.mff.cuni.cz> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20160530113921.GD2770@kam.mff.cuni.cz> User-Agent: Mutt/1.5.21 (2010-09-15)

Index: doc/invoke.texi =================================================================== --- doc/invoke.texi (revision 236873) +++ doc/invoke.texi (working copy) @@ -6338,7 +6338,8 @@ by @option{-O2} and also turns on the @o @option{-fgcse-after-reload}, @option{-ftree-loop-vectorize}, @option{-ftree-loop-distribute-patterns}, @option{-fsplit-paths} @option{-ftree-slp-vectorize}, @option{-fvect-cost-model}, -@option{-ftree-partial-pre} and @option{-fipa-cp-clone} options. +@option{-ftree-partial-pre}, @option{-fpeel-loops} +and @option{-fipa-cp-clone} options. @item -O0 @opindex O0 @@ -8661,10 +8662,11 @@ the loop is entered. This usually makes @item -fpeel-loops @opindex fpeel-loops Peels loops for which there is enough information that they do not -roll much (from profile feedback). It also turns on complete loop peeling -(i.e.@: complete removal of loops with small constant number of iterations). +roll much (from profile feedback or static analysis). It also turns on +complete loop peeling (i.e.@: complete removal of loops with small constant +number of iterations). -Enabled with @option{-fprofile-use}. +Enabled with @option{-O3} and/or @option{-fprofile-use}. @item -fmove-loop-invariants @opindex fmove-loop-invariants Index: opts.c =================================================================== --- opts.c (revision 236873) +++ opts.c (working copy) @@ -535,6 +535,7 @@ static const struct default_options defa { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC }, { OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 }, + { OPT_LEVELS_3_PLUS, OPT_fpeel_loops, NULL, 1 }, /* -Ofast adds optimizations to -O3. 
*/ { OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 }, Index: testsuite/gcc.dg/tree-ssa/peel1.c =================================================================== --- testsuite/gcc.dg/tree-ssa/peel1.c (revision 0) +++ testsuite/gcc.dg/tree-ssa/peel1.c (working copy) @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-cunroll-details" } */ +struct foo {int b; int a[3];} foo; +void add(struct foo *a,int l) +{ + int i; + for (i=0;i<l;i++) + a->a[i]++; +} +/* { dg-final { scan-tree-dump "Loop 1 likely iterates at most 3 times." "cunroll"} } */ +/* { dg-final { scan-tree-dump "Peeled loop 1, 4 times." "cunroll"} } */ Index: testsuite/gcc.dg/tree-ssa/peel2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/peel2.c (revision 0) +++ testsuite/gcc.dg/tree-ssa/peel2.c (working copy) @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fpeel-all-loops -fdump-tree-cunroll-details --param max-peel-times=16 --param max-peeled-insns=100" } */ +void add(int *a,int l) +{ + int i; + for (i=0;i<l;i++) + a[i]++; +} +/* { dg-final { scan-tree-dump "Peeled loop 1, 16 times." 
"cunroll"} } */ Index: testsuite/gcc.dg/tree-ssa/pr61743-1.c =================================================================== --- testsuite/gcc.dg/tree-ssa/pr61743-1.c (revision 236873) +++ testsuite/gcc.dg/tree-ssa/pr61743-1.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -funroll-loops -fno-tree-vectorize -fdump-tree-cunroll-details" } */ +/* { dg-options "-O3 -funroll-loops -fno-tree-vectorize -fdump-tree-cunroll-details -fno-peel-loops" } */ #define N 8 #define M 14 Index: testsuite/gcc.dg/tree-ssa/pr61743-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/pr61743-2.c (revision 236873) +++ testsuite/gcc.dg/tree-ssa/pr61743-2.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -funroll-loops -fno-tree-vectorize -fdump-tree-cunroll-details" } */ +/* { dg-options "-O3 -funroll-loops -fno-tree-vectorize -fdump-tree-cunroll-details -fno-peel-loops" } */ #define N 8 #define M 14 Index: tree-ssa-loop-ivcanon.c =================================================================== --- tree-ssa-loop-ivcanon.c (revision 236878) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -594,6 +594,8 @@ remove_redundant_iv_tests (struct loop * /* Stores loops that will be unlooped after we process whole loop tree. */ static vec<loop_p> loops_to_unloop; static vec<int> loops_to_unloop_nunroll; +/* Stores loops that have been peeled. */ +static bitmap peeled_loops; /* Cancel all fully unrolled loops by putting __builtin_unreachable on the latch edge. @@ -962,14 +964,16 @@ try_peel_loop (struct loop *loop, vec<edge> to_remove = vNULL; edge e; - /* If the iteration bound is known and large, then we can safely eliminate - the check in peeled copies. 
*/ - if (TREE_CODE (niter) != INTEGER_CST) - exit = NULL; - if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0) return false; + if (bitmap_bit_p (peeled_loops, loop->num)) + { + if (dump_file) + fprintf (dump_file, "Not peeling: loop is already peeled\n"); + return false; + } + /* Peel only innermost loops. While the code is perfectly capable of peeling non-innermost loops, the heuristics would probably need some improvements. */ @@ -990,6 +994,8 @@ try_peel_loop (struct loop *loop, /* Check if there is an estimate on the number of iterations. */ npeel = estimated_loop_iterations_int (loop); if (npeel < 0) + npeel = likely_max_loop_iterations_int (loop); + if (npeel < 0) { if (dump_file) fprintf (dump_file, "Not peeling: number of iterations is not " @@ -1036,8 +1042,7 @@ try_peel_loop (struct loop *loop, && wi::leu_p (npeel, wi::to_widest (niter))) { bitmap_ones (wont_exit); - if (wi::eq_p (wi::to_widest (niter), npeel)) - bitmap_clear_bit (wont_exit, 0); + bitmap_clear_bit (wont_exit, 0); } else { @@ -1074,14 +1079,14 @@ try_peel_loop (struct loop *loop, } if (loop->any_upper_bound) { - if (wi::ltu_p (npeel, loop->nb_iterations_estimate)) + if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound)) loop->nb_iterations_upper_bound -= npeel; else loop->nb_iterations_upper_bound = 0; } if (loop->any_likely_upper_bound) { - if (wi::ltu_p (npeel, loop->nb_iterations_estimate)) + if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound)) loop->nb_iterations_likely_upper_bound -= npeel; else { @@ -1107,6 +1112,7 @@ try_peel_loop (struct loop *loop, else if (loop->header->frequency) scale = RDIV (entry_freq * REG_BR_PROB_BASE, loop->header->frequency); scale_loop_profile (loop, scale, 0); + bitmap_set_bit (peeled_loops, loop->num); return true; } /* Adds a canonical induction variable to LOOP if suitable. 
@@ -1519,9 +1526,20 @@ pass_complete_unroll::execute (function if (number_of_loops (fun) <= 1) return 0; - return tree_unroll_loops_completely (flag_unroll_loops - || flag_peel_loops - || optimize >= 3, true); + /* If we ever decide to run loop peeling more than once, we will need to + track loops already peeled in loop structures themselves to avoid + re-peeling the same loop multiple times. */ + if (flag_peel_loops) + peeled_loops = BITMAP_ALLOC (NULL); + int val = tree_unroll_loops_completely (flag_unroll_loops + || flag_peel_loops + || optimize >= 3, true); + if (peeled_loops) + { + BITMAP_FREE (peeled_loops); + peeled_loops = NULL; + } + return val; } } // anon namespace

Enable loop peeling at -O3

Commit Message

Comments

Patch