PR rtl-optimization/64081: Enable RTL loop unrolling for duplicated exit blocks and back edges.

Message ID	20160205134321.GA14773@msticlxl57.ims.intel.com
State	New
Headers	show Return-Path: <gcc-patches-return-420870-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :date:to:cc:subject:message-id:mime-version:content-type; q=dns; s=default; b=mVwFKsEArBVteGDNTGCrvg2NFwGnjmJWEP4x43yoo6IUZ2LvNK crcl8yEvCTe0KP5jsjgQkkC0S+lEKZKUtyOyr7ZhKTqZEI5qjRY6ZgX4I+20roR0 SqJSQpR33KTJ0cF/ZckeavsAuDGCCaA0/YNHpi/ynRK8G7wGGxHFskxxQ= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org From: Alexander Fomin <afomin.mailbox@gmail.com> Date: Fri, 5 Feb 2016 16:43:21 +0300 To: rguenther@suse.de Cc: gcc-patches@gcc.gnu.org, izamyatin@gmail.com, dje.gcc@gmail.com Subject: [PATCH] PR rtl-optimization/64081: Enable RTL loop unrolling for duplicated exit blocks and back edges. Message-ID: <20160205134321.GA14773@msticlxl57.ims.intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.23 (2014-03-12)

diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c index fecaf8f..d7d3e0d 100644 --- a/gcc/loop-iv.c +++ b/gcc/loop-iv.c @@ -289,9 +289,27 @@ iv_analysis_loop_init (struct loop *loop) check_iv_ref_table_size (); } +/* Checks that def is in an immediate predecessor of the latch block. */ + +static bool +def_pred_latch_p (df_ref d_ref) +{ + basic_block bb = DF_REF_BB (d_ref); + edge_iterator ei; + edge e; + + FOR_EACH_EDGE (e, ei, current_loop->latch->preds) + { + if (e->src == bb) + return true; + } + return false; +} + /* Finds the definition of REG that dominates loop latch and stores it to DEF. Returns false if there is not a single definition - dominating the latch. If REG has no definition in loop, DEF + dominating the latch or all defs are same and they are on different + predecessors of loop latch. If REG has no definition in loop, DEF is set to NULL and true is returned. */ static bool @@ -303,15 +321,28 @@ latch_dominating_def (rtx reg, df_ref *def) for (adef = DF_REG_DEF_CHAIN (regno); adef; adef = DF_REF_NEXT_REG (adef)) { + /* Initialize this to true for the very first iteration when + SINGLE_RD is NULL. */ + bool def_pred_latch = true; + if (!bitmap_bit_p (df->blocks_to_analyze, DF_REF_BBNO (adef)) || !bitmap_bit_p (&bb_info->out, DF_REF_ID (adef))) continue; - /* More than one reaching definition. */ - if (single_rd) + /* More than one reaching definition is ok in case definitions are + in predecessors of latch block and those definitions are the same. + Probably this could be relaxed and check for sub-dominance instead + predecessor. */ + if (single_rd + && (!(def_pred_latch = def_pred_latch_p (adef)) + || !rtx_equal_p( PATTERN (DF_REF_INSN (single_rd)), + PATTERN (DF_REF_INSN (adef))))) return false; - if (!just_once_each_iteration_p (current_loop, DF_REF_BB (adef))) + /* It's ok if def is not executed on each iteration once we have defs on + latch's predecessors. */ + if (!just_once_each_iteration_p (current_loop, DF_REF_BB (adef)) + && !def_pred_latch) return false; single_rd = adef; @@ -326,10 +357,10 @@ latch_dominating_def (rtx reg, df_ref *def) static enum iv_grd_result iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def) { - df_ref use, adef; + df_ref use, adef = NULL; basic_block def_bb, use_bb; rtx_insn *def_insn; - bool dom_p; + bool dom_p, dom_latch_p = false; *def = NULL; if (!simple_reg_p (reg)) @@ -344,11 +375,26 @@ iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def) if (!DF_REF_CHAIN (use)) return GRD_INVARIANT; - /* More than one reaching def. */ + adef = DF_REF_CHAIN (use)->ref; + /* Having more than one reaching def is ok once all defs are in blocks which + are latch's predecessors. */ if (DF_REF_CHAIN (use)->next) - return GRD_INVALID; + { + df_link* defs; + unsigned int def_num = 0; - adef = DF_REF_CHAIN (use)->ref; + for (defs = DF_REF_CHAIN (use); defs; defs = defs->next) + { + if (!def_pred_latch_p (defs->ref)) + return GRD_INVALID; + def_num++; + } + /* Make sure all preds contain definitions. */ + if (def_num != EDGE_COUNT (current_loop->latch->preds)) + return GRD_INVALID; + + dom_latch_p = true; + } /* We do not handle setting only part of the register. */ if (DF_REF_FLAGS (adef) & DF_REF_READ_WRITE) @@ -371,8 +417,8 @@ iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def) /* The definition does not dominate the use. This is still OK if this may be a use of a biv, i.e. if the def_bb dominates loop - latch. */ - if (just_once_each_iteration_p (current_loop, def_bb)) + latch or all defs are in latch's predecessors. */ + if (dom_latch_p || just_once_each_iteration_p (current_loop, def_bb)) return GRD_MAYBE_BIV; return GRD_INVALID; @@ -2871,6 +2917,49 @@ fail: return; } +/* Check whether exit is not simple but still good for further analysis. + Looks like such loops mostly are a result of jump threading. */ + +static bool +check_complex_exit_p (struct loop* loop, basic_block bb) +{ + edge e; + basic_block pred, exit; + + if (EDGE_COUNT (bb->preds) > 1) + return false; + + e = EDGE_PRED (bb, 0); + + pred = e->src; + if (EDGE_COUNT (pred->succs) != 2) + return false; + + /* Predecessor must be tested (at least) once during any iteration. */ + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, pred)) + return false; + + if (EDGE_SUCC (pred, 0)->dest == bb) + exit = EDGE_SUCC (pred, 1)->dest; + else + exit = EDGE_SUCC (pred, 0)->dest; + + /* Check that EXIT is really loop exit. */ + if (flow_bb_inside_loop_p (loop, exit)) + { + edge_iterator eei; + edge ee; + + FOR_EACH_EDGE (ee, eei, exit->succs) + { + if (!flow_bb_inside_loop_p (loop, ee->dest)) + return true; + } + } + return false; + +} + /* Checks whether E is a simple exit from LOOP and stores its description into DESC. */ @@ -2890,7 +2979,8 @@ check_simple_exit (struct loop *loop, edge e, struct niter_desc *desc) return; /* It must be tested (at least) once during any iteration. */ - if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb)) + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb) + && !check_complex_exit_p (loop, exit_bb)) return; /* It must end in a simple conditional jump. */ diff --git a/gcc/testsuite/gcc.dg/pr64081.c b/gcc/testsuite/gcc.dg/pr64081.c new file mode 100644 index 0000000..5207513 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr64081.c @@ -0,0 +1,40 @@ +/* PR rtl-optimization/64081 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -funroll-loops -fdump-rtl-loop2_unroll" } */ + +long token; +long *data; +long *arr1; +long *arr2; + +int main (int argc, const char* argv[]) +{ + unsigned long i; + static long pos, start, count; + static int dir; + + pos = 0; + dir = 1; + + for (i = 0; i < argc; i++) + { + start = pos; + for (count = 0; count <= 400; count++) + { + if (token == data[pos]) + break; + if (dir == 1) + pos = arr1[pos]; + else + pos = arr2[pos]; + if (pos == -1) + { + pos = start; + dir = !dir; + } + } + } + return pos + dir; +} + +/* { dg-final { scan-rtl-dump "loop unrolled 7 times" "loop2_unroll" } } */

PR rtl-optimization/64081: Enable RTL loop unrolling for duplicated exit blocks and back edges.

Commit Message

Comments

Patch