Message ID | DB6PR0802MB25044C0B907B393B7BE608BEE7860@DB6PR0802MB2504.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
Series | [1/6] Compute type mode and register class mapping | expand |
On Fri, May 4, 2018 at 5:24 PM, Bin Cheng <Bin.Cheng@arm.com> wrote: > Hi, > This patch restricts predcom pass using register pressure information. > In case of high register pressure, we now prune additional chains as well > as disable unrolling in predcom. In generally, I think this patch set is > useful. > > Bootstrap and test on x86_64 ongoing. Any comments? Simple update in line with changes in previous patch. Thanks, bin > > Thanks, > bin > 2018-04-27 Bin Cheng <bin.cheng@arm.com> > > * tree-predcom.c (stor-layout.h, tree-ssa-live.h): Include. > (REG_RELAX_RATIO, prune_chains): New. > (tree_predictive_commoning_loop): Compute reg pressure using class > region. Prune chains based on reg pressure. Force to not unroll > if reg pressure is high. From b78c779907b98930fc4b36e5558d6f315bb4475b Mon Sep 17 00:00:00 2001 From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com> Date: Wed, 25 Apr 2018 16:30:41 +0100 Subject: [PATCH 6/6] pcom-reg-pressure-20180428 --- gcc/tree-predcom.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c index aeadbf7..60316e9 100644 --- a/gcc/tree-predcom.c +++ b/gcc/tree-predcom.c @@ -217,6 +217,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-pass.h" #include "ssa.h" #include "gimple-pretty-print.h" +#include "stor-layout.h" #include "alias.h" #include "fold-const.h" #include "cfgloop.h" @@ -227,6 +228,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-loop-ivopts.h" #include "tree-ssa-loop-manip.h" #include "tree-ssa-loop-niter.h" +#include "tree-ssa-live.h" #include "tree-ssa-loop.h" #include "tree-into-ssa.h" #include "tree-dfa.h" @@ -242,6 +244,10 @@ along with GCC; see the file COPYING3. If not see #define MAX_DISTANCE (target_avail_regs[GENERAL_REGS] < 16 ? 4 : 8) +/* The ratio by which register pressure check is relaxed. 
*/ + +#define REG_RELAX_RATIO (2) + /* Data references (or phi nodes that carry data reference values across loop iterations). */ @@ -3156,6 +3162,59 @@ insert_init_seqs (struct loop *loop, vec<chain_p> chains) } } +/* Prune chains causing high register pressure. */ + +static void +prune_chains (vec<chain_p> *chains, unsigned *max_pressure) +{ + bool pruned_p = false; + machine_mode mode; + enum reg_class cl; + unsigned i, new_pressure; + + for (i = 0; i < chains->length ();) + { + chain_p chain = (*chains)[i]; + /* Always allow combined chain and zero-length chain. */ + if (chain->combined || chain->type == CT_COMBINATION + || chain->length == 0 || chain->type == CT_STORE_STORE) + { + i++; + continue; + } + + gcc_assert (chain->refs.length () > 0); + mode = TYPE_MODE (TREE_TYPE (chain->refs[0]->ref->ref)); + /* Bypass chain that doesn't contribute to any reg_class, although + something could be wrong when mapping type mode to reg_class. */ + if (ira_mode_classes[mode] == NO_REGS) + { + i++; + continue; + } + + cl = ira_pressure_class_translate[ira_mode_classes[mode]]; + /* Prune chain if it causes higher register pressure than available + registers; otherwise keep the chain and update register pressure + information. */ + new_pressure = max_pressure[cl] + chain->length - 1; + if (new_pressure <= target_avail_regs[cl] * REG_RELAX_RATIO) + { + i++; + max_pressure[cl] = new_pressure; + } + else + { + release_chain (chain); + chains->unordered_remove (i); + pruned_p = true; + } + } + + if (pruned_p && dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Prune chain because of high reg pressure\n"); +} + /* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa form was corrupted. 
*/ @@ -3171,6 +3230,9 @@ tree_predictive_commoning_loop (struct loop *loop) struct tree_niter_desc desc; bool unroll = false, loop_closed_ssa = false; edge exit; + lr_region *region; + unsigned max_pressure[N_REG_CLASSES]; + bool high_pressure_p; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Processing loop %d\n", loop->num); @@ -3239,6 +3301,11 @@ tree_predictive_commoning_loop (struct loop *loop) /* Try to combine the chains that are always worked with together. */ try_combine_chains (loop, &chains); + region = new lr_region (loop, max_pressure, NULL, NULL, NULL); + high_pressure_p = region->calculate_pressure (); + delete region; + prune_chains (&chains, max_pressure); + insert_init_seqs (loop, chains); if (dump_file && (dump_flags & TDF_DETAILS)) @@ -3250,6 +3317,13 @@ tree_predictive_commoning_loop (struct loop *loop) /* Determine the unroll factor, and if the loop should be unrolled, ensure that its number of iterations is divisible by the factor. */ unroll_factor = determine_unroll_factor (chains); + /* Force to not unroll if register pressure is high. */ + if (high_pressure_p && unroll_factor > 1) + { + unroll_factor = 1; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Force to not unroll because of high reg pressure\n"); + } scev_reset (); unroll = (unroll_factor > 1 && can_unroll_loop_p (loop, unroll_factor, &desc));
On Tue, 2018-05-29 at 17:04 +0100, Bin.Cheng wrote: > On Fri, May 4, 2018 at 5:24 PM, Bin Cheng <Bin.Cheng@arm.com> wrote: > > Hi, > > This patch restricts predcom pass using register pressure > > information. > > In case of high register pressure, we now prune additional chains > > as well > > as disable unrolling in predcom. In generally, I think this patch > > set is > > useful. > > > > Bootstrap and test on x86_64 ongoing. Any comments? > > Simple update in line with changes in previous patch. > > Thanks, > bin > > > > Thanks, > > bin > > 2018-04-27 Bin Cheng <bin.cheng@arm.com> > > > > * tree-predcom.c (stor-layout.h, tree-ssa-live.h): Include. > > (REG_RELAX_RATIO, prune_chains): New. > > (tree_predictive_commoning_loop): Compute reg pressure > > using class > > region. Prune chains based on reg pressure. Force to not > > unroll > > if reg pressure is high. [...snip...] > @@ -3239,6 +3301,11 @@ tree_predictive_commoning_loop (struct loop *loop) > /* Try to combine the chains that are always worked with together. */ > try_combine_chains (loop, &chains); > > + region = new lr_region (loop, max_pressure, NULL, NULL, NULL); > + high_pressure_p = region->calculate_pressure (); > + delete region; > + prune_chains (&chains, max_pressure); > + Possibly a silly question, but why the new/delete of "region" here? Couldn't this just be an on-stack object, with something like: lr_region region (loop, max_pressure, NULL, NULL, NULL); high_pressure_p = region.calculate_pressure (); prune_chains (&chains, max_pressure); or: { lr_region region (loop, max_pressure, NULL, NULL, NULL); high_pressure_p = region.calculate_pressure (); } prune_chains (&chains, max_pressure); if it's important to do the cleanup before prune_chains? Dave
On Tue, May 29, 2018 at 6:18 PM, David Malcolm <dmalcolm@redhat.com> wrote: > On Tue, 2018-05-29 at 17:04 +0100, Bin.Cheng wrote: >> On Fri, May 4, 2018 at 5:24 PM, Bin Cheng <Bin.Cheng@arm.com> wrote: >> > Hi, >> > This patch restricts predcom pass using register pressure >> > information. >> > In case of high register pressure, we now prune additional chains >> > as well >> > as disable unrolling in predcom. In generally, I think this patch >> > set is >> > useful. >> > >> > Bootstrap and test on x86_64 ongoing. Any comments? >> >> Simple update in line with changes in previous patch. >> >> Thanks, >> bin >> > >> > Thanks, >> > bin >> > 2018-04-27 Bin Cheng <bin.cheng@arm.com> >> > >> > * tree-predcom.c (stor-layout.h, tree-ssa-live.h): Include. >> > (REG_RELAX_RATIO, prune_chains): New. >> > (tree_predictive_commoning_loop): Compute reg pressure >> > using class >> > region. Prune chains based on reg pressure. Force to not >> > unroll >> > if reg pressure is high. > > [...snip...] > >> @@ -3239,6 +3301,11 @@ tree_predictive_commoning_loop (struct loop *loop) >> /* Try to combine the chains that are always worked with together. */ >> try_combine_chains (loop, &chains); >> >> + region = new lr_region (loop, max_pressure, NULL, NULL, NULL); >> + high_pressure_p = region->calculate_pressure (); >> + delete region; >> + prune_chains (&chains, max_pressure); >> + > > Possibly a silly question, but why the new/delete of "region" here? > Couldn't this just be an on-stack object, with something like: Yes, right. It contained dynamically allocated memory before, so I deleted it early. Now a local object will do. 
Thanks, bin > > lr_region region (loop, max_pressure, NULL, NULL, NULL); > high_pressure_p = region.calculate_pressure (); > prune_chains (&chains, max_pressure); > > or: > > { > lr_region region (loop, max_pressure, NULL, NULL, NULL); > high_pressure_p = region.calculate_pressure (); > } > prune_chains (&chains, max_pressure); > > if it's important to do the cleanup before prune_chains? > > Dave
From 1b488665f8fea619c4ce35f71650c342df69de2f Mon Sep 17 00:00:00 2001 From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com> Date: Wed, 25 Apr 2018 16:30:41 +0100 Subject: [PATCH 6/6] pcom-reg-pressure-20180423 --- gcc/tree-predcom.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c index aeadbf7..d0c18b3 100644 --- a/gcc/tree-predcom.c +++ b/gcc/tree-predcom.c @@ -217,6 +217,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-pass.h" #include "ssa.h" #include "gimple-pretty-print.h" +#include "stor-layout.h" #include "alias.h" #include "fold-const.h" #include "cfgloop.h" @@ -227,6 +228,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-loop-ivopts.h" #include "tree-ssa-loop-manip.h" #include "tree-ssa-loop-niter.h" +#include "tree-ssa-live.h" #include "tree-ssa-loop.h" #include "tree-into-ssa.h" #include "tree-dfa.h" @@ -242,6 +244,10 @@ along with GCC; see the file COPYING3. If not see #define MAX_DISTANCE (target_avail_regs[GENERAL_REGS] < 16 ? 4 : 8) +/* The ratio by which register pressure check is relaxed. */ + +#define REG_RELAX_RATIO (2) + /* Data references (or phi nodes that carry data reference values across loop iterations). */ @@ -3156,6 +3162,59 @@ insert_init_seqs (struct loop *loop, vec<chain_p> chains) } } +/* Prune chains causing high register pressure. */ + +static void +prune_chains (vec<chain_p> *chains, unsigned *max_pressure) +{ + bool pruned_p = false; + machine_mode mode; + enum reg_class cl; + unsigned i, new_pressure; + + for (i = 0; i < chains->length ();) + { + chain_p chain = (*chains)[i]; + /* Always allow combined chain and zero-length chain. 
*/ + if (chain->combined || chain->type == CT_COMBINATION + || chain->length == 0 || chain->type == CT_STORE_STORE) + { + i++; + continue; + } + + gcc_assert (chain->refs.length () > 0); + mode = TYPE_MODE (TREE_TYPE (chain->refs[0]->ref->ref)); + /* Bypass chain that doesn't contribute to any reg_class, although + something could be wrong when mapping type mode to reg_class. */ + if (ira_mode_classes[mode] == NO_REGS) + { + i++; + continue; + } + + cl = ira_pressure_class_translate[ira_mode_classes[mode]]; + /* Prune chain if it causes higher register pressure than available + registers; otherwise keep the chain and update register pressure + information. */ + new_pressure = max_pressure[cl] + chain->length - 1; + if (new_pressure <= target_avail_regs[cl] * REG_RELAX_RATIO) + { + i++; + max_pressure[cl] = new_pressure; + } + else + { + release_chain (chain); + chains->unordered_remove (i); + pruned_p = true; + } + } + + if (pruned_p && dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Prune chain because of high reg pressure\n"); +} + /* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa form was corrupted. */ @@ -3171,6 +3230,9 @@ tree_predictive_commoning_loop (struct loop *loop) struct tree_niter_desc desc; bool unroll = false, loop_closed_ssa = false; edge exit; + lr_region *region; + unsigned max_pressure[N_REG_CLASSES]; + bool high_pressure_p; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Processing loop %d\n", loop->num); @@ -3239,6 +3301,11 @@ tree_predictive_commoning_loop (struct loop *loop) /* Try to combine the chains that are always worked with together. 
*/ try_combine_chains (loop, &chains); + region = new lr_region (loop); + high_pressure_p = region->calculate_pressure (max_pressure); + delete region; + prune_chains (&chains, max_pressure); + insert_init_seqs (loop, chains); if (dump_file && (dump_flags & TDF_DETAILS)) @@ -3250,6 +3317,13 @@ tree_predictive_commoning_loop (struct loop *loop) /* Determine the unroll factor, and if the loop should be unrolled, ensure that its number of iterations is divisible by the factor. */ unroll_factor = determine_unroll_factor (chains); + /* Force to not unroll if register pressure is high. */ + if (high_pressure_p && unroll_factor > 1) + { + unroll_factor = 1; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Force to not unroll because of high reg pressure\n"); + } scev_reset (); unroll = (unroll_factor > 1 && can_unroll_loop_p (loop, unroll_factor, &desc)); -- 1.9.1