@@ -197,6 +197,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
+#include "stor-layout.h"
#include "alias.h"
#include "fold-const.h"
#include "cfgloop.h"
@@ -207,6 +208,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-regpressure.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-dfa.h"
@@ -2510,6 +2512,57 @@ insert_init_seqs (struct loop *loop, vec<chain_p> chains)
}
}
+/* Remove from CHAINS those chains whose extra register demand would push
+   the pressure of their register class above the number of registers the
+   target has available.  MAX_PRESSURE is indexed by pressure class; on
+   entry it holds the pressure already present, and it is increased for
+   every chain that is kept.  Returns true if at least one chain was
+   removed.  */
+
+static bool
+prune_chains (vec<chain_p> *chains, unsigned *max_pressure)
+{
+  bool any_removed = false;
+  unsigned idx = 0;
+
+  while (idx < chains->length ())
+    {
+      chain_p cand = (*chains)[idx];
+      bool keep_p = true;
+
+      /* Combined chains, combination chains and zero-length chains are
+	 never pruned; only the remaining ones are costed.  */
+      if (!cand->combined
+	  && cand->type != CT_COMBINATION
+	  && cand->length != 0)
+	{
+	  gcc_assert (cand->refs.length () > 0);
+	  machine_mode ref_mode
+	    = TYPE_MODE (TREE_TYPE (cand->refs[0]->ref->ref));
+
+	  /* A mode that maps to NO_REGS is not charged to any class, so
+	     such a chain is kept unconditionally (the mode to reg_class
+	     mapping may simply be imprecise here).  */
+	  if (ira_mode_classes[ref_mode] != NO_REGS)
+	    {
+	      enum reg_class pcl
+		= ira_pressure_class_translate[ira_mode_classes[ref_mode]];
+	      /* The chain is charged LENGTH - 1 on top of the pressure
+		 recorded so far for its class.  */
+	      unsigned projected = max_pressure[pcl] + cand->length - 1;
+
+	      if (projected <= target_avail_regs[pcl])
+		max_pressure[pcl] = projected;
+	      else
+		keep_p = false;
+	    }
+	}
+
+      if (keep_p)
+	{
+	  idx++;
+	  continue;
+	}
+
+      /* Drop the chain.  IDX is deliberately not advanced: after
+	 unordered_remove the slot is expected to hold a chain that has
+	 not been examined yet.
+	 NOTE(review): only the init sequence is discarded here; confirm
+	 the chain object itself does not also need to be freed.  */
+      gimple_seq_discard (cand->init_seq);
+      chains->unordered_remove (idx);
+      any_removed = true;
+    }
+
+  return any_removed;
+}
+
/* Performs predictive commoning for LOOP. Returns true if LOOP was
unrolled. */
@@ -2523,6 +2576,8 @@ tree_predictive_commoning_loop (struct loop *loop)
unsigned unroll_factor;
struct tree_niter_desc desc;
bool unroll = false;
+ bool high_pressure_p;
+ unsigned max_pressure[N_REG_CLASSES];
edge exit;
bitmap tmp_vars;
@@ -2592,6 +2647,11 @@ tree_predictive_commoning_loop (struct loop *loop)
/* Try to combine the chains that are always worked with together. */
try_combine_chains (&chains);
+ compute_loop_reg_pressure (loop, max_pressure, &high_pressure_p);
+ if (prune_chains (&chains, max_pressure))
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Prune chain because of high reg pressure\n");
+
insert_init_seqs (loop, chains);
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -2603,6 +2663,13 @@ tree_predictive_commoning_loop (struct loop *loop)
/* Determine the unroll factor, and if the loop should be unrolled, ensure
that its number of iterations is divisible by the factor. */
unroll_factor = determine_unroll_factor (chains);
+ /* Force to not unroll if register pressure is high. */
+ if (high_pressure_p && unroll_factor > 1)
+ {
+ unroll_factor = 1;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Force to not unroll because of high reg pressure\n");
+ }
scev_reset ();
unroll = (unroll_factor > 1
&& can_unroll_loop_p (loop, unroll_factor, &desc));
Hi, Aggressive predictive commoning (pcom) can result in a large number of loop-carried variables, which causes high register pressure and spilling. One example is the hot loop of 436.cactusADM, in which >25 loop-carried variables are introduced for the vectorized version of the loop, depending on the vectorization factor. This patch computes loop register pressure on tree SSA using the previously introduced interface. It uses the information to prune chains with a simple heuristic. For example, combined and zero-length chains are always allowed; other chains are allowed only while they fit within the register cost; and loop unrolling is forced off if register pressure is high. With this patch, the benchmark is obviously improved on AArch64. Bootstrapped and tested on x86_64 and AArch64. Is it OK? Thanks, bin 2017-05-10 Bin Cheng <bin.cheng@arm.com> * tree-predcom.c (stor-layout.h, tree-ssa-regpressure.h): New header files. (prune_chains): New function. (tree_predictive_commoning_loop): Call compute_loop_reg_pressure to compute reg pressure. Prune chains based on reg pressure. Force to not unroll if reg pressure is high. From 744a63e16063451c6c36a8f08271c5174c902392 Mon Sep 17 00:00:00 2001 From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com> Date: Mon, 8 May 2017 11:04:46 +0100 Subject: [PATCH 6/6] pcom-reg-pressure-20170503.txt --- gcc/tree-predcom.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+)