[GCC8,29/33] New register pressure estimation

Submitted by Bin Cheng on April 18, 2017, 10:53 a.m.

Details

Message ID VI1PR0802MB21762496F9B6DE1526FFE97EE7190@VI1PR0802MB2176.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Bin Cheng April 18, 2017, 10:53 a.m.
Hi,
Currently IVOPTs shares the same register pressure computation with RTL loop invariant pass,
which doesn't work very well.  This patch introduces specific interface for IVOPTs.
The general idea is described in the cover message as below:
  C) Current implementation shares the same register pressure computation with RTL loop
     inv pass.  It has difficulty in handling (especially large) loop nest, and quite
     often generating too many candidates (especially for outer loops).  This change
     introduces new register pressure computation.  The brief idea is to differentiate
     (hot) innermost loop and outer loop.  for (possibly hot) inner most, more registers
     are allowed as long as the register pressure is within the range of number of target
     available registers.
It can also help to restrict number of candidates for outer loop.
Is it OK?

Thanks,
bin
2017-04-11  Bin Cheng  <bin.cheng@arm.com>

	* tree-ssa-loop-ivopts.c (struct ivopts_data): New field.
	(ivopts_estimate_reg_pressure): New reg_pressure model function.
	(ivopts_global_cost_for_size): Delete.
	(determine_set_costs, iv_ca_recount_cost): Call new model function
	ivopts_estimate_reg_pressure.
	(determine_hot_innermost_loop): New.
	(tree_ssa_iv_optimize_loop): Call above function.
From 2b6f11666a86f740a7f813eca26905ce15691d5e Mon Sep 17 00:00:00 2001
From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com>
Date: Fri, 10 Mar 2017 11:03:16 +0000
Subject: [PATCH 29/33] ivopt-reg_pressure-model-20170223.txt

---
 gcc/tree-ssa-loop-ivopts.c | 82 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 72 insertions(+), 10 deletions(-)

Patch hide | download patch | download mbox

diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index db8254c..464f96e 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -589,6 +589,9 @@  struct ivopts_data
 
   /* Whether the loop body can only be exited via single exit.  */
   bool loop_single_exit_p;
+
+  /* The current loop is the innermost loop and maybe hot.  */
+  bool hot_innermost_loop_p;
 };
 
 /* An assignment of iv candidates to uses.  */
@@ -5537,17 +5540,51 @@  determine_iv_costs (struct ivopts_data *data)
     fprintf (dump_file, "\n");
 }
 
-/* Calculates cost for having N_REGS registers.  This number includes
-   induction variables, invariant variables and invariant expressions.  */
+/* Estimate register pressure for loop having N_INVS invariants and N_CANDS
+   induction variables.  Note N_INVS includes both invariant variables and
+   invariant expressions.  */
 
 static unsigned
-ivopts_global_cost_for_size (struct ivopts_data *data, unsigned n_regs)
+ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
+			      unsigned n_cands)
 {
-  unsigned cost = estimate_reg_pressure_cost (n_regs,
-					      data->regs_used, data->speed,
-					      data->body_includes_call);
-  /* Add n_regs to the cost, so that we prefer eliminating ivs if possible.  */
-  return n_regs + cost;
+  unsigned cost;
+  unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
+  unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
+  bool speed = data->speed, hot_p = data->hot_innermost_loop_p;
+
+  /* If there is a call in the loop body, the call-clobbered registers
+     are not available for loop invariants.  */
+  if (data->body_includes_call)
+    available_regs = available_regs - target_clobbered_regs;
+
+  /* If we have enough registers.  */
+  if (regs_needed + target_res_regs < available_regs)
+    {
+      /* For the maybe hot innermost loop, we use available registers and
+	 not restrict the transformations unnecessarily.  For other loops,
+	 we want to use fewer register.  */
+      cost = hot_p ? 0 : target_reg_cost [speed] * n_new; //regs_needed;
+    }
+  /* If close to running out of registers, try to preserve them.  */
+  else if (regs_needed <= available_regs)
+    cost = target_reg_cost [speed] * regs_needed;
+  /* If we run out of available registers but the number of candidates
+     does not, we penalize extra registers using target_spill_cost.  */
+  else if (n_cands <= available_regs)
+    cost = target_reg_cost [speed] * available_regs
+	   + target_spill_cost [speed] * (regs_needed - available_regs);
+  /* If the number of candidates runs out available registers, we penalize
+     extra candidate registers using target_spill_cost * 2.  Because it is
+     more expensive to spill induction variable than invariant.  */
+  else
+    cost = target_reg_cost [speed] * available_regs
+	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
+	   + target_spill_cost [speed] * (regs_needed - n_cands);
+
+  /* Finally, add the number of candidates, so that we prefer eliminating
+     induction variables if possible.  */
+  return cost + n_cands;
 }
 
 /* For each size of the induction variable set determine the penalty.  */
@@ -5607,7 +5644,7 @@  determine_set_costs (struct ivopts_data *data)
       fprintf (dump_file, "  ivs\tcost\n");
       for (j = 0; j <= 2 * target_avail_regs; j++)
 	fprintf (dump_file, "  %d\t%d\n", j,
-		 ivopts_global_cost_for_size (data, j));
+		 ivopts_estimate_reg_pressure (data, 0, j));
       fprintf (dump_file, "\n");
     }
 }
@@ -5666,7 +5703,7 @@  iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
   comp_cost cost = ivs->cand_use_cost;
 
   cost += ivs->cand_cost;
-  cost += ivopts_global_cost_for_size (data, ivs->n_invs + ivs->n_cands);
+  cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
   ivs->cost = cost;
 }
 
@@ -7367,6 +7404,30 @@  loop_body_includes_call (basic_block *body, unsigned num_nodes)
   return false;
 }
 
+/* Determine if current loop is the innermost loop and maybe hot.  */
+
+static void
+determine_hot_innermost_loop (struct ivopts_data *data)
+{
+  data->hot_innermost_loop_p = true;
+  if (!data->speed)
+    return;
+
+  struct loop *loop = data->current_loop;
+  if (loop->inner != NULL)
+    {
+      data->hot_innermost_loop_p = false;
+      return;
+    }
+
+  HOST_WIDE_INT niter = avg_loop_niter (loop);
+  if (niter < PARAM_VALUE (PARAM_AVG_LOOP_NITER)
+      || loop_constraint_set_p (loop, LOOP_C_PROLOG)
+      || loop_constraint_set_p (loop, LOOP_C_EPILOG)
+      || loop_constraint_set_p (loop, LOOP_C_VERSION))
+    data->hot_innermost_loop_p = false;
+}
+
 /* Optimizes the LOOP.  Returns true if anything changed.  */
 
 static bool
@@ -7381,6 +7442,7 @@  tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
   data->current_loop = loop;
   data->loop_loc = find_loop_location (loop);
   data->speed = optimize_loop_for_speed_p (loop);
+  determine_hot_innermost_loop (data);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {