diff mbox series

[v5,3/3] PR80791 Consider doloop cmp use in ivopts

Message ID f48de819-7ac9-17d7-ef9c-eea3c781a482@linux.ibm.com
State New
Headers show
Series None | expand

Commit Message

Kewen.Lin July 23, 2019, 6:14 a.m. UTC
Hi Bin,

This patch follows your suggestion to avoid using an infinite-cost iv cand for
rewriting.  In order to allow other IV cands to be considered, it zeroes the
iv cand cost if its users are only doloop uses.  (See the typical case in the
previous reply.)

Could you please have a look?  Thanks in advance!


Kewen
-------------

gcc/ChangeLog

2019-07-23  Kewen Lin  <linkw@gcc.gnu.org>

	PR middle-end/80791
	* target.def (have_count_reg_decr_p): New hook.
	* doc/tm.texi.in (TARGET_HAVE_COUNT_REG_DECR_P): New hook.
	* doc/tm.texi: Regenerate.
	* config/rs6000/rs6000.c (TARGET_HAVE_COUNT_REG_DECR_P): New macro.
	* tree-ssa-loop-ivopts.c (adjust_group_iv_cost_for_doloop): New function.
	(find_doloop_use): Likewise.
	(record_group): Init doloop_p.
	(determine_group_iv_cost): Call adjust_group_iv_cost_for_doloop.
	(tree_ssa_iv_optimize_loop): Check targetm.have_count_reg_decr_p, call
	generic_predict_doloop_p and find_doloop_use.
	(generic_predict_doloop_p): Remove ATTRIBUTE_UNUSED.
	(iv_ca_set_no_cp): Adjust cand cost handling for doloop.
	(iv_ca_set_cp): Likewise.
	(iv_ca_new): Init n_cand_doloop_uses.
	(iv_ca_free): Free n_cand_doloop_uses.

gcc/testsuite/ChangeLog

2019-07-23  Kewen Lin  <linkw@gcc.gnu.org>

	PR middle-end/80791
	* gcc.dg/tree-ssa/ivopts-lt.c: Adjust.
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6667cd0..e98aba9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1912,6 +1912,9 @@  static const struct attribute_spec rs6000_attribute_table[] =
 #undef TARGET_PREDICT_DOLOOP_P
 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
 
+#undef TARGET_HAVE_COUNT_REG_DECR_P
+#define TARGET_HAVE_COUNT_REG_DECR_P true
+
 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c2aa4d0..5477294 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11618,6 +11618,14 @@  loops, and will help ivopts to make some decisions.
 The default version of this hook returns false.
 @end deftypefn
 
+@deftypevr {Target Hook} bool TARGET_HAVE_COUNT_REG_DECR_P
+Return true if the target supports hardware count register for decrement
+and branch.  This count register can't be used as general register since
+moving to/from a general register from/to it is very expensive.
+For the targets with this support, ivopts can take doloop use as zero cost.
+The default value is false.
+@end deftypevr
+
 @deftypefn {Target Hook} bool TARGET_CAN_USE_DOLOOP_P (const widest_int @var{&iterations}, const widest_int @var{&iterations_max}, unsigned int @var{loop_depth}, bool @var{entered_at_top})
 Return true if it is possible to use low-overhead loops (@code{doloop_end}
 and @code{doloop_begin}) for a particular loop.  @var{iterations} gives the
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index b4d57b8..5f43b27 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7946,6 +7946,8 @@  to by @var{ce_info}.
 
 @hook TARGET_PREDICT_DOLOOP_P
 
+@hook TARGET_HAVE_COUNT_REG_DECR_P
+
 @hook TARGET_CAN_USE_DOLOOP_P
 
 @hook TARGET_INVALID_WITHIN_DOLOOP
diff --git a/gcc/target.def b/gcc/target.def
index 71b6972..8a64e5b 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4246,6 +4246,16 @@  The default version of this hook returns false.",
  bool, (struct loop *loop),
  default_predict_doloop_p)
 
+DEFHOOKPOD
+(have_count_reg_decr_p,
+ "Return true if the target supports hardware count register for decrement\n\
+and branch.  This count register can't be used as general register since\n\
+moving to/from a general register from/to it is very expensive.\n\
+For the targets with this support, ivopts can take doloop use as zero cost.\n\
+The default value is false.",
+ bool, false)
+
+
 DEFHOOK
 (can_use_doloop_p,
  "Return true if it is possible to use low-overhead loops (@code{doloop_end}\n\
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
index 7d5859b..3486e1a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
@@ -18,5 +18,5 @@  f1 (char *p, uintptr_t i, uintptr_t n)
 }
 
 /* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" } } */
-/* { dg-final { scan-tree-dump-times "PHI <p_" 1 "ivopts"} } */
-/* { dg-final { scan-tree-dump-times "p_\[0-9\]* <" 1 "ivopts" } } */
+/* { dg-final { scan-tree-dump-times "PHI <p_" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "p_\[0-9\]* <" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 530ea4a..80a0f12 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -399,6 +399,8 @@  struct iv_group
   struct cost_pair *cost_map;
   /* The selected candidate for the group.  */
   struct iv_cand *selected;
+  /* To indicate this is a doloop use group.  */
+  bool doloop_p;
   /* Uses in the group.  */
   vec<struct iv_use *> vuses;
 };
@@ -612,6 +614,9 @@  struct ivopts_data
 
   /* Whether the loop body can only be exited via single exit.  */
   bool loop_single_exit_p;
+
+  /* Whether the loop has doloop comparison use.  */
+  bool doloop_use_p;
 };
 
 /* An assignment of iv candidates to uses.  */
@@ -630,6 +635,9 @@  struct iv_ca
   /* Number of times each candidate is used.  */
   unsigned *n_cand_uses;
 
+  /* Number of doloop uses for each candidate.  */
+  unsigned *n_cand_doloop_uses;
+
   /* The candidates used.  */
   bitmap cands;
 
@@ -1528,6 +1536,7 @@  record_group (struct ivopts_data *data, enum use_type type)
   group->type = type;
   group->related_cands = BITMAP_ALLOC (NULL);
   group->vuses.create (1);
+  group->doloop_p = false;
 
   data->vgroups.safe_push (group);
   return group;
@@ -3724,7 +3733,7 @@  prepare_decl_rtl (tree *expr_p, int *ws, void *data)
    Some RTL specific checks seems unable to be checked in gimple, if any new
    checks or easy checks _are_ missing here, please add them.  */
 
-static bool ATTRIBUTE_UNUSED
+static bool
 generic_predict_doloop_p (struct ivopts_data *data)
 {
   struct loop *loop = data->current_loop;
@@ -5291,6 +5300,17 @@  determine_group_iv_cost_cond (struct ivopts_data *data,
   return !cost.infinite_cost_p ();
 }
 
+/* Set the cost of using iv cand CAND for doloop use GROUP to no_cost.  */
+
+static void
+adjust_group_iv_cost_for_doloop (struct ivopts_data *data,
+				 struct iv_group *group, struct iv_cand *cand)
+{
+  struct cost_pair *cp = get_group_iv_cost (data, group, cand);
+  gcc_assert (cp);
+  cp->cost = no_cost;
+}
+
 /* Determines cost of computing uses in GROUP with CAND.  Returns false
    if USE cannot be represented with CAND.  */
 
@@ -5308,7 +5328,12 @@  determine_group_iv_cost (struct ivopts_data *data,
       return determine_group_iv_cost_address (data, group, cand);
 
     case USE_COMPARE:
-      return determine_group_iv_cost_cond (data, group, cand);
+      {
+	bool finite_cost_p = determine_group_iv_cost_cond (data, group, cand);
+	if (data->doloop_use_p && group->doloop_p && finite_cost_p)
+	  adjust_group_iv_cost_for_doloop (data, group, cand);
+	return finite_cost_p;
+      }
 
     default:
       gcc_unreachable ();
@@ -5829,11 +5854,15 @@  iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
   ivs->cand_for_group[gid] = NULL;
   ivs->n_cand_uses[cid]--;
 
+  if (group->doloop_p)
+    ivs->n_cand_doloop_uses[cid]--;
+  else if (ivs->n_cand_uses[cid] == ivs->n_cand_doloop_uses[cid])
+    ivs->cand_cost -= cp->cand->cost;
+
   if (ivs->n_cand_uses[cid] == 0)
     {
       bitmap_clear_bit (ivs->cands, cid);
       ivs->n_cands--;
-      ivs->cand_cost -= cp->cand->cost;
       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
     }
@@ -5886,11 +5915,14 @@  iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
       ivs->bad_groups--;
       ivs->cand_for_group[gid] = cp;
       ivs->n_cand_uses[cid]++;
+      if (group->doloop_p)
+	ivs->n_cand_doloop_uses[cid]++;
+      else if (ivs->n_cand_uses[cid] == (ivs->n_cand_doloop_uses[cid] + 1))
+	ivs->cand_cost += cp->cand->cost;
       if (ivs->n_cand_uses[cid] == 1)
 	{
 	  bitmap_set_bit (ivs->cands, cid);
 	  ivs->n_cands++;
-	  ivs->cand_cost += cp->cand->cost;
 	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
 	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
 	}
@@ -6098,6 +6130,7 @@  iv_ca_new (struct ivopts_data *data)
   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
 				 data->vgroups.length ());
   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
+  nw->n_cand_doloop_uses = XCNEWVEC (unsigned, data->vcands.length ());
   nw->cands = BITMAP_ALLOC (NULL);
   nw->n_cands = 0;
   nw->n_invs = 0;
@@ -6117,6 +6150,7 @@  iv_ca_free (struct iv_ca **ivs)
 {
   free ((*ivs)->cand_for_group);
   free ((*ivs)->n_cand_uses);
+  free ((*ivs)->n_cand_doloop_uses);
   BITMAP_FREE ((*ivs)->cands);
   free ((*ivs)->n_inv_var_uses);
   free ((*ivs)->n_inv_expr_uses);
@@ -7568,6 +7602,47 @@  determine_scaling_factor (struct ivopts_data *data, basic_block *body)
     }
 }
 
+/* Find the doloop compare use, set doloop_p on its group; true if found.  */
+
+static bool
+find_doloop_use (struct ivopts_data *data)
+{
+  struct loop *loop = data->current_loop;
+
+  for (unsigned i = 0; i < data->vgroups.length (); i++)
+    {
+      struct iv_group *group = data->vgroups[i];
+      if (group->type == USE_COMPARE)
+	{
+	  gcc_assert (group->vuses.length () == 1);
+	  struct iv_use *use = group->vuses[0];
+	  gimple *stmt = use->stmt;
+	  if (gimple_code (stmt) == GIMPLE_COND)
+	    {
+	      basic_block bb = gimple_bb (stmt);
+	      edge true_edge, false_edge;
+	      extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
+	      /* This comparison controls the branch to the loop latch.  Require
+		 the latch to be empty for now.  */
+	      if ((loop->latch == true_edge->dest
+		   || loop->latch == false_edge->dest)
+		  && empty_block_p (loop->latch))
+		{
+		  group->doloop_p = true;
+		  if (dump_file && (dump_flags & TDF_DETAILS))
+		    {
+		      fprintf (dump_file, "Doloop cmp iv use: ");
+		      print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
+		    }
+		  return true;
+		}
+	    }
+	}
+    }
+
+  return false;
+}
+
 /* Optimizes the LOOP.  Returns true if anything changed.  */
 
 static bool
@@ -7580,6 +7655,7 @@  tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
   basic_block *body;
 
   gcc_assert (!data->niters);
+  data->doloop_use_p = false;
   data->current_loop = loop;
   data->loop_loc = find_loop_location (loop).get_location_t ();
   data->speed = optimize_loop_for_speed_p (loop);
@@ -7625,6 +7701,19 @@  tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
   /* Finds candidates for the induction variables (item 2).  */
   find_iv_candidates (data);
 
+  if (flag_branch_on_count_reg && targetm.have_count_reg_decr_p
+      && generic_predict_doloop_p (data))
+    {
+      data->doloop_use_p = find_doloop_use (data);
+      if (data->doloop_use_p && dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file,
+		   "Predict loop %d can perform doloop optimization later.\n",
+		   loop->num);
+	  flow_loop_dump (loop, dump_file, NULL, 1);
+	}
+    }
+
   /* Calculates the costs (item 3, part 1).  */
   determine_iv_costs (data);
   determine_group_iv_costs (data);