From patchwork Sun Jun 26 19:36:56 2016
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Jan Hubicka <hubicka@ucw.cz>
X-Patchwork-Id: 640717
Return-Path: 
 <gcc-patches-return-430510-incoming=patchwork.ozlabs.org@gcc.gnu.org>
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
Received: from sourceware.org (server1.sourceware.org [209.132.180.131])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256
	bits)) (No client certificate requested)
	by ozlabs.org (Postfix) with ESMTPS id 3rd2QM0TgCz9sdg
	for <incoming@patchwork.ozlabs.org>;
	Mon, 27 Jun 2016 05:37:12 +1000 (AEST)
Authentication-Results: ozlabs.org; dkim=pass (1024-bit key;
	unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org
	header.b=UbUH4jKS; dkim-atps=neutral
DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id
	:list-unsubscribe:list-archive:list-post:list-help:sender:date
	:from:to:subject:message-id:mime-version:content-type; q=dns; s=
	default; b=narXCPYFUBMIBcF4czBXVjVf6rSFF9H0OPkqWTwIo4gnhStxZKncC
	yOtSpmGWaJ95t//No3bM8UxJYOQf65C/4N4a1/KnwDJwK2pWL3Qmz5mpTefB11sm
	m1MXF7+JwD8Weu+UExRaM97z3nKpHX+niC2121VW9RgZnaFWu6m6VM=
DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id
	:list-unsubscribe:list-archive:list-post:list-help:sender:date
	:from:to:subject:message-id:mime-version:content-type; s=
	default; bh=pc/of1A3/17OOgrmQ11RDtamZEQ=; b=UbUH4jKStLvHAUdgLKWS
	XxYWf1DPiiCOom69LKU29rkODGuTLLOGwCVq2WmYqd9EXuxZfOMVUeXY5arn9rNc
	1r88QJEWUNOaAczCi5+p7kmKEDsPHyggFSOkFPjet8xjm7srFHwuavp7jtHaNoYw
	DyTLu4okAGpdIrEBCfDoGqU=
Received: (qmail 86010 invoked by alias); 26 Jun 2016 19:37:04 -0000
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Unsubscribe: 
 <mailto:gcc-patches-unsubscribe-incoming=patchwork.ozlabs.org@gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
Delivered-To: mailing list gcc-patches@gcc.gnu.org
Received: (qmail 86000 invoked by uid 89); 26 Jun 2016 19:37:03 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=0.3 required=5.0 tests=AWL, BAYES_50,
	KAM_ASCII_DIVIDERS, KAM_LAZY_DOMAIN_SECURITY,
	RP_MATCHES_RCVD autolearn=no version=3.3.2 spammy=Recording,
	not_taken, UD:predict.def, predict.def
X-HELO: nikam.ms.mff.cuni.cz
Received: from nikam.ms.mff.cuni.cz (HELO nikam.ms.mff.cuni.cz)
	(195.113.20.16) by sourceware.org
	(qpsmtpd/0.93/v0.84-503-g423c35a) with (AES256-GCM-SHA384
	encrypted) ESMTPS; Sun, 26 Jun 2016 19:37:00 +0000
Received: by nikam.ms.mff.cuni.cz (Postfix, from userid 16202)	id
	AB8B95458BF; Sun, 26 Jun 2016 21:36:56 +0200 (CEST)
Date: Sun, 26 Jun 2016 21:36:56 +0200
From: Jan Hubicka <hubicka@ucw.cz>
To: gcc-patches@gcc.gnu.org
Subject: Predict loops containing recursive call with fewer iterations
Message-ID: <20160626193656.GE75430@kam.mff.cuni.cz>
MIME-Version: 1.0
Content-Disposition: inline
User-Agent: Mutt/1.5.21 (2010-09-15)

Hi,
this patch further tune the branch prediction in recursive functions.
The loop predictor now check if the loop body contains a recursive call and
use different predictors in that case.  This makes it possible to specify
that loops contianing self recursive calls usually iterate fewer time.

Bootstrapped/regtested x86_64-linux, will commit it shortly.

Honza

	* gcc.dg/predict-12.c: New testcase.

	* predict.c: Include gimple-pretty-print.h
	(predicted_by_loop_heuristics_p): Check also
	PRED_LOOP_EXIT_WITH_RECURSION
	(predict_loops): Find self recursive calls and use special purpose
	predictors for them; dump log about decisions.
	(pass_profile::execute): Dump info about #of iterations.
	* predict.def (PRED_LOOP_EXIT_WITH_RECURSION,
	(PRED_LOOP_GUARD_WITH_RECURSION): New predictors.

Index: testsuite/gcc.dg/predict-12.c
===================================================================
--- testsuite/gcc.dg/predict-12.c	(revision 0)
+++ testsuite/gcc.dg/predict-12.c	(working copy)
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+int *a,n,m;
+void test(void);
+void
+t(void)
+{
+  int i,j;
+  for (i=0;i<n;i++)
+    if (a[i])
+      for (j=0;j<m;j++)
+	if (a[i+j])
+	  t();
+}
+/* { dg-final { scan-tree-dump-times "loop guard with recursion" 1 "profile_estimate"} } */
+/* { dg-final { scan-tree-dump-times "loop exit with recursion" 2 "profile_estimate"} } */
+/* { dg-final { scan-tree-dump-times "recursive call" 1 "profile_estimate"} } */
Index: predict.c
===================================================================
--- predict.c	(revision 237789)
+++ predict.c	(working copy)
@@ -55,6 +55,7 @@ along with GCC; see the file COPYING3.
 #include "tree-ssa-loop.h"
 #include "tree-scalar-evolution.h"
 #include "ipa-utils.h"
+#include "gimple-pretty-print.h"
 
 /* Enum with reasons why a predictor is ignored.  */
 
@@ -1407,6 +1408,7 @@ predicted_by_loop_heuristics_p (basic_bl
 	|| i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
 	|| i->ep_predictor == PRED_LOOP_ITERATIONS
 	|| i->ep_predictor == PRED_LOOP_EXIT
+	|| i->ep_predictor == PRED_LOOP_EXIT_WITH_RECURSION
 	|| i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
       return true;
   return false;
@@ -1686,6 +1688,24 @@ static void
 predict_loops (void)
 {
   struct loop *loop;
+  basic_block bb;
+  hash_set <struct loop *> with_recursion(10);
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      gimple_stmt_iterator gsi;
+      tree decl;
+
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	if (is_gimple_call (gsi_stmt (gsi))
+	    && (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL
+	    && recursive_call_p (current_function_decl, decl))
+	  {
+	    loop = bb->loop_father;
+	    while (loop && !with_recursion.add (loop))
+	      loop = loop_outer (loop);
+	  }
+    }
 
   /* Try to predict out blocks in a loop that are not part of a
      natural loop.  */
@@ -1702,6 +1722,7 @@ predict_loops (void)
       tree loop_bound_var = NULL;
       tree loop_iv_base = NULL;
       gcond *stmt = NULL;
+      bool recursion = with_recursion.contains (loop);
 
       exits = get_loop_exit_edges (loop);
       FOR_EACH_VEC_ELT (exits, j, ex)
@@ -1713,6 +1734,23 @@ predict_loops (void)
 	  continue;
 	}
 
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "Predicting loop %i%s with %i exits.\n",
+		 loop->num, recursion ? " (with recursion)":"", n_exits);
+      if (dump_file && (dump_flags & TDF_DETAILS)
+	  && max_loop_iterations_int (loop) >= 0)
+	{
+	  fprintf (dump_file,
+		   "Loop %d iterates at most %i times.\n", loop->num,
+		   (int)max_loop_iterations_int (loop));
+	}
+      if (dump_file && (dump_flags & TDF_DETAILS)
+	  && likely_max_loop_iterations_int (loop) >= 0)
+	{
+	  fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
+		   loop->num, (int)likely_max_loop_iterations_int (loop));
+	}
+
       FOR_EACH_VEC_ELT (exits, j, ex)
 	{
 	  tree niter = NULL;
@@ -1727,13 +1765,28 @@ predict_loops (void)
 	  /* Loop heuristics do not expect exit conditional to be inside
 	     inner loop.  We predict from innermost to outermost loop.  */
 	  if (predicted_by_loop_heuristics_p (ex->src))
-	    continue;
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Skipping exit %i->%i because "
+			 "it is already predicted.\n",
+			 ex->src->index, ex->dest->index);
+	      continue;
+	    }
 	  predict_extra_loop_exits (ex);
 
 	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
 	    niter = niter_desc.niter;
 	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
 	    niter = loop_niter_by_eval (loop, ex);
+	  if (dump_file && (dump_flags & TDF_DETAILS)
+	      && TREE_CODE (niter) == INTEGER_CST)
+	    {
+	      fprintf (dump_file, "Exit %i->%i %d iterates ",
+		       ex->src->index, ex->dest->index,
+		       loop->num);
+	      print_generic_expr (dump_file, niter, TDF_SLIM);
+	      fprintf (dump_file, " times.\n");
+	    }
 
 	  if (TREE_CODE (niter) == INTEGER_CST)
 	    {
@@ -1766,14 +1819,24 @@ predict_loops (void)
 				 RDIV (REG_BR_PROB_BASE,
 				       REG_BR_PROB_BASE
 					 - predictor_info
-						 [PRED_LOOP_EXIT].hitrate)))
+						 [recursion
+						  ? PRED_LOOP_EXIT_WITH_RECURSION
+						  : PRED_LOOP_EXIT].hitrate)))
 	    {
 	      nitercst = nit.to_shwi ();
 	      predictor = PRED_LOOP_ITERATIONS_MAX;
 	    }
 	  else
-	    continue;
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Nothing known about exit %i->%i.\n",
+			 ex->src->index, ex->dest->index);
+	      continue;
+	    }
 
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Recording prediction to %i iterations by %s.\n",
+		     (int)nitercst, predictor_info[predictor].name);
 	  /* If the prediction for number of iterations is zero, do not
 	     predict the exit edges.  */
 	  if (nitercst == 0)
@@ -1807,7 +1870,6 @@ predict_loops (void)
 
       for (j = 0; j < loop->num_nodes; j++)
 	{
-	  int header_found = 0;
 	  edge e;
 	  edge_iterator ei;
 
@@ -1818,14 +1880,16 @@ predict_loops (void)
 	     in the source language and are better to be handled
 	     separately.  */
 	  if (predicted_by_p (bb, PRED_CONTINUE))
-	    continue;
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "BB %i predicted by continue.\n",
+			 bb->index);
+	      continue;
+	    }
 
-	  /* Loop exit heuristics - predict an edge exiting the loop if the
-	     conditional has no loop header successors as not taken.  */
-	  if (!header_found
-	      /* If we already used more reliable loop exit predictors, do not
-		 bother with PRED_LOOP_EXIT.  */
-	      && !predicted_by_loop_heuristics_p (bb))
+	  /* If we already used more reliable loop exit predictors, do not
+	     bother with PRED_LOOP_EXIT.  */
+	  if (!predicted_by_loop_heuristics_p (bb))
 	    {
 	      /* For loop with many exits we don't want to predict all exits
 	         with the pretty large probability, because if all exits are
@@ -1842,14 +1906,25 @@ predict_loops (void)
 		 a wide loop.  */
 
 	      int probability = ((REG_BR_PROB_BASE
-		                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
+		                  - predictor_info
+				     [recursion
+				      ? PRED_LOOP_EXIT_WITH_RECURSION
+				      : PRED_LOOP_EXIT].hitrate)
 				 / n_exits);
 	      if (probability < HITRATE (2))
 		probability = HITRATE (2);
 	      FOR_EACH_EDGE (e, ei, bb->succs)
 		if (e->dest->index < NUM_FIXED_BLOCKS
 		    || !flow_bb_inside_loop_p (loop, e->dest))
-		  predict_edge (e, PRED_LOOP_EXIT, probability);
+		  {
+		    if (dump_file && (dump_flags & TDF_DETAILS))
+		      fprintf (dump_file,
+			       "Predicting exit %i->%i with prob %i.\n",
+			       e->src->index, e->dest->index, probability);
+		    predict_edge (e,
+				  recursion ? PRED_LOOP_EXIT_WITH_RECURSION
+			          : PRED_LOOP_EXIT, probability);
+		  }
 	    }
 	  if (loop_bound_var)
 	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
@@ -1910,7 +1985,9 @@ predict_loops (void)
 	      if (!dominated_by_p (CDI_DOMINATORS,
 				   loop_outer (loop)->latch, loop->header))
 		predict_paths_leading_to_edge (loop_preheader_edge (loop),
-					       PRED_LOOP_GUARD,
+					       recursion
+					       ? PRED_LOOP_GUARD_WITH_RECURSION
+					       : PRED_LOOP_GUARD,
 					       NOT_TAKEN,
 					       loop_outer (loop));
 	    }
@@ -1919,7 +1996,9 @@ predict_loops (void)
 	      if (!dominated_by_p (CDI_DOMINATORS,
 				   loop_outer (loop)->latch, bb))
 		predict_paths_leading_to (bb,
-					  PRED_LOOP_GUARD,
+					  recursion
+					  ? PRED_LOOP_GUARD_WITH_RECURSION
+					  : PRED_LOOP_GUARD,
 					  NOT_TAKEN,
 					  loop_outer (loop));
 	    }
@@ -3367,6 +3446,15 @@ pass_profile::execute (function *fun)
     gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
     profile_status_for_fn (fun) = PROFILE_GUESSED;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   {
+     struct loop *loop;
+     FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+       if (loop->header->frequency)
+         fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n",
+       	   loop->num,
+       	   (int)expected_loop_iterations_unbounded (loop));
+   }
   return 0;
 }
 
Index: predict.def
===================================================================
--- predict.def	(revision 237789)
+++ predict.def	(working copy)
@@ -92,6 +92,10 @@ DEF_PREDICTOR (PRED_COLD_FUNCTION, "cold
 DEF_PREDICTOR (PRED_LOOP_EXIT, "loop exit", HITRATE (85),
 	       PRED_FLAG_FIRST_MATCH)
 
+/* Same as LOOP_EXIT but for loops containing recursive call.  */
+DEF_PREDICTOR (PRED_LOOP_EXIT_WITH_RECURSION, "loop exit with recursion",
+	       HITRATE (72), PRED_FLAG_FIRST_MATCH)
+
 /* Edge causing loop to terminate by computing value used by later
    conditional.  */
 DEF_PREDICTOR (PRED_LOOP_EXTRA_EXIT, "extra loop exit", HITRATE (83),
@@ -105,14 +109,17 @@ DEF_PREDICTOR (PRED_TREE_POINTER, "point
 DEF_PREDICTOR (PRED_OPCODE_POSITIVE, "opcode values positive", HITRATE (64), 0)
 DEF_PREDICTOR (PRED_OPCODE_NONEQUAL, "opcode values nonequal", HITRATE (66), 0)
 DEF_PREDICTOR (PRED_FPOPCODE, "fp_opcode", HITRATE (90), 0)
-DEF_PREDICTOR (PRED_TREE_OPCODE_POSITIVE, "opcode values positive (on trees)", HITRATE (64), 0)
-DEF_PREDICTOR (PRED_TREE_OPCODE_NONEQUAL, "opcode values nonequal (on trees)", HITRATE (66), 0)
+DEF_PREDICTOR (PRED_TREE_OPCODE_POSITIVE, "opcode values positive (on trees)",
+	       HITRATE (64), 0)
+DEF_PREDICTOR (PRED_TREE_OPCODE_NONEQUAL, "opcode values nonequal (on trees)",
+	       HITRATE (66), 0)
 DEF_PREDICTOR (PRED_TREE_FPOPCODE, "fp_opcode (on trees)", HITRATE (90), 0)
 
 /* Branch guarding call is probably taken.  */
 DEF_PREDICTOR (PRED_CALL, "call", HITRATE (67), 0)
 
-/* Recursive calls are usually not taken or the function will recurse indefinitely.  */
+/* Recursive calls are usually not taken or the function will recurse
+   indefinitely.  */
 DEF_PREDICTOR (PRED_RECURSIVE_CALL, "recursive call", HITRATE (75), 0)
 
 /* Branch causing function to terminate is probably not taken. 
@@ -157,7 +164,11 @@ DEF_PREDICTOR (PRED_LOOP_IV_COMPARE, "lo
        for (loop2)
 	 body;
    guess that cond is unlikely.  */
-DEF_PREDICTOR (PRED_LOOP_GUARD, "loop guard", HITRATE (66), 0)
+DEF_PREDICTOR (PRED_LOOP_GUARD, "loop guard", HITRATE (66), 0)
+
+/* Same but for loops containing recursion.  */
+DEF_PREDICTOR (PRED_LOOP_GUARD_WITH_RECURSION, "loop guard with recursion",
+	       HITRATE (85), 0)
 
 /* Branches to hot labels are likely.  */
 DEF_PREDICTOR (PRED_HOT_LABEL, "hot label", HITRATE (85), 0)