From patchwork Sat Nov 9 17:57:14 2019
From: Jan Hubicka
To: gcc-patches@gcc.gnu.org
Subject: Speed up inlining functions called once
Date: Sat, 9 Nov 2019 18:57:14 +0100
Message-ID: <20191109175714.5oyc37utwijguhtc@kam.mff.cuni.cz>

Hi,
it turns out that we spend a lot of time inlining functions called once.
This is because we in fact inline functions where inlining into all callers
and eliminating the offline copy leads to smaller code.  For that we always
compute the growth of all edges in the callgraph, which is expensive.  For
most functions the first few calls are enough to prove that inlining will
make the code size grow.  This patch makes growth_likely_positive reliable
and implements the cap.
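To illustrate the idea behind the cap outside of the cgraph machinery: instead of summing the estimated growth over every call site and only then comparing it with the savings from dropping the offline copy, the walk stops as soon as the running total provably exceeds those savings.  A minimal standalone sketch follows; call_site and growth_exceeds_cap are made-up names for illustration, not GCC interfaces.

#include <vector>

/* Hypothetical stand-in for a call site's estimated code-size growth;
   in GCC this information comes from the edge growth estimates.  */
struct call_site
{
  int estimated_growth;
};

/* Return true as soon as the growth accumulated over the callers provably
   exceeds CAP, i.e. the savings from dropping the offline copy.  Remaining
   call sites are never visited, which is the whole point of the cap.  */
static bool
growth_exceeds_cap (const std::vector<call_site> &callers, int cap)
{
  int growth = 0;
  for (const call_site &c : callers)
    {
      growth += c.estimated_growth;
      if (growth > cap)
	return true;	/* Inlining everywhere cannot shrink the program.  */
    }
  return false;
}

int
main ()
{
  /* Call sites growing by 8 each against 20 units of savings: the answer
     is known after the third caller, however many callers follow.  */
  std::vector<call_site> callers = { {8}, {8}, {8}, {8}, {8} };
  return growth_exceeds_cap (callers, 20) ? 0 : 1;
}

In the patch the same early exit is wired into do_estimate_growth_1 through the new cap field of growth_data, so the walk over callers no longer has to visit every edge of frequently called functions.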
Bootstrapped/regtested x86_64-linux, committed.

	* ipa-inline-analysis.c (do_estimate_growth_1): Add support for
	capping the cumulated growth.
	(offline_size): Break out from ...
	(estimate_growth): ... here.
	(check_callers): Add N, OFFLINE, MIN_SIZE and KNOWN_EDGE
	parameters.
	(growth_likely_positive): Turn to ...
	(growth_positive_p): Re-implement.
	* ipa-inline.h (growth_likely_positive): Remove.
	(growth_positive_p): Declare.
	* ipa-inline.c (want_inline_small_function_p): Use
	growth_positive_p.
	(want_inline_function_to_all_callers_p): Likewise.

Index: ipa-inline-analysis.c
===================================================================
--- ipa-inline-analysis.c	(revision 278006)
+++ ipa-inline-analysis.c	(working copy)
@@ -387,6 +387,7 @@ struct growth_data
   bool self_recursive;
   bool uninlinable;
   int growth;
+  int cap;
 };
@@ -406,29 +407,57 @@ do_estimate_growth_1 (struct cgraph_node
	  || !opt_for_fn (e->caller->decl, optimize))
	{
	  d->uninlinable = true;
+	  if (d->cap < INT_MAX)
+	    return true;
	  continue;
	}

       if (e->recursive_p ())
	{
	  d->self_recursive = true;
+	  if (d->cap < INT_MAX)
+	    return true;
	  continue;
	}
       d->growth += estimate_edge_growth (e);
+      if (d->growth > d->cap)
+	return true;
     }
   return false;
 }

+/* Return estimated savings for eliminating offline copy of NODE by inlining
+   it everywhere.  */
+
+static int
+offline_size (struct cgraph_node *node, ipa_size_summary *info)
+{
+  if (!DECL_EXTERNAL (node->decl))
+    {
+      if (node->will_be_removed_from_program_if_no_direct_calls_p ())
+	return info->size;
+      /* COMDAT functions are very often not shared across multiple units
+	 since they come from various template instantiations.
+	 Take this into account.  */
+      else if (DECL_COMDAT (node->decl)
+	       && node->can_remove_if_no_direct_calls_p ())
+	return (info->size
+		* (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY))
+		+ 50) / 100;
+    }
+  return 0;
+}

 /* Estimate the growth caused by inlining NODE into all callees.  */

 int
 estimate_growth (struct cgraph_node *node)
 {
-  struct growth_data d = { node, false, false, 0 };
-  class ipa_size_summary *info = ipa_size_summaries->get (node);
+  struct growth_data d = { node, false, false, 0, INT_MAX };
+  ipa_size_summary *info = ipa_size_summaries->get (node);

-  node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true);
+  if (node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true))
+    return 1;

   /* For self recursive functions the growth estimation really should be
      infinity.  We don't want to return very large values because the growth
@@ -436,21 +465,8 @@ estimate_growth (struct cgraph_nod
      return zero or negative growths.  */
   if (d.self_recursive)
     d.growth = d.growth < info->size ? info->size : d.growth;
-  else if (DECL_EXTERNAL (node->decl) || d.uninlinable)
-    ;
-  else
-    {
-      if (node->will_be_removed_from_program_if_no_direct_calls_p ())
-	d.growth -= info->size;
-      /* COMDAT functions are very often not shared across multiple units
-	 since they come from various template instantiations.
-	 Take this into account.  */
-      else if (DECL_COMDAT (node->decl)
-	       && node->can_remove_if_no_direct_calls_p ())
-	d.growth -= (info->size
-		     * (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY))
-		     + 50) / 100;
-    }
+  else if (!d.uninlinable)
+    d.growth -= offline_size (node, info);

   return d.growth;
 }
@@ -458,7 +474,8 @@ estimate_growth (struct cgraph_nod
 /* Verify if there are fewer than MAX_CALLERS.  */

 static bool
-check_callers (cgraph_node *node, int *max_callers)
+check_callers (cgraph_node *node, int *growth, int *n, int offline,
+	       int min_size, struct cgraph_edge *known_edge)
 {
   ipa_ref *ref;
@@ -467,70 +484,96 @@ check_callers (cgraph_node *node, int *m
   for (cgraph_edge *e = node->callers; e; e = e->next_caller)
     {
-      (*max_callers)--;
-      if (!*max_callers
-	  || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
+      edge_growth_cache_entry *entry;
+
+      if (e == known_edge)
+	continue;
+      if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
+	return true;
+      if (edge_growth_cache != NULL
+	  && (entry = edge_growth_cache->get (e)) != NULL
+	  && entry->size != 0)
+	*growth += entry->size - (entry->size > 0);
+      else
+	{
+	  class ipa_call_summary *es = ipa_call_summaries->get (e);
+	  if (!es)
+	    return true;
+	  *growth += min_size - es->call_stmt_size;
+	  if (--(*n) < 0)
+	    return false;
+	}
+      if (*growth > offline)
	return true;
     }

-  FOR_EACH_ALIAS (node, ref)
-    if (check_callers (dyn_cast <cgraph_node *> (ref->referring), max_callers))
-      return true;
+  if (*n > 0)
+    FOR_EACH_ALIAS (node, ref)
+      if (check_callers (dyn_cast <cgraph_node *> (ref->referring), growth, n,
+			 offline, min_size, known_edge))
+	return true;

   return false;
 }

-/* Make cheap estimation if growth of NODE is likely positive knowing
-   EDGE_GROWTH of one particular edge.
-   We assume that most of other edges will have similar growth
-   and skip computation if there are too many callers.  */
+/* Decide if growth of NODE is positive.  This is cheaper than calculating
+   actual growth.  If edge growth of KNOWN_EDGE is known
+   it is passed by EDGE_GROWTH.  */

 bool
-growth_likely_positive (struct cgraph_node *node,
-			int edge_growth)
+growth_positive_p (struct cgraph_node *node,
+		   struct cgraph_edge * known_edge, int edge_growth)
 {
-  int max_callers;
   struct cgraph_edge *e;
-  gcc_checking_assert (edge_growth > 0);
+
+  ipa_size_summary *s = ipa_size_summaries->get (node);

   /* First quickly check if NODE is removable at all.  */
-  if (DECL_EXTERNAL (node->decl))
-    return true;
-  if (!node->can_remove_if_no_direct_calls_and_refs_p ()
-      || node->address_taken)
+  int offline = offline_size (node, s);
+  if (offline <= 0 && known_edge && edge_growth > 0)
     return true;

-  max_callers = ipa_size_summaries->get (node)->size * 4 / edge_growth + 2;
+  int min_size = ipa_fn_summaries->get (node)->min_size;
+  int n = 10;
+  int min_growth = known_edge ? edge_growth : 0;
   for (e = node->callers; e; e = e->next_caller)
     {
-      max_callers--;
-      if (!max_callers
-	  || cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
+      edge_growth_cache_entry *entry;
+
+      if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
+	return true;
+      if (e == known_edge)
+	continue;
+      if (edge_growth_cache != NULL
+	  && (entry = edge_growth_cache->get (e)) != NULL
+	  && entry->size != 0)
+	min_growth += entry->size - (entry->size > 0);
+      else
+	{
+	  class ipa_call_summary *es = ipa_call_summaries->get (e);
+	  if (!es)
+	    return true;
+	  min_growth += min_size - es->call_stmt_size;
+	  if (--n <= 0)
+	    break;
+	}
+      if (min_growth > offline)
	return true;
     }

   ipa_ref *ref;
-  FOR_EACH_ALIAS (node, ref)
-    if (check_callers (dyn_cast <cgraph_node *> (ref->referring), &max_callers))
-      return true;
-
-  /* Unlike for functions called once, we play unsafe with
-     COMDATs.  We can allow that since we know functions
-     in consideration are small (and thus risk is small) and
-     moreover grow estimates already accounts that COMDAT
-     functions may or may not disappear when eliminated from
-     current unit.  With good probability making aggressive
-     choice in all units is going to make overall program
-     smaller.  */
-  if (DECL_COMDAT (node->decl))
-    {
-      if (!node->can_remove_if_no_direct_calls_p ())
+  if (n > 0)
+    FOR_EACH_ALIAS (node, ref)
+      if (check_callers (dyn_cast <cgraph_node *> (ref->referring),
+			 &min_growth, &n, offline, min_size, known_edge))
	return true;
-    }
-  else if (!node->will_be_removed_from_program_if_no_direct_calls_p ())
-    return true;

-  return estimate_growth (node) > 0;
+  struct growth_data d = { node, false, false, 0, offline };
+  if (node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true))
+    return true;
+  if (d.self_recursive || d.uninlinable)
+    return true;
+  return (d.growth > offline);
 }
Index: ipa-inline.h
===================================================================
--- ipa-inline.h	(revision 278006)
+++ ipa-inline.h	(working copy)
@@ -44,7 +44,7 @@ extern fast_call_summary
Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 278006)
+++ ipa-inline.c	(working copy)
	       && !opt_for_fn (e->caller->decl, flag_inline_functions)
	       && growth >= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SMALL))
	{
-	  /* growth_likely_positive is expensive, always test it last.  */
+	  /* growth_positive_p is expensive, always test it last.  */
	  if (growth >= inline_insns_single (e->caller, false)
-	      || growth_likely_positive (callee, growth))
+	      || growth_positive_p (callee, e, growth))
	    {
	      e->inline_failed = CIF_NOT_DECLARED_INLINED;
	      want_inline = false;
@@ -899,9 +901,9 @@ want_inline_small_function_p (struct cgr
	       || growth >= inline_insns_auto (e->caller, true)
	       || !big_speedup_p (e)))
	{
-	  /* growth_likely_positive is expensive, always test it last.  */
+	  /* growth_positive_p is expensive, always test it last.  */
	  if (growth >= inline_insns_single (e->caller, false)
-	      || growth_likely_positive (callee, growth))
+	      || growth_positive_p (callee, e, growth))
	    {
	      if (opt_for_fn (e->caller->decl, optimize) >= 3)
		e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
@@ -913,7 +915,7 @@ want_inline_small_function_p (struct cgr
   /* If call is cold, do not inline when function body would grow.  */
   else if (!e->maybe_hot_p ()
	   && (growth >= inline_insns_single (e->caller, false)
-	       || growth_likely_positive (callee, growth)))
+	       || growth_positive_p (callee, e, growth)))
     {
       e->inline_failed = CIF_UNLIKELY_CALL;
       want_inline = false;
@@ -1075,7 +1077,7 @@ want_inline_function_to_all_callers_p (s
   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
     return false;
   /* Inlining into all callers would increase size?  */
-  if (estimate_growth (node) > 0)
+  if (growth_positive_p (node, NULL, INT_MIN) > 0)
     return false;
   /* All inlines must be possible.  */
   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
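As a side note on the arithmetic that offline_size carries over from estimate_growth: the expected savings for a COMDAT function are scaled by the probability that no other unit keeps the copy, rounded to the nearest size unit.  A standalone sketch of that computation follows, using a sharing probability of 20 purely as an example value (in GCC the value comes from the comdat-sharing-probability param).

#include <cassert>

/* Standalone illustration of the rounding done by offline_size for COMDAT
   functions.  SHARING_PROBABILITY is in percent; 20 is used here only as
   an example value, not as a statement about the GCC default.  */
static int
comdat_offline_savings (int size, int sharing_probability)
{
  /* Scale by the chance the copy really goes away and round to nearest.  */
  return (size * (100 - sharing_probability) + 50) / 100;
}

int
main ()
{
  /* A 37-unit COMDAT body with a 20% chance of being shared elsewhere is
     credited with 30 units of savings (37 * 80 / 100 = 29.6 -> 30).  */
  assert (comdat_offline_savings (37, 20) == 30);
  return 0;
}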