From patchwork Sat Jun 10 23:08:40 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Hubicka X-Patchwork-Id: 774287 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3wlZbX1Fwxz9s7M for ; Sun, 11 Jun 2017 09:08:54 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="oYtj4jKN"; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type; q=dns; s= default; b=Y510rmHeNH2vHl5SoDMkWdA2ycQmD4e9CDgAlO9dE8BW39TTNPgSp Sf7LdkTBYQfJb/p82+VL7tdaJdlUsRPj2BrQDeWkxeFp9/QNwHFkl8SQauo6ygfx kKHPCYZUpy6XehNDQSyR+psevyQQK+NFkMg+VC+OLzQZWouSFhvQHE= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type; s= default; bh=X88VyTBddKtNpfbbBtAkmY/0fPo=; b=oYtj4jKNBy+hf6tjvzHA zLkBIHAQvxcKOGxM/ataCcWeEySD//gtyOOgtdNgt3+W7K+3VTzmm03RlmiJIV7H /lf3T5Er7NvxZZbgef1NzKeCks1tGGZLLMyEFM96mplcrOPtF7U0GIvazbZPitXg 7iCuBKDeMX5T36dm63uCGpI= Received: (qmail 100181 invoked by alias); 10 Jun 2017 23:08:42 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 100167 invoked by uid 89); 10 Jun 2017 23:08:41 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, 
score=-9.4 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_2, GIT_PATCH_3, KAM_ASCII_DIVIDERS, KAM_LAZY_DOMAIN_SECURITY, T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 spammy=sk:specula, 20309, 91114 X-HELO: nikam.ms.mff.cuni.cz Received: from nikam.ms.mff.cuni.cz (HELO nikam.ms.mff.cuni.cz) (195.113.20.16) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Sat, 10 Jun 2017 23:08:39 +0000 Received: by nikam.ms.mff.cuni.cz (Postfix, from userid 16202) id 9D81854815A; Sun, 11 Jun 2017 01:08:40 +0200 (CEST) Date: Sun, 11 Jun 2017 01:08:40 +0200 From: Jan Hubicka To: gcc-patches@gcc.gnu.org Subject: Make profile scaling during cloning more precise Message-ID: <20170610230840.GB11824@kam.mff.cuni.cz> MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.23 (2014-03-12) Hi, this patch makes profile scaling use the same logic in tree-inline and cgraphclones. This reduces roundoff errors and makes it easier to propagate more info. When compiling tramp3d with profile feedback, the number of mismatches after inlining goes down from 248 to 95. Honza * cgraph.h (cgraph_edge::clone): Update prototype. * cgraphclones.c (cgraph_edge::clone): Update profile scaling. (cgraph_node::create_clone): Update. (cgraph_node::create_version_clone): Update. * tree-inline.c (copy_bb): Update. (expand_call_inline): Update. Index: cgraph.h =================================================================== --- cgraph.h (revision 249092) +++ cgraph.h (working copy) @@ -1649,7 +1649,7 @@ struct GTY((chain_next ("%h.next_caller" /* Create clone of edge in the node N represented by CALL_EXPR the callgraph. */ cgraph_edge * clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, - gcov_type count_scale, int freq_scale, + profile_count num, profile_count den, int freq_scale, bool update_original); /* Verify edge count and frequency. 
*/ Index: cgraphclones.c =================================================================== --- cgraphclones.c (revision 249092) +++ cgraphclones.c (working copy) @@ -86,10 +86,13 @@ along with GCC; see the file COPYING3. cgraph_edge * cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, - gcov_type count_scale, int freq_scale, bool update_original) + profile_count num, profile_count den, + int freq_scale, bool update_original) { cgraph_edge *new_edge; - profile_count gcov_count = count.apply_scale (count_scale, REG_BR_PROB_BASE); + profile_count gcov_count + = (num == profile_count::zero () || den > 0) + ? count.apply_scale (num, den) : count; gcov_type freq; /* We do not want to ignore loop nest after frequency drops to 0. */ @@ -116,7 +119,7 @@ cgraph_edge::clone (cgraph_node *n, gcal { new_edge = n->create_indirect_edge (call_stmt, indirect_info->ecf_flags, - count, freq, false); + gcov_count, freq, false); *new_edge->indirect_info = *indirect_info; } } @@ -428,7 +431,6 @@ cgraph_node::create_clone (tree new_decl { cgraph_node *new_node = symtab->create_empty (); cgraph_edge *e; - gcov_type count_scale; unsigned i; if (new_inlined_to) @@ -453,7 +455,6 @@ cgraph_node::create_clone (tree new_decl new_node->global = global; new_node->global.inlined_to = new_inlined_to; new_node->rtl = rtl; - new_node->count = count; new_node->frequency = frequency; new_node->tp_first_run = tp_first_run; new_node->tm_clone = tm_clone; @@ -475,18 +476,6 @@ cgraph_node::create_clone (tree new_decl else new_node->clone.combined_args_to_skip = args_to_skip; - if (count.initialized_p ()) - { - if (new_node->count > count) - count_scale = REG_BR_PROB_BASE; - else - count_scale = new_node->count.probability_in (count); - } - else - count_scale = 0; - if (update_original) - count -= prof_count; - FOR_EACH_VEC_ELT (redirect_callers, i, e) { /* Redirect calls to the old version node to point to its new @@ -500,12 +489,12 @@ cgraph_node::create_clone (tree new_decl 
new_node->expand_all_artificial_thunks (); for (e = callees;e; e=e->next_callee) - e->clone (new_node, e->call_stmt, e->lto_stmt_uid, count_scale, + e->clone (new_node, e->call_stmt, e->lto_stmt_uid, new_node->count, count, freq, update_original); for (e = indirect_calls; e; e = e->next_callee) e->clone (new_node, e->call_stmt, e->lto_stmt_uid, - count_scale, freq, update_original); + new_node->count, count, freq, update_original); new_node->clone_references (this); new_node->next_sibling_clone = clones; @@ -514,6 +503,9 @@ cgraph_node::create_clone (tree new_decl clones = new_node; new_node->clone_of = this; + if (update_original) + count -= prof_count; + if (call_duplication_hook) symtab->call_cgraph_duplication_hooks (this, new_node); @@ -911,14 +903,14 @@ cgraph_node::create_version_clone (tree if (!bbs_to_copy || bitmap_bit_p (bbs_to_copy, gimple_bb (e->call_stmt)->index)) e->clone (new_version, e->call_stmt, - e->lto_stmt_uid, REG_BR_PROB_BASE, + e->lto_stmt_uid, count, count, CGRAPH_FREQ_BASE, true); for (e = indirect_calls; e; e=e->next_callee) if (!bbs_to_copy || bitmap_bit_p (bbs_to_copy, gimple_bb (e->call_stmt)->index)) e->clone (new_version, e->call_stmt, - e->lto_stmt_uid, REG_BR_PROB_BASE, + e->lto_stmt_uid, count, count, CGRAPH_FREQ_BASE, true); FOR_EACH_VEC_ELT (redirect_callers, i, e) Index: tree-inline.c =================================================================== --- tree-inline.c (revision 249092) +++ tree-inline.c (working copy) @@ -2009,7 +2009,9 @@ copy_bb (copy_body_data *id, basic_block struct cgraph_edge *old_edge = edge; edge = edge->clone (id->dst_node, call_stmt, gimple_uid (stmt), - REG_BR_PROB_BASE, CGRAPH_FREQ_BASE, + profile_count::one (), + profile_count::one (), + CGRAPH_FREQ_BASE, true); /* We could also just rescale the frequency, but doing so would introduce roundoff errors and make @@ -2028,7 +2030,9 @@ copy_bb (copy_body_data *id, basic_block old_edge->speculative_call_info (direct, indirect, ref); indirect = 
indirect->clone (id->dst_node, call_stmt, gimple_uid (stmt), - REG_BR_PROB_BASE, CGRAPH_FREQ_BASE, + profile_count::one (), + profile_count::one (), + CGRAPH_FREQ_BASE, true); if (old_edge->frequency + indirect->frequency) { @@ -4509,7 +4522,9 @@ expand_call_inline (basic_block bb, gimp cg_edge->remove (); edge = id->src_node->callees->clone (id->dst_node, call_stmt, gimple_uid (stmt), - REG_BR_PROB_BASE, CGRAPH_FREQ_BASE, + profile_count::one (), + profile_count::one (), + CGRAPH_FREQ_BASE, true); edge->frequency = freq; edge->count = count;