From patchwork Thu Nov 28 14:22:15 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Hubicka X-Patchwork-Id: 1202083 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-514769-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=ucw.cz Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="t/OpGAKp"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 47P0GF385Fz9sPJ for ; Fri, 29 Nov 2019 01:22:27 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type; q=dns; s= default; b=FLhcEV2xpwpQnnUdocOlFM8c4GDyIxwIMVBi3s4f4wR2lIf0AfMLP tV6TBvCUP6GHgXoNXMNqu8W+xYVMSL0V1wZJEUlrlTeo45A/aMCfNZJ4RP8wn+0N nCUHz8FOmYBcYOuxclI8yyJWSC3qZTl/l9u5wWjIQmRpcKi6Ld1JXU= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type; s= default; bh=Da6/70mg2StaP9C1gcCBf9fIP+o=; b=t/OpGAKppz1Cp5VrSiKA 8cBB/O1YsMBn9jRbcCtrZJ4Gn2JIo4iVIncRhOcuZJAh0163NWuPg7g8haiwpAi6 X8zqR+qwIEjdClmLL9rreV3WJ+0tBYYfSIqQypc6V5XEGirM6NKbMmp/kX6QafqS EDOTJW9m2G/yI/6Mz5rfPak= Received: (qmail 90452 invoked by alias); 28 Nov 2019 14:22:21 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk 
List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 90444 invoked by uid 89); 28 Nov 2019 14:22:20 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-10.6 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.1 spammy=cgraph_node, basic_block, profiles X-HELO: nikam.ms.mff.cuni.cz Received: from nikam.ms.mff.cuni.cz (HELO nikam.ms.mff.cuni.cz) (195.113.20.16) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Thu, 28 Nov 2019 14:22:18 +0000 Received: by nikam.ms.mff.cuni.cz (Postfix, from userid 16202) id A776C280822; Thu, 28 Nov 2019 15:22:15 +0100 (CET) Date: Thu, 28 Nov 2019 15:22:15 +0100 From: Jan Hubicka To: gcc-patches@gcc.gnu.org Subject: Fix scaling of profiles in ipa_merge_profiles Message-ID: <20191128142215.d7tsr6jtt5llvv6j@kam.mff.cuni.cz> MIME-Version: 1.0 Content-Disposition: inline User-Agent: NeoMutt/20170113 (1.7.2) Hi, this patch fixes two problems in ipa_merge_profiles. First, we allow the cfg profile to diverge from the cgraph profile, so prior to summing the cfg profiles we must compensate for this change. Second, the function tries to preserve as much information as possible (for example, to handle the case where one function has a guessed profile and the other has an IPA profile), but it did so independently for each profile counter, which is not good: all type transitions must be done the same way for the resulting profile to be meaningful. In particular, the code sometimes made node->count a global count while some edges got global0 counters, which is not meaningful and leads to an ICE with the sanity checking I want to commit incrementally. Profiled-bootstrapped on x86_64-linux, committed. * ipa-utils.c (ipa_merge_profiles): Be sure that all type transitions of counters are done the same way. 
Index: ipa-utils.c =================================================================== --- ipa-utils.c (revision 278681) +++ ipa-utils.c (working copy) @@ -398,6 +398,7 @@ ipa_merge_profiles (struct cgraph_node * tree oldsrcdecl = src->decl; struct function *srccfun, *dstcfun; bool match = true; + bool copy_counts = false; if (!src->definition || !dst->definition) @@ -429,10 +430,26 @@ ipa_merge_profiles (struct cgraph_node * } profile_count orig_count = dst->count; - if (dst->count.initialized_p () && dst->count.ipa () == dst->count) - dst->count += src->count.ipa (); - else - dst->count = src->count.ipa (); + /* Either sum the profiles if both are IPA and not global0, or + pick more informative one (that is nonzero IPA if other is + uninitialized, guessed or global0). */ + + if ((dst->count.ipa ().nonzero_p () + || src->count.ipa ().nonzero_p ()) + && dst->count.ipa ().initialized_p () + && src->count.ipa ().initialized_p ()) + dst->count = dst->count.ipa () + src->count.ipa (); + else if (dst->count.ipa ().initialized_p ()) + ; + else if (src->count.ipa ().initialized_p ()) + { + copy_counts = true; + dst->count = src->count.ipa (); + } + + /* If no updating needed return early. */ + if (dst->count == orig_count) + return; /* First handle functions with no gimple body. */ if (dst->thunk.thunk_p || dst->alias @@ -544,6 +561,16 @@ ipa_merge_profiles (struct cgraph_node * struct cgraph_edge *e, *e2; basic_block srcbb, dstbb; + /* Function and global profile may be out of sync. First scale it same + way as fixup_cfg would. */ + profile_count srcnum = src->count; + profile_count srcden = ENTRY_BLOCK_PTR_FOR_FN (srccfun)->count; + bool srcscale = srcnum.initialized_p () && !(srcnum == srcden); + profile_count dstnum = orig_count; + profile_count dstden = ENTRY_BLOCK_PTR_FOR_FN (dstcfun)->count; + bool dstscale = !copy_counts + && dstnum.initialized_p () && !(dstnum == dstden); + /* TODO: merge also statement histograms. 
*/ FOR_ALL_BB_FN (srcbb, srccfun) { @@ -551,15 +578,15 @@ ipa_merge_profiles (struct cgraph_node * dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index); - /* Either sum the profiles if both are IPA and not global0, or - pick more informative one (that is nonzero IPA if other is - uninitialized, guessed or global0). */ - if (!dstbb->count.ipa ().initialized_p () - || (dstbb->count.ipa () == profile_count::zero () - && (srcbb->count.ipa ().initialized_p () - && !(srcbb->count.ipa () == profile_count::zero ())))) + profile_count srccount = srcbb->count; + if (srcscale) + srccount = srccount.apply_scale (srcnum, srcden); + if (dstscale) + dstbb->count = dstbb->count.apply_scale (dstnum, dstden); + + if (copy_counts) { - dstbb->count = srcbb->count; + dstbb->count = srccount; for (i = 0; i < EDGE_COUNT (srcbb->succs); i++) { edge srce = EDGE_SUCC (srcbb, i); @@ -568,18 +595,21 @@ ipa_merge_profiles (struct cgraph_node * dste->probability = srce->probability; } } - else if (srcbb->count.ipa ().initialized_p () - && !(srcbb->count.ipa () == profile_count::zero ())) + else { for (i = 0; i < EDGE_COUNT (srcbb->succs); i++) { edge srce = EDGE_SUCC (srcbb, i); edge dste = EDGE_SUCC (dstbb, i); dste->probability = - dste->probability * dstbb->count.probability_in (dstbb->count + srcbb->count) - + srce->probability * srcbb->count.probability_in (dstbb->count + srcbb->count); + dste->probability * dstbb->count.ipa ().probability_in + (dstbb->count.ipa () + + srccount.ipa ()) + + srce->probability * srcbb->count.ipa ().probability_in + (dstbb->count.ipa () + + srccount.ipa ()); } - dstbb->count += srcbb->count; + dstbb->count = dstbb->count.ipa () + srccount.ipa (); } } push_cfun (dstcfun);