From patchwork Sat Apr 14 05:53:52 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tom de Vries X-Patchwork-Id: 152475 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 603FBB6FE7 for ; Sat, 14 Apr 2012 15:54:25 +1000 (EST) Comment: DKIM? See http://www.dkim.org DKIM-Signature: v=1; a=rsa-sha1; c=relaxed/relaxed; d=gcc.gnu.org; s=default; x=1334987666; h=Comment: DomainKey-Signature:Received:Received:Received:Received:Received: Received:Message-ID:Date:From:User-Agent:MIME-Version:To:CC: Subject:References:In-Reply-To:Content-Type:Mailing-List: Precedence:List-Id:List-Unsubscribe:List-Archive:List-Post: List-Help:Sender:Delivered-To; bh=xnVyIU8vYk6fM6oFyUsugevgvbo=; b=XsSZrv4R0XHfwCtfsjwjo+ns8mmYO3uBrs4XwKvf6pp5a5cBmnhktnzQF4M6AX dkgHdha6ssAeNehWniiMHNdN6yn0oQekYdwVjTQFUbW+xlpGwey4HnAkogOY34hW bbC8MVSB7lbduOdZyVrM1cud+kxAqPK0VtlPF7WF08oG8= Comment: DomainKeys? See http://antispam.yahoo.com/domainkeys DomainKey-Signature: a=rsa-sha1; q=dns; c=nofws; s=default; d=gcc.gnu.org; h=Received:Received:X-SWARE-Spam-Status:X-Spam-Check-By:Received:Received:Received:Received:Message-ID:Date:From:User-Agent:MIME-Version:To:CC:Subject:References:In-Reply-To:Content-Type:Mailing-List:Precedence:List-Id:List-Unsubscribe:List-Archive:List-Post:List-Help:Sender:Delivered-To; b=XZ0A7TWL9JJT0BJs4HPWIEkOnfIlyk41xDxF+cz5vKx/VLfmFxc1xzGZVn+8QU RI8aVBqJX1mWiqK9fXmDtXWOXihy9biFGWyv+HYmf9Vy0hwRtTD/h4DBI1GD4d5i MqRA1L5VfJQyreTC0hoE6+cVqt04Uu1vYFKZnTW8Rs2P0=; Received: (qmail 5000 invoked by alias); 14 Apr 2012 05:54:19 -0000 Received: (qmail 4988 invoked by uid 22791); 14 Apr 2012 05:54:16 -0000 X-SWARE-Spam-Status: No, hits=-4.2 required=5.0 tests=AWL, BAYES_00, KHOP_RCVD_UNTRUST, KHOP_THREADED, RCVD_IN_HOSTKARMA_W, RCVD_IN_HOSTKARMA_WL, TW_TM X-Spam-Check-By: sourceware.org Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Sat, 14 Apr 2012 05:54:01 +0000 Received: from svr-orw-exc-10.mgc.mentorg.com ([147.34.98.58]) by relay1.mentorg.com with esmtp id 1SIvvs-000426-Ei from Tom_deVries@mentor.com ; Fri, 13 Apr 2012 22:54:00 -0700 Received: from SVR-IES-FEM-01.mgc.mentorg.com ([137.202.0.104]) by SVR-ORW-EXC-10.mgc.mentorg.com with Microsoft SMTPSVC(6.0.3790.4675); Fri, 13 Apr 2012 22:53:34 -0700 Received: from [127.0.0.1] (137.202.0.76) by SVR-IES-FEM-01.mgc.mentorg.com (137.202.0.104) with Microsoft SMTP Server id 14.1.289.1; Sat, 14 Apr 2012 06:53:58 +0100 Message-ID: <4F8910F0.4010206@mentor.com> Date: Sat, 14 Apr 2012 07:53:52 +0200 From: Tom de Vries User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.28) Gecko/20120313 Lightning/1.0b2 Thunderbird/3.1.20 MIME-Version: 1.0 To: Richard Guenther CC: "gcc-patches@gcc.gnu.org" Subject: Re: [PATCH] Fix for PR52081 - Missed tail merging with pure calls References: <4F2A691D.8030403@mentor.com> <4F3903CC.1010800@mentor.com> In-Reply-To: Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org On 06/03/12 15:21, Richard Guenther wrote: > On Mon, Feb 13, 2012 at 1:36 PM, Tom de Vries wrote: >> On 13/02/12 12:54, Richard Guenther wrote: >>> On Thu, Feb 2, 2012 at 11:44 AM, Tom de Vries wrote: >>>> Richard, >>>> >>>> this patch fixes PR52801. >>>> >>>> Consider test-case pr51879-12.c: >>>> ... >>>> __attribute__((pure)) int bar (int); >>>> __attribute__((pure)) int bar2 (int); >>>> void baz (int); >>>> >>>> int x, z; >>>> >>>> void >>>> foo (int y) >>>> { >>>> int a = 0; >>>> if (y == 6) >>>> { >>>> a += bar (7); >>>> a += bar2 (6); >>>> } >>>> else >>>> { >>>> a += bar2 (6); >>>> a += bar (7); >>>> } >>>> baz (a); >>>> } >>>> ... >>>> >>>> When compiling at -O2, pr51879-12.c.094t.pre looks like this: >>>> ... >>>> # BLOCK 3 freq:1991 >>>> # PRED: 2 [19.9%] (true,exec) >>>> # VUSE <.MEMD.1722_12(D)> >>>> # USE = nonlocal escaped >>>> D.1717_4 = barD.1703 (7); >>>> # VUSE <.MEMD.1722_12(D)> >>>> # USE = nonlocal escaped >>>> D.1718_6 = bar2D.1705 (6); >>>> aD.1713_7 = D.1717_4 + D.1718_6; >>>> goto ; >>>> # SUCC: 5 [100.0%] (fallthru,exec) >>>> >>>> # BLOCK 4 freq:8009 >>>> # PRED: 2 [80.1%] (false,exec) >>>> # VUSE <.MEMD.1722_12(D)> >>>> # USE = nonlocal escaped >>>> D.1720_8 = bar2D.1705 (6); >>>> # VUSE <.MEMD.1722_12(D)> >>>> # USE = nonlocal escaped >>>> D.1721_10 = barD.1703 (7); >>>> aD.1713_11 = D.1720_8 + D.1721_10; >>>> # SUCC: 5 [100.0%] (fallthru,exec) >>>> >>>> # BLOCK 5 freq:10000 >>>> # PRED: 3 [100.0%] (fallthru,exec) 4 [100.0%] (fallthru,exec) >>>> # aD.1713_1 = PHI >>>> # .MEMD.1722_13 = VDEF <.MEMD.1722_12(D)> >>>> # USE = nonlocal >>>> # CLB = nonlocal >>>> bazD.1707 (aD.1713_1); >>>> # VUSE <.MEMD.1722_13> >>>> return; >>>> ... >>>> block 3 and 4 can be tail-merged. >>>> >>>> Value numbering numbers the two phi arguments a_7 and a_11 the same so the >>>> problem is not in value numbering: >>>> ... >>>> Setting value number of a_11 to a_7 (changed) >>>> ... >>>> >>>> There are 2 reasons that tail_merge_optimize doesn't optimize this: >>>> >>>> 1. >>>> The clause >>>> is_gimple_assign (stmt) && local_def (gimple_get_lhs (stmt)) >>>> && !gimple_has_side_effects (stmt) >>>> used in both same_succ_hash and gsi_advance_bw_nondebug_nonlocal evaluates to >>>> false for pure calls. >>>> This is fixed by replacing is_gimple_assign with gimple_has_lhs. >>>> >>>> 2. >>>> In same_succ_equal we check gimples from the 2 bbs side-by-side: >>>> ... >>>> gsi1 = gsi_start_nondebug_bb (bb1); >>>> gsi2 = gsi_start_nondebug_bb (bb2); >>>> while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2))) >>>> { >>>> s1 = gsi_stmt (gsi1); >>>> s2 = gsi_stmt (gsi2); >>>> if (gimple_code (s1) != gimple_code (s2)) >>>> return 0; >>>> if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2)) >>>> return 0; >>>> gsi_next_nondebug (&gsi1); >>>> gsi_next_nondebug (&gsi2); >>>> } >>>> ... >>>> and we'll be comparing 'bar (7)' and 'bar2 (6)', and gimple_call_same_target_p >>>> will return false. >>>> This is fixed by ignoring local defs in this comparison, by using >>>> gsi_advance_fw_nondebug_nonlocal on the iterators. >>>> >>>> bootstrapped and reg-tested on x86_64. >>>> >>>> ok for stage1? >>> >>> Sorry for responding so late ... >> >> no problem :) >> >>> I think these fixes hint at that we should >>> use "structural" equality as fallback if value-numbering doesn't equate >>> two stmt effects. Thus, treat two stmts with exactly the same operands >>> and flags as equal and using value-numbering to canonicalize operands >>> (when they are SSA names) for that comparison, or use VN entirely >>> if there are no side-effects on the stmt. >>> >>> Changing value-numbering of virtual operands, even if it looks correct in the >>> simple cases you change, doesn't look like a general solution for the missed >>> tail merging opportunities. >>> >> >> Your comment is relevant for the other recent tail-merge related fixes I >> submitted, but I think not for this one. >> >> In this case, value-numbering manages to value number the 2 phi-alternatives >> equal. It's tail-merging that doesn't take advantage of this, by treating pure >> function calls the same as non-pure function calls. The fixes are therefore in >> tail-merging, not in value numbering. >> >> So, ok for stage1? > > I see. A few comments. > > +/* Returns whether VAL is used in the same bb as in which it is defined, or > + in the phi of a successor bb. */ > + > +static bool > +local_def (tree val) > +{ > + gimple stmt, def_stmt; > + basic_block bb, def_bb; > + imm_use_iterator iter; > + bool res; > + > + if (TREE_CODE (val) != SSA_NAME) > + return false; > > what about SSA_NAME_IS_DEFAULT_DEF names? They have a def-stmt > with a NULL basic-block. As you suggested below, I've inlined local_def into stmt_local_def, so val is now initialized with DEF_FROM_PTR (SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF)). Which means that SSA_NAME_IS_DEFAULT_DEF (val) is false. > > + res = true; > + FOR_EACH_IMM_USE_STMT (stmt, iter, val) > + { > + bb = gimple_bb (stmt); > + if (bb == def_bb) > + continue; > + if (gimple_code (stmt) == GIMPLE_PHI > + && find_edge (def_bb, bb)) > + continue; > + res = false; > + BREAK_FROM_IMM_USE_STMT (iter); > > I'd use FOR_EACH_IMM_USE_FAST here, that should be faster (avoids > the iterator marker writes), Done. > and find_edge can be replaced by > PHI_ARG_INDEX_FROM_USE () - well, you get the edge index that way, > so EDGE_PRED (def_bb, PHI_ARG_INDEX_FROM_USE (use_p)) == bb > would be the condition to test. > Done. > local_def seems to be only used from stmt_local_def, consider > inlining it there instead. Done. > Btw, what about other DEFs of a stmt > than the lhs? I'd say you should use single_ssa_def_operand () > to get at the DEF, not gimple_get_lhs. > Done. > Ok with that changes. Due to the fix for PR52734, I don't bother anymore about factoring out commonality between gsi_advance_fw_nondebug_nonlocal and gsi_advance_bw_nondebug_nonlocal, so the patch is a bit smaller now. Committed to trunk and attached. Thanks, - Tom > > Thanks, > Richard. 2012-04-14 Tom de Vries * tree-ssa-tail-merge.c (stmt_local_def): New function, factored out of same_succ_hash, with local_def inlined. Use SINGLE_SSA_DEF_OPERAND. Use FOR_EACH_IMM_USE_FAST instead of FOR_EACH_IMM_USE_STMT. Remove use of find_edge. (gsi_advance_fw_nondebug_nonlocal): New function. (local_def): Removed function. (same_succ_hash): Use stmt_local_def. (same_succ_equal): Use gsi_advance_fw_nondebug_nonlocal. (gsi_advance_bw_nondebug_nonlocal): Use stmt_local_def. * gcc.dg/pr51879-12.c: New test. diff -u gcc/tree-ssa-tail-merge.c (working copy) gcc/tree-ssa-tail-merge.c (working copy) --- gcc/tree-ssa-tail-merge.c (working copy) +++ gcc/tree-ssa-tail-merge.c (working copy) @@ -269,6 +269,67 @@ #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit) #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb) +/* Returns true if the only effect a statement STMT has, is to define locally + used SSA_NAMEs. */ + +static bool +stmt_local_def (gimple stmt) +{ + basic_block bb, def_bb; + imm_use_iterator iter; + use_operand_p use_p; + tree val; + def_operand_p def_p; + + if (gimple_has_side_effects (stmt)) + return false; + + def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF); + if (def_p == NULL) + return false; + + val = DEF_FROM_PTR (def_p); + if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME) + return false; + + def_bb = gimple_bb (stmt); + + FOR_EACH_IMM_USE_FAST (use_p, iter, val) + { + if (is_gimple_debug (USE_STMT (use_p))) + continue; + bb = gimple_bb (USE_STMT (use_p)); + if (bb == def_bb) + continue; + + if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI + && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb) + continue; + + return false; + } + + return true; +} + +/* Let GSI skip forwards over local defs. */ + +static void +gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi) +{ + gimple stmt; + + while (true) + { + if (gsi_end_p (*gsi)) + return; + stmt = gsi_stmt (*gsi); + if (!stmt_local_def (stmt)) + return; + gsi_next_nondebug (gsi); + } +} + /* VAL1 and VAL2 are either: - uses in BB1 and BB2, or - phi alternatives for BB1 and BB2. @@ -352,39 +413,6 @@ update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use)); } -/* Returns whether VAL is used in the same bb as in which it is defined, or - in the phi of a successor bb. */ - -static bool -local_def (tree val) -{ - gimple stmt, def_stmt; - basic_block bb, def_bb; - imm_use_iterator iter; - bool res; - - if (TREE_CODE (val) != SSA_NAME) - return false; - def_stmt = SSA_NAME_DEF_STMT (val); - def_bb = gimple_bb (def_stmt); - - res = true; - FOR_EACH_IMM_USE_STMT (stmt, iter, val) - { - if (is_gimple_debug (stmt)) - continue; - bb = gimple_bb (stmt); - if (bb == def_bb) - continue; - if (gimple_code (stmt) == GIMPLE_PHI - && find_edge (def_bb, bb)) - continue; - res = false; - BREAK_FROM_IMM_USE_STMT (iter); - } - return res; -} - /* Calculates hash value for same_succ VE. */ static hashval_t @@ -408,8 +436,7 @@ { stmt = gsi_stmt (gsi); stmt_update_dep_bb (stmt); - if (is_gimple_assign (stmt) && local_def (gimple_get_lhs (stmt)) - && !gimple_has_side_effects (stmt)) + if (stmt_local_def (stmt)) continue; size++; @@ -525,6 +552,8 @@ gsi1 = gsi_start_nondebug_bb (bb1); gsi2 = gsi_start_nondebug_bb (bb2); + gsi_advance_fw_nondebug_nonlocal (&gsi1); + gsi_advance_fw_nondebug_nonlocal (&gsi2); while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2))) { s1 = gsi_stmt (gsi1); @@ -535,6 +564,8 @@ return 0; gsi_next_nondebug (&gsi1); gsi_next_nondebug (&gsi2); + gsi_advance_fw_nondebug_nonlocal (&gsi1); + gsi_advance_fw_nondebug_nonlocal (&gsi2); } return 1; @@ -1148,8 +1179,7 @@ *vuse_escaped = true; } - if (!(is_gimple_assign (stmt) && local_def (gimple_get_lhs (stmt)) - && !gimple_has_side_effects (stmt))) + if (!stmt_local_def (stmt)) return; gsi_prev_nondebug (gsi); } --- /dev/null (new file) +++ gcc/testsuite/gcc.dg/pr51879-12.c (revision 0) @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-pre" } */ + +__attribute__((pure)) int bar (int); +__attribute__((pure)) int bar2 (int); +void baz (int); + +int x, z; + +void +foo (int y) +{ + int a = 0; + if (y == 6) + { + a += bar (7); + a += bar2 (6); + } + else + { + a += bar2 (6); + a += bar (7); + } + baz (a); +} + +/* { dg-final { scan-tree-dump-times "bar \\(" 1 "pre"} } */ +/* { dg-final { scan-tree-dump-times "bar2 \\(" 1 "pre"} } */ +/* { dg-final { cleanup-tree-dump "pre" } } */