From patchwork Tue Aug 17 17:00:18 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Sebastian Pop X-Patchwork-Id: 61940 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 9E547B6EE9 for ; Wed, 18 Aug 2010 03:01:15 +1000 (EST) Received: (qmail 25506 invoked by alias); 17 Aug 2010 17:01:08 -0000 Received: (qmail 25362 invoked by uid 22791); 17 Aug 2010 17:00:59 -0000 X-SWARE-Spam-Status: No, hits=-1.7 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE, TW_FC, TW_QM, TW_QS, TW_TM, T_TO_NO_BRKTS_FREEMAIL X-Spam-Check-By: sourceware.org Received: from mail-pw0-f47.google.com (HELO mail-pw0-f47.google.com) (209.85.160.47) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Tue, 17 Aug 2010 17:00:49 +0000 Received: by mail-pw0-f47.google.com with SMTP id 1so2461420pwj.20 for ; Tue, 17 Aug 2010 10:00:49 -0700 (PDT) Received: by 10.114.89.11 with SMTP id m11mr8170630wab.150.1282064449201; Tue, 17 Aug 2010 10:00:49 -0700 (PDT) Received: from napoca ([163.181.251.115]) by mx.google.com with ESMTPS id x9sm14686704waj.3.2010.08.17.10.00.46 (version=TLSv1/SSLv3 cipher=RC4-MD5); Tue, 17 Aug 2010 10:00:48 -0700 (PDT) Received: by napoca (sSMTP sendmail emulation); Tue, 17 Aug 2010 12:00:34 -0500 From: Sebastian Pop To: gcc-patches@gcc.gnu.org Cc: rguenther@suse.de, Sebastian Pop Subject: [PATCH 2/3] Do not check whether memory references accessed in every iteration trap. Date: Tue, 17 Aug 2010 12:00:18 -0500 Message-Id: <1282064419-13251-2-git-send-email-sebpop@gmail.com> In-Reply-To: <1282064419-13251-1-git-send-email-sebpop@gmail.com> References: <1282064419-13251-1-git-send-email-sebpop@gmail.com> X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org This patch relaxes the checks from gimple_could_trap_p in order to allow the flag_loop_if_convert_memory_writes to if-convert more loops in which it is possible to prove that: - the accesses to an array in a loop do not trap (more than the original non-if-converted loop). This is true when the memory accesses are executed at every iteration of the if-converted loop. - the writes to memory occur on arrays that are not const qualified. This is true when there exists at least one unconditional write to the array in the analyzed program. In this patch this analysis is limited to the loop to be if-converted. * gimple.c (gimple_could_trap_p_1): Not static anymore. Pass an extra bool parameter include_mem. (gimple_could_trap_p): Adjust call to gimple_could_trap_p_1. (gimple_assign_rhs_could_trap_p): Same. * gimple.h (gimple_could_trap_p_1): Declared. * tree-data-ref.h (same_data_refs_base_objects): New. (same_data_refs): New. * tree-if-conv.c (memref_read_or_written_unconditionally): New. (memrefs_read_or_written_unconditionally): New. (write_memref_written_at_least_once): New. (write_memrefs_written_at_least_once): New. (ifcvt_memrefs_wont_trap): New. (operations_could_trap): New. (ifcvt_could_trap_p): New. (if_convertible_gimple_assign_stmt_p): Call ifcvt_could_trap_p. Gets a vector of data refs. (if_convertible_stmt_p): Same. (if_convertible_loop_p_1): New. (if_convertible_loop_p): Call if_convertible_loop_p_1. * gcc.dg/tree-ssa/ifc-5.c: New. --- gcc/gimple.c | 30 ++-- gcc/gimple.h | 1 + gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c | 24 +++ gcc/tree-data-ref.h | 33 ++++ gcc/tree-if-conv.c | 283 +++++++++++++++++++++++++-------- 5 files changed, 288 insertions(+), 83 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c diff --git a/gcc/gimple.c b/gcc/gimple.c index e3de834..3498e9c 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -2399,24 +2399,25 @@ gimple_rhs_has_side_effects (const_gimple s) return false; } - /* Helper for gimple_could_trap_p and gimple_assign_rhs_could_trap_p. - Return true if S can trap. If INCLUDE_LHS is true and S is a - GIMPLE_ASSIGN, the LHS of the assignment is also checked. - Otherwise, only the RHS of the assignment is checked. */ + Return true if S can trap. When INCLUDE_MEM is true, check whether + the memory operations could trap. When INCLUDE_STORES is true and + S is a GIMPLE_ASSIGN, the LHS of the assignment is also checked. */ -static bool -gimple_could_trap_p_1 (gimple s, bool include_lhs) +bool +gimple_could_trap_p_1 (gimple s, bool include_mem, bool include_stores) { - unsigned i, start; tree t, div = NULL_TREE; enum tree_code op; - start = (is_gimple_assign (s) && !include_lhs) ? 1 : 0; + if (include_mem) + { + unsigned i, start = (is_gimple_assign (s) && !include_stores) ? 1 : 0; - for (i = start; i < gimple_num_ops (s); i++) - if (tree_could_trap_p (gimple_op (s, i))) - return true; + for (i = start; i < gimple_num_ops (s); i++) + if (tree_could_trap_p (gimple_op (s, i))) + return true; + } switch (gimple_code (s)) { @@ -2445,26 +2446,23 @@ gimple_could_trap_p_1 (gimple s, bool include_lhs) } return false; - } - /* Return true if statement S can trap. */ bool gimple_could_trap_p (gimple s) { - return gimple_could_trap_p_1 (s, true); + return gimple_could_trap_p_1 (s, true, true); } - /* Return true if RHS of a GIMPLE_ASSIGN S can trap. */ bool gimple_assign_rhs_could_trap_p (gimple s) { gcc_assert (is_gimple_assign (s)); - return gimple_could_trap_p_1 (s, false); + return gimple_could_trap_p_1 (s, true, false); } diff --git a/gcc/gimple.h b/gcc/gimple.h index 7909986..545b271 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -886,6 +886,7 @@ void gimple_cond_set_condition_from_tree (gimple, tree); bool gimple_has_side_effects (const_gimple); bool gimple_rhs_has_side_effects (const_gimple); bool gimple_could_trap_p (gimple); +bool gimple_could_trap_p_1 (gimple, bool, bool); bool gimple_assign_rhs_could_trap_p (gimple); void gimple_regimplify_operands (gimple, gimple_stmt_iterator *); bool empty_body_p (gimple_seq); diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c new file mode 100644 index 0000000..a9cc816 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-5.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-c -O2 -ftree-vectorize -fdump-tree-ifcvt-stats" { target *-*-* } } */ + +void +dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs) +{ + int i, level, qmul, qadd; + + qadd = (qscale - 1) | 1; + qmul = qscale << 1; + + for (i = 0; i <= nCoeffs; i++) + { + level = block[i]; + if (level < 0) + level = level * qmul - qadd; + else + level = level * qmul + qadd; + block[i] = level; + } +} + +/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */ +/* { dg-final { cleanup-tree-dump "ifcvt" } } */ diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index 9e18e26..757c3c1 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -417,6 +417,39 @@ extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *); extern bool dr_may_alias_p (const struct data_reference *, const struct data_reference *); + +/* Return true when the base objects of data references A and B are + the same memory object. */ + +static inline bool +same_data_refs_base_objects (data_reference_p a, data_reference_p b) +{ + return DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b) + && operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0); +} + +/* Return true when the data references A and B are accessing the same + memory object with the same access functions. */ + +static inline bool +same_data_refs (data_reference_p a, data_reference_p b) +{ + unsigned int i; + + /* The references are exactly the same. */ + if (operand_equal_p (DR_REF (a), DR_REF (b), 0)) + return true; + + if (!same_data_refs_base_objects (a, b)) + return false; + + for (i = 0; i < DR_NUM_DIMENSIONS (a); i++) + if (!eq_evolutions_p (DR_ACCESS_FN (a, i), DR_ACCESS_FN (b, i))) + return false; + + return true; +} + /* Return true when the DDR contains two data references that have the same access functions. */ diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index b465311..1cf53f5 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -446,6 +446,144 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi) return true; } +/* Returns true when the memory reference A at position POS in the + data references vector DRS and executed under the condition CA, is + read or written unconditionally. In other words, this function + returns true when there exist other accesses to the same data + reference with predicates that add up (OR-up) to the true + predicate: this ensures that the data reference A is touched (read + or written) on every iteration of the if-converted loop. */ + +static bool +memref_read_or_written_unconditionally (int pos, data_reference_p a, + VEC (data_reference_p, heap) *drs, + tree ca) +{ + int i; + data_reference_p b; + + for (i = 0; VEC_iterate (data_reference_p, drs, i, b); i++) + if (i != pos && same_data_refs (a, b)) + { + tree cb = bb_predicate (gimple_bb (DR_STMT (b))); + + if (is_true_predicate (cb) + || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb), + ca, cb))) + return true; + } + + return false; +} + +/* Returns true when the memory references of STMT are read or written + unconditionally. */ + +static bool +memrefs_read_or_written_unconditionally (gimple stmt, + VEC (data_reference_p, heap) *drs) +{ + int i; + data_reference_p a; + tree ca = bb_predicate (gimple_bb (stmt)); + + for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++) + if (DR_STMT (a) == stmt + && !memref_read_or_written_unconditionally (i, a, drs, ca)) + return false; + + return true; +} + + +/* Returns true when the memory reference A at position P in the data + references vector DRS and executed under the condition CA, is + unconditionally written. */ + +static bool +write_memref_written_at_least_once (int pos, data_reference_p a, + VEC (data_reference_p, heap) *drs, + tree ca) +{ + int i; + data_reference_p b; + + for (i = 0; VEC_iterate (data_reference_p, drs, i, b); i++) + if (i != pos + && !DR_IS_READ (b) + && same_data_refs_base_objects (a, b)) + { + tree cb = bb_predicate (gimple_bb (DR_STMT (b))); + + if (is_true_predicate (cb) + || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb), + ca, cb))) + return true; + } + + return false; +} + +/* Returns true when the memory references of STMT are unconditionally + written. */ + +static bool +write_memrefs_written_at_least_once (gimple stmt, + VEC (data_reference_p, heap) *drs) +{ + int i; + data_reference_p a; + tree ca = bb_predicate (gimple_bb (stmt)); + + for (i = 0; VEC_iterate (data_reference_p, drs, i, a); i++) + if (DR_STMT (a) == stmt + && !DR_IS_READ (a) + && !write_memref_written_at_least_once (i, a, drs, ca)) + return false; + + return true; +} + +/* Return true when the memory references of STMT won't trap in the + if-converted code. There are two things that we have to check for: + + - writes to memory occur to writable memory: if-conversion of + memory writes transforms the conditional memory writes into + unconditional writes, i.e. "if (cond) A[i] = foo" is transformed + into "A[i] = cond ? foo : A[i]", and as the write to memory may not + be executed at all in the original code, it may be a readonly + memory. To check that A is not const-qualified, we check that + there exists at least an unconditional write to A in the current + function. + + - reads or writes to memory are valid memory accesses for every + iteration. To check that the memory accesses are correctly formed + and that we are allowed to read and write in these locations, we + check that the memory accesses to be if-converted occur at every + iteration unconditionally. */ + +static bool +ifcvt_memrefs_wont_trap (gimple stmt, VEC (data_reference_p, heap) *refs) +{ + return write_memrefs_written_at_least_once (stmt, refs) + && memrefs_read_or_written_unconditionally (stmt, refs); +} + +/* Wrapper around gimple_could_trap_p refined for the needs of the + if-conversion. Try to prove that the memory accesses of STMT could + not trap in the innermost loop containing STMT. */ + +static bool +ifcvt_could_trap_p (gimple stmt, VEC (data_reference_p, heap) *refs) +{ + if (gimple_vuse (stmt) + && !gimple_could_trap_p_1 (stmt, false, false) + && ifcvt_memrefs_wont_trap (stmt, refs)) + return false; + + return gimple_could_trap_p (stmt); +} + /* Return true when STMT is if-convertible. GIMPLE_ASSIGN statement is not if-convertible if, @@ -454,7 +592,8 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gimple phi) - LHS is not var decl. */ static bool -if_convertible_gimple_assign_stmt_p (gimple stmt) +if_convertible_gimple_assign_stmt_p (gimple stmt, + VEC (data_reference_p, heap) *refs) { tree lhs = gimple_assign_lhs (stmt); basic_block bb; @@ -482,7 +621,7 @@ if_convertible_gimple_assign_stmt_p (gimple stmt) if (flag_tree_loop_if_convert_memory_writes) { - if (gimple_could_trap_p (stmt)) + if (ifcvt_could_trap_p (stmt, refs)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "tree could trap...\n"); @@ -522,7 +661,7 @@ if_convertible_gimple_assign_stmt_p (gimple stmt) - it is a GIMPLE_LABEL or a GIMPLE_COND. */ static bool -if_convertible_stmt_p (gimple stmt) +if_convertible_stmt_p (gimple stmt, VEC (data_reference_p, heap) *refs) { switch (gimple_code (stmt)) { @@ -532,7 +671,7 @@ if_convertible_stmt_p (gimple stmt) return true; case GIMPLE_ASSIGN: - return if_convertible_gimple_assign_stmt_p (stmt); + return if_convertible_gimple_assign_stmt_p (stmt, refs); default: /* Don't know what to do with 'em so don't do anything. */ @@ -800,69 +939,24 @@ predicate_bbs (loop_p loop) return true; } -/* Return true when LOOP is if-convertible. - LOOP is if-convertible if: - - it is innermost, - - it has two or more basic blocks, - - it has only one exit, - - loop header is not the exit edge, - - if its basic blocks and phi nodes are if convertible. */ +/* Return true when LOOP is if-convertible. This is a helper function + for if_convertible_loop_p. REFS and DDRS are initialized and freed + in if_convertible_loop_p. */ static bool -if_convertible_loop_p (struct loop *loop) +if_convertible_loop_p_1 (struct loop *loop, + VEC (data_reference_p, heap) **refs, + VEC (ddr_p, heap) **ddrs) { + bool res; unsigned int i; - edge e; - edge_iterator ei; basic_block exit_bb = NULL; - /* Handle only innermost loop. */ - if (!loop || loop->inner) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "not innermost loop\n"); - return false; - } - - /* If only one block, no need for if-conversion. */ - if (loop->num_nodes <= 2) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "less than 2 basic blocks\n"); - return false; - } - - /* More than one loop exit is too much to handle. */ - if (!single_exit (loop)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "multiple exits\n"); - return false; - } - - /* ??? Check target's vector conditional operation support for vectorizer. */ - - /* If one of the loop header's edge is exit edge then do not apply - if-conversion. */ - FOR_EACH_EDGE (e, ei, loop->header->succs) - { - if (loop_exit_edge_p (loop, e)) - return false; - } - /* Don't if-convert the loop when the data dependences cannot be computed: the loop won't be vectorized in that case. */ - { - VEC (data_reference_p, heap) *refs = VEC_alloc (data_reference_p, heap, 5); - VEC (ddr_p, heap) *ddrs = VEC_alloc (ddr_p, heap, 25); - bool res = compute_data_dependences_for_loop (loop, true, &refs, &ddrs); - - free_data_refs (refs); - free_dependence_relations (ddrs); - - if (!res) - return false; - } + res = compute_data_dependences_for_loop (loop, true, refs, ddrs); + if (!res) + return false; calculate_dominance_info (CDI_DOMINATORS); @@ -886,7 +980,8 @@ if_convertible_loop_p (struct loop *loop) exit_bb = bb; } - if (!predicate_bbs (loop)) + res = predicate_bbs (loop); + if (!res) return false; for (i = 0; i < loop->num_nodes; i++) @@ -898,13 +993,11 @@ if_convertible_loop_p (struct loop *loop) if (!if_convertible_phi_p (loop, bb, gsi_stmt (itr))) return false; - /* For non predicated BBs, don't check their statements. */ - if (!is_predicated (bb)) - continue; - - for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr)) - if (!if_convertible_stmt_p (gsi_stmt (itr))) - return false; + /* Check the if-convertibility of statements in predicated BBs. */ + if (is_predicated (bb)) + for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr)) + if (!if_convertible_stmt_p (gsi_stmt (itr), *refs)) + return false; } if (dump_file) @@ -913,6 +1006,62 @@ if_convertible_loop_p (struct loop *loop) return true; } +/* Return true when LOOP is if-convertible. + LOOP is if-convertible if: + - it is innermost, + - it has two or more basic blocks, + - it has only one exit, + - loop header is not the exit edge, + - if its basic blocks and phi nodes are if convertible. */ + +static bool +if_convertible_loop_p (struct loop *loop) +{ + edge e; + edge_iterator ei; + bool res = false; + VEC (data_reference_p, heap) *refs; + VEC (ddr_p, heap) *ddrs; + + /* Handle only innermost loop. */ + if (!loop || loop->inner) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "not innermost loop\n"); + return false; + } + + /* If only one block, no need for if-conversion. */ + if (loop->num_nodes <= 2) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "less than 2 basic blocks\n"); + return false; + } + + /* More than one loop exit is too much to handle. */ + if (!single_exit (loop)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "multiple exits\n"); + return false; + } + + /* If one of the loop header's edge is an exit edge then do not + apply if-conversion. */ + FOR_EACH_EDGE (e, ei, loop->header->succs) + if (loop_exit_edge_p (loop, e)) + return false; + + refs = VEC_alloc (data_reference_p, heap, 5); + ddrs = VEC_alloc (ddr_p, heap, 25); + res = if_convertible_loop_p_1 (loop, &refs, &ddrs); + + free_data_refs (refs); + free_dependence_relations (ddrs); + return res; +} + /* Basic block BB has two predecessors. Using predecessor's bb predicate, set an appropriate condition COND for the PHI node replacement. Return the true block whose phi arguments are