===================================================================
@@ -1,5 +1,5 @@
/* This file contains the list of the debug counter for GCC.
- Copyright (C) 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
This file is part of GCC.
@@ -184,3 +184,4 @@
DEBUG_COUNTER (store_motion)
DEBUG_COUNTER (split_for_sched2)
DEBUG_COUNTER (tail_call)
+DEBUG_COUNTER (lower_addr)
===================================================================
@@ -0,0 +1,924 @@
+/* Expose target addressing modes in late gimple representation.
+ Copyright (C) 2011
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This pass performs a scan over the gimple representation to expose
+ target machine addressing in reference-class expressions. This is
+ necessary for some targets for which the current address
+ canonicalization is suboptimal. Induction variable optimizations
+ already expose target addressing for many (but not all) expressions
+ in loops, so this pass has the most benefit in non-loop code. The
+ algorithms used here are simplifications of those used in the
+ induction variable optimizations. */
+
+/* EXPERTS: The following is a fairly common pattern that results from
+ the current algorithm:
+
+ <bb 2>:
+ D.2154_8 = n_2(D) * 4;
+ D.2107_3 = MEM[base: p_1(D), index: D.2154_8, offset: 0B];
+ D.2156_10 = p_1(D) + D.2154_8;
+ D.2108_4 = MEM[(struct x *)D.2156_10 + 128B];
+ D.2109_5 = MEM[(struct x *)D.2156_10 + 64B];
+ foo (D.2107_3, D.2108_4, D.2109_5);
+ return;
+
+ When the addressing expression consists of two tree addends and
+ a non-zero offset, the addends are pre-added and used as the base
+ expression, with the offset becoming the immediate operand in the
+ storage op. When the offset is zero, the base and index are
+ added implicitly in an indexed-form storage op.
+
+ The bias to the indexed form with a zero offset is good with no
+ knowledge of the surrounding code; it saves an instruction
+ and a dependency with a possible small increase in register
+ pressure. As can be seen here, though, we end up with a missed
+ commoning opportunity, since the second instruction could become:
+
+ D.2107_3 = MEM[(struct x *)D.2156_10 + 0B];
+
+ Since the add into D.2156_10 is going to occur anyway, the
+ advantage of the bias to indexed form vanishes.
+
+ I think we want to keep the current bias in tree-ssa-address.c,
+ but fix up these cases when they occur. The best thing would
+ probably be to post-process each block looking for these
+ opportunities and changing the zero-offset forms only when
+ profitable to common. I'm curious if you might see a more
+ efficient approach. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "basic-block.h"
+#include "tree-flow.h"
+#include "tree-dump.h"
+#include "tree-pass.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "gimple.h"
+#include "tree-affine.h"
+#include "tree-pretty-print.h"
+#include "dbgcnt.h"
+#include "alloc-pool.h"
+
+/* Hash table for remembering local expressions that contribute to
+ address calculations. */
+static htab_t local_addr_table;
+
+/* Hash table entry. */
+typedef struct local_addr_expr_d
+{
+ /* The expression consists of an operation and 1 or 2 operands. */
+ enum tree_code operation;
+ tree operand1;
+ tree operand2;
+
+ /* Its representative SSA name. */
+ tree repr;
+
+ /* Cached hash code. */
+ hashval_t hashcode;
+} local_addr_expr, *local_addr_expr_t;
+
+typedef const struct local_addr_expr_d *const_local_addr_expr_t;
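+
+/* For illustration (hypothetical table contents): the statement
+ "D.2154_8 = n_2(D) * 4" from the example at the top of this file
+ would be stored as operation MULT_EXPR with operands n_2(D) and 4,
+ and with D.2154_8 as its representative SSA name. */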
+
+/* Allocation pool for hash table entries. */
+static alloc_pool local_addr_pool;
+
+/* Callback to produce a hash value for an expression. */
+static hashval_t
+local_addr_hash (const void *p)
+{
+ const_local_addr_expr_t const expr = (const_local_addr_expr_t) p;
+ return expr->hashcode;
+}
+
+/* Callback when an element is removed from the hash table. */
+static void
+local_addr_free (void *p)
+{
+ pool_free (local_addr_pool, p);
+}
+
+/* Return true if two leaf nodes are equal. */
+static int
+leaves_eq (tree opnd1, tree opnd2)
+{
+ tree subopnd1, subopnd2;
+ enum tree_code code1, code2;
+
+ if (TREE_CODE (opnd1) != TREE_CODE (opnd2))
+ return false;
+
+ switch (TREE_CODE (opnd1))
+ {
+ case SSA_NAME:
+ return (DECL_UID (SSA_NAME_VAR (opnd1)) ==
+ DECL_UID (SSA_NAME_VAR (opnd2)) &&
+ SSA_NAME_VERSION (opnd1) == SSA_NAME_VERSION (opnd2));
+ case INTEGER_CST:
+ return double_int_equal_p (TREE_INT_CST (opnd1), TREE_INT_CST (opnd2));
+ case ADDR_EXPR:
+ /* &var or &parm. */
+ subopnd1 = TREE_OPERAND (opnd1, 0);
+ subopnd2 = TREE_OPERAND (opnd2, 0);
+ code1 = TREE_CODE (subopnd1);
+ code2 = TREE_CODE (subopnd2);
+ gcc_assert (code1 == code2);
+ gcc_assert (code1 == VAR_DECL || code1 == PARM_DECL);
+ return (DECL_UID (subopnd1) == DECL_UID (subopnd2));
+ default:
+ gcc_unreachable ();
+ }
+
+ return true;
+}
+
+/* Callback for hashtab support. Return true if two expressions are
+ equal. We are only interested in simple expressions involving SSA
+ names, addresses of vars or parms, integer constants, addition,
+ subtraction, multiplication, and division. */
+static int
+local_addr_eq (const void *p1, const void *p2)
+{
+ const_local_addr_expr_t const entry1 = (const_local_addr_expr_t) p1;
+ const_local_addr_expr_t const entry2 = (const_local_addr_expr_t) p2;
+ tree left_opnd1 = entry1->operand1;
+ tree left_opnd2 = entry2->operand1;
+ tree right_opnd1 = entry1->operand2;
+ tree right_opnd2 = entry2->operand2;
+
+ if (entry1->operation != entry2->operation)
+ return false;
+
+ if (TREE_CODE (left_opnd1) != TREE_CODE (left_opnd2))
+ return false;
+
+ if (entry1->operation == SSA_NAME || entry1->operation == NOP_EXPR)
+ return (DECL_UID (SSA_NAME_VAR (left_opnd1)) ==
+ DECL_UID (SSA_NAME_VAR (left_opnd2)) &&
+ SSA_NAME_VERSION (left_opnd1) == SSA_NAME_VERSION (left_opnd2));
+
+ if (TREE_CODE (right_opnd1) != TREE_CODE (right_opnd2))
+ return false;
+
+ if (leaves_eq (left_opnd1, left_opnd2) &&
+ leaves_eq (right_opnd1, right_opnd2))
+ return true;
+
+ /* Handle commutative operations.
+ #### Of the DIV operations, only EXACT has been seen so far.
+ Change others to asserts with an eye toward removal.
+ EXPERTS: Can TRUNC_DIV or FLOOR_DIV appear in addr exprs? */
+ if (entry1->operation == MINUS_EXPR ||
+ entry1->operation == TRUNC_DIV_EXPR ||
+ entry1->operation == FLOOR_DIV_EXPR ||
+ entry1->operation == EXACT_DIV_EXPR)
+ return false;
+
+ return (leaves_eq (left_opnd1, right_opnd2) &&
+ leaves_eq (right_opnd1, left_opnd2));
+}
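+
+/* For illustration (hypothetical SSA names): with the logic above,
+ "a_1 + b_2" and "b_2 + a_1" compare equal, since PLUS_EXPR is
+ matched commutatively, while "a_1 - b_2" and "b_2 - a_1" do not,
+ because MINUS_EXPR and the division codes take the early exit. */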
+
+/* Provide a hash code for SSA_NAME, INTEGER_CST, and ADDR_EXPR of
+ var/parm operands. */
+static hashval_t
+hash_simple_op (tree operand)
+{
+ if (TREE_CODE (operand) == SSA_NAME)
+ return DECL_UID (SSA_NAME_VAR (operand)) * SSA_NAME_VERSION (operand);
+ else if (TREE_CODE (operand) == INTEGER_CST)
+ return (hashval_t)TREE_INT_CST_LOW (operand);
+ else if (TREE_CODE (operand) == ADDR_EXPR)
+ {
+ tree subopnd = TREE_OPERAND (operand, 0);
+ gcc_assert (TREE_CODE (subopnd) == VAR_DECL ||
+ TREE_CODE (subopnd) == PARM_DECL);
+ return DECL_UID (subopnd);
+ }
+
+ gcc_unreachable ();
+ return 0;
+}
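+
+/* For illustration (hypothetical values): an SSA name whose variable
+ has DECL_UID 1234 and whose version is 7 hashes to 1234 * 7, and
+ the integer constant 4 hashes to 4. Operand hashes are combined
+ with XOR in local_addr_cse, so the hash of a whole expression is
+ insensitive to operand order, as commutative matching requires. */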
+
+/* Allocate a new local_addr_expr from the pool and fill it in. */
+static local_addr_expr_t
+alloc_local_addr_expr (enum tree_code operation_in, tree operand1_in,
+ tree operand2_in, tree repr_in,
+ hashval_t hashcode_in)
+{
+ local_addr_expr_t entry = (local_addr_expr_t)pool_alloc (local_addr_pool);
+
+ entry->operation = operation_in;
+ entry->operand1 = operand1_in;
+ entry->operand2 = operand2_in;
+ entry->repr = repr_in;
+ entry->hashcode = hashcode_in;
+
+ return entry;
+}
+
+/* Look up NAME in the hash table and, if found, return the entry of its
+ representative. Otherwise make it its own representative and return
+ that entry. */
+static local_addr_expr_t
+local_addr_ssa_name_cse (tree name)
+{
+ local_addr_expr_t new_entry =
+ alloc_local_addr_expr (SSA_NAME, name, NULL_TREE, name,
+ hash_simple_op (name));
+
+ void **slot = htab_find_slot_with_hash (local_addr_table, new_entry,
+ new_entry->hashcode, INSERT);
+ if (!*slot)
+ *slot = new_entry;
+ return (local_addr_expr_t)*slot;
+}
+
+/* The given RHS was found as the operand of a NOP_EXPR. Look it up in
+ the hash table; if it wasn't there previously, make LHS its
+ representative. Otherwise keep the existing representative. Return
+ a pointer to its hash table entry. */
+static local_addr_expr_t
+local_addr_nop_expr_cse (tree lhs, tree rhs)
+{
+ local_addr_expr_t new_entry =
+ alloc_local_addr_expr (NOP_EXPR, rhs, NULL_TREE, lhs,
+ hash_simple_op (rhs));
+ void **slot = htab_find_slot_with_hash (local_addr_table, new_entry,
+ new_entry->hashcode, INSERT);
+ if (!*slot)
+ *slot = new_entry;
+ return (local_addr_expr_t)*slot;
+}
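+
+/* For illustration (hypothetical SSA names): for a first statement
+ "i_2 = (int) s_1", the entry { NOP_EXPR, s_1 } receives i_2 as its
+ representative; a later "j_3 = (int) s_1" finds that entry, so its
+ RHS can be replaced by i_2. */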
+
+/* Perform CSE on OPND, possibly replacing it with a representative
+ SSA name. Returns true iff the tree rooted at *OPND has been
+ modified. */
+static bool
+local_addr_expr_cse (tree *opnd)
+{
+ enum tree_code code;
+ local_addr_expr_t entry_ptr;
+ unsigned int opnd_idx;
+ bool changed = false;
+
+ if (!*opnd)
+ return false;
+
+ code = TREE_CODE (*opnd);
+
+ switch (code)
+ {
+ case SSA_NAME:
+ entry_ptr = local_addr_ssa_name_cse (*opnd);
+ if (DECL_UID (SSA_NAME_VAR (*opnd)) !=
+ DECL_UID (SSA_NAME_VAR (entry_ptr->repr)) ||
+ SSA_NAME_VERSION (*opnd) != SSA_NAME_VERSION (entry_ptr->repr))
+ {
+ *opnd = entry_ptr->repr;
+ return true;
+ }
+ return false;
+
+ case INTEGER_CST:
+ return false;
+
+ default:
+ for (opnd_idx = 0; opnd_idx < TREE_CODE_LENGTH (code); opnd_idx++)
+ changed = local_addr_expr_cse (&TREE_OPERAND (*opnd, opnd_idx))
+ || changed;
+ }
+ return changed;
+}
+
+/* Remember local expressions that may contribute to address calculation,
+ and replace repeated occurrences with a representative SSA name.
+ Returns true iff STMT has been modified. */
+/* #### TODO: Break this large function up a bit. */
+static bool
+local_addr_cse (gimple stmt)
+{
+ tree lhs, rhs, *ops;
+ unsigned int i, num_ops, initial_i = 0;
+ local_addr_expr_t rhs_entry, new_entry;
+ void **slot;
+ enum tree_code subcode, code[2];
+ bool ignore_result, changed = false;
+ hashval_t hash;
+
+ /* We are only interested in assignments. */
+ if (!is_gimple_assign (stmt))
+ return false;
+
+ lhs = gimple_assign_lhs (stmt);
+
+ /* If copying one SSA name to another, first find the representative
+ of the RHS, and then make that the representative of the LHS. */
+ if (gimple_assign_ssa_name_copy_p (stmt))
+ {
+ rhs = gimple_assign_rhs1 (stmt);
+
+ /* We're only interested in simple SSA names, such as might have
+ been generated during address expansion. Ignore complex
+ memory references. */
+ if (TREE_CODE (rhs) == COMPONENT_REF ||
+ TREE_CODE (rhs) == ARRAY_REF ||
+ TREE_CODE (rhs) == MEM_REF ||
+ TREE_CODE (rhs) == TARGET_MEM_REF)
+ return false;
+
+ rhs_entry = local_addr_ssa_name_cse (rhs);
+ new_entry = alloc_local_addr_expr (SSA_NAME, lhs, NULL_TREE,
+ rhs_entry->repr,
+ hash_simple_op (lhs));
+ slot = htab_find_slot_with_hash (local_addr_table, new_entry,
+ new_entry->hashcode, INSERT);
+ gcc_assert (!*slot);
+ *slot = new_entry;
+ return false;
+ }
+
+ /* Simple converts that generate no code are treated slightly
+ differently. Even if the RHS is a simple variable, the conversion
+ means we can't propagate it directly. Track these as NOP_EXPRs
+ in the hash table. The first occurrence of a NOP_EXPR pattern
+ will receive its LHS SSA_NAME as its representative. */
+ if (gimple_assign_unary_nop_p (stmt))
+ {
+ ops = gimple_ops (stmt);
+ rhs = ops[1];
+
+ if (TREE_CODE (lhs) != SSA_NAME || TREE_CODE (rhs) != SSA_NAME)
+ return false;
+
+ /* First do any replacement on the SSA_NAME of the NOP_EXPR. */
+ changed = local_addr_expr_cse (&rhs);
+
+ /* Now CSE the NOP_EXPR. */
+ rhs_entry = local_addr_nop_expr_cse (lhs, rhs);
+
+ if (rhs_entry->repr != lhs)
+ {
+ gimple_set_op (stmt, 1, rhs_entry->repr);
+ if (TREE_TYPE (lhs) == TREE_TYPE (rhs_entry->repr))
+ gimple_assign_set_rhs_code (stmt, SSA_NAME);
+ ignore_result = local_addr_cse (stmt);
+ return true;
+ }
+ return changed;
+ }
+
+ /* If it's some other form of copy to a simple SSA_NAME, make the
+ LHS its own representative. The RHS will be a constant or
+ something equally simple that doesn't need to be examined. */
+ if (gimple_assign_copy_p (stmt))
+ {
+ if (TREE_CODE (lhs) == SSA_NAME)
+ new_entry = local_addr_ssa_name_cse (lhs);
+ return false;
+ }
+
+ /* Walk each tree on the RHS looking for operands that are eligible to
+ be in the local CSE table, replacing them with their representatives.
+ If LHS is not an SSA_NAME, walk its operands, as well. */
+ ops = gimple_ops (stmt);
+ num_ops = gimple_num_ops (stmt);
+
+ if (TREE_CODE (lhs) == SSA_NAME)
+ initial_i = 1;
+
+ for (i = initial_i; i < num_ops; i++)
+ changed = local_addr_expr_cse (&ops[i]) || changed;
+
+ /* Now that the operands are canonicalized, try to match the
+ expression in the hash table. */
+ subcode = gimple_assign_rhs_code (stmt);
+ switch (subcode)
+ {
+ case PLUS_EXPR:
+ case POINTER_PLUS_EXPR:
+ case MINUS_EXPR:
+ case MULT_EXPR:
+ case TRUNC_DIV_EXPR: /* #### */
+ case FLOOR_DIV_EXPR: /* #### */
+ case EXACT_DIV_EXPR:
+ /* Take a quick exit if this is obviously not part of an address
+ expression. */
+ for (i = 0; i < 2; i++)
+ {
+ code[i] = TREE_CODE (ops[i+1]);
+ if (code[i] == REAL_CST || code[i] == FIXED_CST ||
+ code[i] == COMPLEX_CST || code[i] == VECTOR_CST)
+ return changed;
+
+ /* Same for complex expressions that haven't been reduced. */
+ if (code[i] == ADDR_EXPR)
+ {
+ enum tree_code subcode1 = TREE_CODE (TREE_OPERAND (ops[i+1], 0));
+ if (subcode1 != VAR_DECL && subcode1 != PARM_DECL)
+ return changed;
+ }
+ }
+
+ hash = subcode ^ hash_simple_op (ops[1]) ^ hash_simple_op (ops[2]);
+ new_entry = alloc_local_addr_expr (subcode, ops[1], ops[2], lhs, hash);
+ slot = htab_find_slot_with_hash (local_addr_table, new_entry,
+ new_entry->hashcode, INSERT);
+
+ /* If we've seen it before, attempt to change this statement to an
+ SSA_NAME with the representative appearing as RHS1. If the
+ types of the LHS and the representative differ, attempt to use
+ a NOP_EXPR for some simple cases. Otherwise leave the RHS
+ alone. */
+ if (*slot)
+ {
+ local_addr_expr_t cached_entry_ptr = (local_addr_expr_t)*slot;
+ tree lhs_type, repr_type;
+ enum tree_code new_code;
+
+ gcc_assert (cached_entry_ptr->repr);
+ lhs_type = TREE_TYPE (lhs);
+ repr_type = TREE_TYPE (cached_entry_ptr->repr);
+
+ if (lhs_type == repr_type)
+ new_code = SSA_NAME;
+
+ else if ((INTEGRAL_TYPE_P (lhs_type) ||
+ POINTER_TYPE_P (lhs_type)) &&
+ (INTEGRAL_TYPE_P (repr_type) ||
+ POINTER_TYPE_P (repr_type)) &&
+ (TYPE_SIZE (lhs_type) == TYPE_SIZE (repr_type)))
+ new_code = NOP_EXPR;
+
+ else
+ return changed;
+
+ gimple_assign_set_rhs_code (stmt, new_code);
+ gimple_set_num_ops (stmt, 2);
+ gimple_set_op (stmt, 1, cached_entry_ptr->repr);
+
+ /* Reprocess this statement so the LHS picks up the
+ new RHS as its representative. */
+ ignore_result = local_addr_cse (stmt);
+ changed = true;
+ }
+ else
+ *slot = new_entry;
+ break;
+ default:
+ ;
+ }
+ return changed;
+}
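+
+/* For illustration (hypothetical GIMPLE, assuming identical types):
+ after "D.2156_10 = p_1(D) + D.2154_8" from the example at the top
+ of this file is recorded as { POINTER_PLUS_EXPR, p_1(D), D.2154_8 }
+ with representative D.2156_10, a later occurrence such as
+ "D.2157_12 = p_1(D) + D.2154_8" is rewritten to the copy
+ "D.2157_12 = D.2156_10", and reprocessing the statement makes
+ D.2156_10 the representative of D.2157_12 as well. */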
+
+/* Examine AFF to pick a component to use as the base hint to
+ create_mem_ref. */
+static tree
+choose_base_hint (aff_tree *aff)
+{
+ unsigned i;
+
+ /* Pick the first tree with pointer type. If none exists,
+ and the tree in first position is an SSA_NAME, pick that. */
+ for (i = 0; i < aff->n; i++)
+ if (POINTER_TYPE_P (TREE_TYPE (aff->elts[i].val)))
+ return aff->elts[i].val;
+
+ if (TREE_CODE (aff->elts[0].val) == SSA_NAME)
+ return aff->elts[0].val;
+
+ return NULL_TREE;
+}
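+
+/* For illustration: for an affine combination with elements p_1(D)
+ and D.2154_8 (as in the example at the top of this file), p_1(D)
+ has pointer type and is returned as the base hint. */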
+
+/* Return TRUE iff NAME is immediately or transitively defined by a
+ PHI node, looking through chains of SSA_NAMEs and code-free
+ conversions. */
+/* #### This may now be redundant with the code in tree_ssa_lower_addr_tree
+ that handles a broader class of negative addressing patterns. Remove if
+ possible. */
+/* EXPERTS: A desirable refinement would be to only return TRUE when
+ the item defined by a PHI node is an induction variable. I'm
+ not sure whether the naming convention of ivtmp.n is sufficient,
+ or if there's another way to tell this. */
+static bool
+is_phi_defined (tree name)
+{
+ tree expr = name;
+
+ while (true)
+ {
+ gimple def_stmt;
+
+ if (TREE_CODE (expr) != SSA_NAME)
+ return false;
+
+ def_stmt = SSA_NAME_DEF_STMT (expr);
+
+ if (gimple_code (def_stmt) == GIMPLE_PHI)
+ return true;
+
+ if (!is_gimple_assign (def_stmt))
+ return false;
+
+ if (gimple_assign_ssa_name_copy_p (def_stmt) ||
+ gimple_assign_unary_nop_p (def_stmt))
+ expr = gimple_assign_rhs1 (def_stmt);
+ else
+ return false;
+ }
+
+ gcc_unreachable ();
+ return false;
+}
+
+/* Return TRUE iff EXPR might be a hidden global store as defined by
+ the dead code eliminator. See tree-ssa-sink.c:is_hidden_global_store. */
+static bool
+may_be_global_store (tree expr)
+{
+ if (REFERENCE_CLASS_P (expr))
+ expr = get_base_address (expr);
+
+ /* If the base address couldn't be obtained, don't perform the
+ replacement. This shouldn't be possible if we've gotten this far. */
+ if (expr == NULL_TREE)
+ return false;
+
+ if (DECL_P (expr) && !is_global_var (expr))
+ return false;
+
+ if (INDIRECT_REF_P (expr) ||
+ TREE_CODE (expr) == MEM_REF ||
+ TREE_CODE (expr) == TARGET_MEM_REF)
+ return ptr_deref_may_alias_global_p (TREE_OPERAND (expr, 0));
+
+ if (CONSTANT_CLASS_P (expr))
+ return true;
+
+ return false;
+}
+
+/* We are considering replacing ORIG with REPL. Return TRUE iff ORIG
+ would be recognized as a hidden global store by the dead code
+ eliminator but REPL would not; in that case the replacement could
+ cause the store to be lost. */
+/* EXPERTS: This does not seem like something that should be necessary,
+ but it came up in test case insert.cc in the libstdc++ bucket.
+ In function uninitialized_fill_n_a, the following complex
+ addressing expression was analyzed:
+
+ this.3_41 = (_UIntPtrType)&MEM[(struct pointer &)&D.58605].D56875;
+ this.2_35 = (_UIntPtrType)&D.58605.D.56875;
+ pretmp.125_1 = (_UIntPtrType)&__cur.D.56875;
+ D.60670_27 = MEM[(const struct _Relative_pointer_impl *)&__cur];
+ D.60672_30 = D.60670_27 + pretmp.125_1;
+ D.60684_36 = D.60672_30 - this.2_35;
+ D.60692_42 = D.60684_36 + this.3_41;
+ D.60691_43 = (int *)D.60692_42;
+ D.60690_45 = *__x_7(D);
+ *D.60691_43 = D.60690_45;
+
+ The affine expansion was able to simplify this to:
+
+ D.60670_27 = MEM[(const struct _Relative_pointer_impl *)&__cur];
+ D.60690_45 = *__x_7(D);
+ MEM[&__cur, D.60670_27] = D.60690_45;
+
+ This is correct because field D56875 is at offset zero, and
+ the two "this" expressions cancel out. However, is_global_var
+ returns FALSE for __cur, but returns TRUE for D.60691_43.
+ I am at a loss to explain why this should be. I've verified
+ with nm that __cur is not a global.
+
+ Unfortunately the store in question is the only side effect of
+ the procedure, and the entire procedure body is eliminated
+ when this replacement is made.
+
+ Any ideas welcome. This was a profitable transformation, if
+ you discount the loss of the whole procedure... */
+static bool
+global_store_may_be_lost (tree orig, tree repl)
+{
+ if (!may_be_global_store (orig))
+ return false;
+
+ return !may_be_global_store (repl);
+}
+
+/* Expose target addressing modes in EXPR. Logic extracted and
+ simplified from tree-ssa-loop-ivopts.c to handle non-ivar
+ opportunities. IS_VDEF_STORE is true iff EXPR is the lhs of
+ a virtual definition. */
+
+static bool
+tree_ssa_lower_addr_tree (tree *expr, gimple_stmt_iterator *gsi,
+ bool speed, bool is_vdef_store)
+{
+ tree addr = *expr, mem_ref, base_hint;
+ aff_tree aff;
+ enum tree_code code = TREE_CODE (addr);
+ gimple_stmt_iterator save_gsi;
+ gimple save_stmt;
+ bool ignore_result;
+ unsigned i;
+
+ if (!REFERENCE_CLASS_P (addr))
+ return false;
+
+ /* If the expression is a virtual method table reference, don't attempt
+ this. Incorrect code generation occurs; an example test case is
+ testsuite/g++.dg/opt/pr47366.C. #### Check on this again. */
+ if (code == COMPONENT_REF && DECL_VIRTUAL_P (TREE_OPERAND (addr, 1)))
+ return false;
+
+ /* TODO: Bitfields are not yet handled; they're still a bit of a mess. */
+ if (code == BIT_FIELD_REF)
+ return false;
+ if (code == COMPONENT_REF && DECL_BIT_FIELD (TREE_OPERAND (addr, 1)))
+ return false;
+
+ /* Express the address of the expression as an affine combination of
+ trees, looking through SSA defs. If the expression may not be
+ addressable or is otherwise not a candidate, detect that and exit. */
+ if (!mem_tree_to_aff_combination_expand (&addr, &aff))
+ return false;
+
+ /* It is possible for aff to have no elements, in which case do
+ nothing. Example: lhs = MEM[(struct foo *) 0B].bar does not
+ produce an affine combination. */
+ if (!aff.n)
+ return false;
+
+ /* For analysis gathering. */
+ if (dump_file)
+ {
+ fprintf (dump_file, "Original expression = \n");
+ print_generic_expr (dump_file, *expr, TDF_VOPS|TDF_MEMSYMS);
+ fprintf (dump_file, "\n\naff_comb =\n");
+ print_aff (dump_file, &aff);
+ fprintf (dump_file, "\n");
+ }
+
+ /* If aff contains any negative coefficients, check whether sizetype
+ is unsigned (as is the case for Fortran). If so, the replacement
+ is unsafe: arithmetic on the negative values would be performed in
+ unsigned sizetype. Example testcase is gfortran/assumed_dummy_1.f90. */
+ if (TYPE_UNSIGNED (sizetype))
+ for (i = 0; i < aff.n; i++)
+ if (double_int_negative_p (aff.elts[i].coef))
+ return false;
+
+ /* Choose the best SSA name in the affine combination of trees
+ to coerce into the base position on the MEM_REF. If this isn't
+ done, a base expression of zero may occur, which confuses the
+ may-aliasing in subsequent dead store elimination. Furthermore,
+ if we choose badly, we can end up with a base expression that
+ points below the beginning of the object, which also confuses
+ may-aliasing. */
+ base_hint = choose_base_hint (&aff);
+
+ /* We must also watch out for complex combinations that can force the
+ base to point below the beginning of an object. Specifically, when
+ we have at least two trees and a nonzero offset, we can't allow
+ any of the trees to have a signed integral type. The logic in
+ create_mem_ref will try to fold the signed add into the base
+ expression on some architectures. When the signed tree has a
+ negative value, a bad base pointer is built. */
+ if (aff.n > 1 && !double_int_zero_p (aff.offset))
+ for (i = 0; i < aff.n; i++)
+ {
+ tree val = aff.elts[i].val;
+ tree val_type = TREE_TYPE (val);
+ if (INTEGRAL_TYPE_P (val_type))
+ {
+ if (!TYPE_UNSIGNED (val_type))
+ return false;
+ /* Even if the tree is an unsigned integral type, we must
+ check for a conversion from a signed type. */
+ if (CONVERT_EXPR_CODE_P (TREE_CODE (val)))
+ {
+ tree opnd_type = TREE_TYPE (TREE_OPERAND (val, 0));
+ if (INTEGRAL_TYPE_P (opnd_type) &&
+ !TYPE_UNSIGNED (opnd_type))
+ return false;
+ }
+ }
+ }
+
+ /* Copy the current statement iterator, and memorize the position
+ one statement previous. */
+ save_stmt = gsi_stmt (*gsi);
+ save_gsi = *gsi;
+ gsi_prev (&save_gsi);
+
+ /* Replace the input expression with an equivalent memory reference
+ legal on the target machine. A side effect is that feeding
+ expressions may be generated prior to GSI. */
+ mem_ref = create_mem_ref (gsi, TREE_TYPE (*expr), &aff,
+ reference_alias_ptr_type (*expr),
+ NULL_TREE, base_hint, speed);
+
+ /* For analysis gathering. */
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nmem_ref = ");
+ print_generic_expr (dump_file, mem_ref, TDF_VOPS|TDF_MEMSYMS);
+ fprintf (dump_file, "\n\n");
+ }
+
+ /* We have to watch out for a situation with induction variables. An
+ ivar for striding through an array is often initialized to the base
+ of element -1 outside its loop. The loop then increments the ivar
+ before referencing it. If we aren't careful, we will end up with a
+ base register below the object it references, which makes aliasing
+ unhappy. The way we work around this right now is to refuse to
+ replace the memory reference when the base expression is an SSA
+ name that is directly or transitively defined by a PHI expression. */
+ /* #### Check whether this still occurs with the integral tree
+ avoidance algorithm above. */
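+ /* For illustration (hypothetical GIMPLE): a loop preheader may
+ initialize "ivtmp.5_10 = &a[-1]", with the increment
+ "ivtmp.5_11 = ivtmp.5_10 + 4" executed before each reference;
+ a MEM_REF based directly on ivtmp.5_10 would point below "a". */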
+ if (is_phi_defined (TREE_OPERAND (mem_ref, 0)))
+ return false;
+
+ /* Ensure the memory reference was not built with a base expression
+ of zero, which confuses the may-aliasing in subsequent dead
+ store elimination. #### There are still occurrences of this.
+ Investigate. */
+ if (integer_zerop (TREE_OPERAND (mem_ref, 0)))
+ return false;
+
+ /* If the original expression is a mem_ref that will be recognized
+ as a hidden global store (see tree-ssa-sink.c:is_hidden_global_store),
+ but the replacement will not be so recognized, the replacement
+ is unsafe. The statement may not be marked as always-necessary
+ in the dead code eliminator. */
+ if (is_vdef_store && global_store_may_be_lost (*expr, mem_ref))
+ return false;
+
+ /* Chicken switch for debug. Use -fdbg-cnt=lower_addr:N to disable
+ after N replacements. */
+ if (!dbg_cnt (lower_addr))
+ return false;
+
+ /* We're going to use the replacement. CSE any new gimple stmts
+ that were added, since we are likely exposing redundant
+ computations. */
+ if (gsi_end_p (save_gsi))
+ save_gsi = gsi_start (gsi->seq);
+ else
+ gsi_next (&save_gsi);
+ for (; gsi_stmt (save_gsi) != save_stmt; gsi_next (&save_gsi))
+ ignore_result = local_addr_cse (gsi_stmt (save_gsi));
+
+ /* EXPERTS: This would potentially be a good spot to invoke a
+ cost function, since we can easily determine how many live
+ instructions were added for the new MEM_REF. Currently the
+ algorithm is aggressive about exposing opportunities, since
+ commoning different field and array references happens
+ frequently in single blocks. The downside is that it is
+ possible for expressions to be duplicated from dominating
+ blocks (global un-CSEing).
+
+ This does not seem to be a big deal, since we still have CSE
+ passes in the RTL coming up, and duplicates will only be
+ placed in blocks dominated by the originals; i.e., we're
+ certain to clean this up downstream. On the other hand,
+ if the goal is for the GIMPLE passes to be as self-sufficient
+ as possible, it might be desirable to avoid creating deep
+ chains of duplicate expressions.
+
+ My current plan is to defer any scaling back until after some
+ performance analysis. I welcome your thoughts. */
+
+ /* Finish the replacement. */
+ copy_ref_info (mem_ref, *expr);
+ *expr = mem_ref;
+
+ return true;
+}
+
+/* Expose target addressing modes in STMT, which must be an assignment. */
+
+static bool
+tree_ssa_lower_addr_stmt (gimple stmt, gimple_stmt_iterator *gsi, bool speed)
+{
+ tree *lhs, *rhs1;
+ bool changed;
+
+ lhs = gimple_assign_lhs_ptr (stmt);
+ changed = tree_ssa_lower_addr_tree (lhs, gsi, speed, gimple_vdef (stmt));
+
+ rhs1 = gimple_assign_rhs1_ptr (stmt);
+ changed = tree_ssa_lower_addr_tree (rhs1, gsi, speed, false) || changed;
+
+ if (changed)
+ update_stmt (stmt);
+
+ return changed;
+}
+
+/* Process the statements in block BB. */
+
+static void
+tree_ssa_lower_addr_block (basic_block bb)
+{
+ gimple_stmt_iterator gsi;
+ bool changed = false;
+
+ bool speed = optimize_bb_for_speed_p (bb);
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+
+ if (gimple_vuse (stmt) && gimple_assign_single_p (stmt))
+ changed = tree_ssa_lower_addr_stmt (stmt, &gsi, speed) || changed;
+
+ /* Whether changed or not, gather potential information for
+ local CSE. */
+ if (local_addr_cse (stmt))
+ {
+ changed = true;
+ update_stmt (stmt);
+ }
+ }
+
+ /* If anything changed in the block, the SSA immediate use lists may
+ be hosed up. Clean the whole block. (EXPERTS: Is there a
+ better way to do this? #### It may also no longer be necessary to
+ call update_stmt inline in two other places above.) */
+ if (changed)
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ update_stmt (gsi_stmt (gsi));
+
+ if (dump_file && changed)
+ {
+ fprintf (dump_file, "Changed bb %d:\n", bb->index);
+ dump_bb (bb, dump_file, 0);
+ fprintf (dump_file, "\n");
+ }
+}
+
+/* Main entry point for exposing target addressing modes not caught
+ by tree-ssa-loop-ivopts.c. */
+
+static unsigned int
+tree_ssa_lower_addr (void)
+{
+ basic_block bb;
+
+ /* A hash table of subexpressions that contribute to addressing
+ computation is maintained for one block at a time. The entries
+ referenced by the hash table are allocated from an allocation
+ pool that is cleared at the end of each block. */
+ local_addr_table = htab_create (500, local_addr_hash,
+ local_addr_eq, local_addr_free);
+ local_addr_pool = create_alloc_pool ("local_addr_pool",
+ sizeof (local_addr_expr),
+ 100);
+
+ FOR_EACH_BB (bb)
+ {
+ tree_ssa_lower_addr_block (bb);
+ htab_empty (local_addr_table);
+ empty_alloc_pool (local_addr_pool);
+ }
+
+ htab_delete (local_addr_table);
+ free_alloc_pool (local_addr_pool);
+
+ return 0;
+}
+
+struct gimple_opt_pass pass_tree_lower_addr =
+{
+ {
+ GIMPLE_PASS,
+ "loweraddr", /* name */
+ NULL, /* gate */
+ tree_ssa_lower_addr, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_TREE_LOWER_ADDR, /* tv_id */
+ PROP_cfg, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func | TODO_update_ssa /* todo_flags_finish */
+ }
+};
===================================================================
@@ -1,6 +1,6 @@
/* Front-end tree definitions for GNU compiler.
Copyright (C) 1989, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
- 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
Free Software Foundation, Inc.
This file is part of GCC.
@@ -5580,6 +5580,9 @@
extern tree tree_mem_ref_addr (tree, tree);
extern void copy_mem_ref_info (tree, tree);
+/* In tree-ssa-loop-ivopts.c. */
+extern void copy_ref_info (tree, tree);
+
/* In tree-vrp.c */
extern bool ssa_name_nonnegative_p (const_tree);
===================================================================
@@ -385,6 +385,7 @@
extern struct gimple_opt_pass pass_parallelize_loops;
extern struct gimple_opt_pass pass_loop_prefetch;
extern struct gimple_opt_pass pass_iv_optimize;
+extern struct gimple_opt_pass pass_tree_lower_addr;
extern struct gimple_opt_pass pass_tree_loop_done;
extern struct gimple_opt_pass pass_ch;
extern struct gimple_opt_pass pass_ccp;
===================================================================
@@ -1,3 +1,25 @@
+2011-02-24 William Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * Makefile.in: Add build step for tree-ssa-lower-addr.o; add
+ dependency to tree-affine.o.
+ * dbgcnt.def (lower_addr): Add new debug counter.
+ * passes.c: Add pass for pass_tree_lower_addr.
+ * timevar.def (TV_TREE_LOWER_ADDR): New timevar definition.
+ * tree.h (copy_ref_info): New prototype of existing function.
+ * tree-affine.c (mem_tree_to_aff_combination_expand): New
+ function.
+ * tree-affine.h (mem_tree_to_aff_combination_expand): New
+ declaration.
+ * tree-flow.h (may_be_unaligned_p): New prototype of existing
+ function.
+ * tree-pass.h (pass_tree_lower_addr): New declaration.
+ * tree-ssa-address.c (add_to_parts): Add NOT_BASE parameter to
+ avoid adding elements with negative coefficients to the base.
+ (addr_to_parts): Change to handle null iv_cand parameter, and to
+ avoid base expressions that incorporate negative offsets.
+ * tree-ssa-loop-ivopts.c (may_be_unaligned_p, copy_ref_info):
+ Remove static keyword.
+ * tree-ssa-lower-addr.c: New module.
+
2011-02-04 Jakub Jelinek <jakub@redhat.com>
PR inline-asm/23200
===================================================================
@@ -1604,7 +1604,7 @@
/* Returns true if memory reference REF with step STEP may be unaligned. */
-static bool
+bool
may_be_unaligned_p (tree ref, tree step)
{
tree base;
@@ -5916,7 +5916,7 @@
/* Copies the reference information from OLD_REF to NEW_REF. */
-static void
+void
copy_ref_info (tree new_ref, tree old_ref)
{
tree new_ptr_base = NULL_TREE;
===================================================================
@@ -1,7 +1,7 @@
/* This file contains the definitions for timing variables used to
measure run-time performance of the compiler.
Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
- 2009, 2010
+ 2009, 2010, 2011
Free Software Foundation, Inc.
Contributed by Alex Samuel <samuel@codesourcery.com>
@@ -236,6 +236,7 @@
DEFTIMEVAR (TV_TREE_UNINIT , "uninit var analysis")
DEFTIMEVAR (TV_PLUGIN_INIT , "plugin initialization")
DEFTIMEVAR (TV_PLUGIN_RUN , "plugin execution")
+DEFTIMEVAR (TV_TREE_LOWER_ADDR , "expose addressing modes")
/* Everything else in rest_of_compilation not included above. */
DEFTIMEVAR (TV_EARLY_LOCAL , "early local passes")
===================================================================
@@ -1,5 +1,5 @@
/* Memory address lowering and addressing mode selection.
- Copyright (C) 2004, 2006, 2007, 2008, 2009, 2010
+ Copyright (C) 2004, 2006, 2007, 2008, 2009, 2010, 2011
Free Software Foundation, Inc.
This file is part of GCC.
@@ -493,10 +493,11 @@
aff_combination_remove_elt (addr, i);
}
-/* Adds ELT to PARTS. */
+/* Adds ELT to PARTS. If NOT_BASE is true, it is unsafe to add this
+ part to PARTS->base because it contains a negative coefficient. */
static void
-add_to_parts (struct mem_address *parts, tree elt)
+add_to_parts (struct mem_address *parts, tree elt, bool not_base)
{
tree type;
@@ -512,6 +513,14 @@
return;
}
+ /* If it's inadvisable to add ELT to base, add it to index instead. */
+ if (not_base)
+ {
+ type = TREE_TYPE (parts->index);
+ parts->index = fold_build2 (PLUS_EXPR, type, parts->index, elt);
+ return;
+ }
+
/* Add ELT to base. */
type = TREE_TYPE (parts->base);
if (POINTER_TYPE_P (type))
@@ -613,6 +622,7 @@
{
tree part;
unsigned i;
+ bool not_base = false;
parts->symbol = NULL_TREE;
parts->base = NULL_TREE;
@@ -629,9 +639,12 @@
/* No need to do address parts reassociation if the number of parts
is <= 2 -- in that case, no loop invariant code motion can be
- exposed. */
+ exposed.
- if (!base_hint && (addr->n > 2))
+ Ensure iv_cand exists before attempting this, since this code
+ is also called from outside IVopts. */
+
+ if (!base_hint && (addr->n > 2) && iv_cand)
move_variant_to_index (parts, addr, iv_cand);
/* First move the most expensive feasible multiplication
@@ -650,14 +663,21 @@
/* Then try to process the remaining elements. */
for (i = 0; i < addr->n; i++)
{
+ /* If we have a preferred base expression, we must avoid producing
+ a folded base expression that addresses negatively from that
+ object's beginning. Aliasing does not recognize such MEM_REFs
+ as may-aliased with overlapping references based within the object.
+ EXPERTS: An alternative would be to remove this limitation from
+ aliasing, but I presume this rule is fixed in stone? */
+ not_base = base_hint && double_int_negative_p (addr->elts[i].coef);
part = fold_convert (sizetype, addr->elts[i].val);
if (!double_int_one_p (addr->elts[i].coef))
part = fold_build2 (MULT_EXPR, sizetype, part,
double_int_to_tree (sizetype, addr->elts[i].coef));
- add_to_parts (parts, part);
+ add_to_parts (parts, part, not_base);
}
if (addr->rest)
- add_to_parts (parts, fold_convert (sizetype, addr->rest));
+ add_to_parts (parts, fold_convert (sizetype, addr->rest), not_base);
}
/* Force the PARTS to register. */
===================================================================
@@ -1,5 +1,5 @@
/* Operations with affine combinations of trees.
- Copyright (C) 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2005, 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
This file is part of GCC.
@@ -28,6 +28,7 @@
#include "tree-affine.h"
#include "gimple.h"
#include "flags.h"
+#include "tree-flow.h"
/* Extends CST as appropriate for the affine combinations COMB. */
@@ -712,6 +713,45 @@
aff_combination_expand (comb, cache);
}
+/* Expand a memory reference into an affine combination, returning
+ false if EXPR is possibly not addressable. EXPR is modified to
+ an addressing expression. #### Currently also returns false if
+ EXPR might be misaligned on a STRICT_ALIGNMENT target. */
+bool
+mem_tree_to_aff_combination_expand (tree *expr, aff_tree *comb)
+{
+ struct pointer_map_t *cache;
+
+ if (TREE_CODE (*expr) == TARGET_MEM_REF)
+ {
+ tree type = build_pointer_type (TREE_TYPE (*expr));
+ *expr = tree_mem_ref_addr (type, *expr);
+ }
+ else
+ {
+ /* Check that the expression is addressable, since the access
+ may include a VIEW_CONVERT_EXPR or a bitfield reference. */
+ if (may_be_nonaddressable_p (*expr))
+ return false;
+
+ /* On strict alignment platforms, check that the base expression
+ is sufficiently aligned. There is no additional "step"
+ information required, so specify single-byte alignment for
+ that parameter. */
+ if (STRICT_ALIGNMENT && may_be_unaligned_p (*expr, size_one_node))
+ return false;
+
+ *expr = build_fold_addr_expr (*expr);
+ }
+
+ /* Expand to affine combination, looking through SSA defs. */
+ cache = pointer_map_create ();
+ tree_to_aff_combination_expand (*expr, TREE_TYPE (*expr), comb, &cache);
+ pointer_map_destroy (cache);
+
+ return true;
+}
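+
+/* For illustration (hypothetical): for a reference such as
+ MEM[(int *)p_1 + 4B], the code above rewrites *EXPR to the
+ equivalent address computation and fills COMB with the affine
+ combination p_1 + 4. */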
+
/* Frees memory occupied by struct name_expansion in *VALUE. Callback for
pointer_map_traverse. */
===================================================================
@@ -1,5 +1,5 @@
/* Operations with affine combinations of trees.
- Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 2005, 2007, 2008, 2011 Free Software Foundation, Inc.
This file is part of GCC.
@@ -74,6 +74,7 @@
void aff_combination_expand (aff_tree *, struct pointer_map_t **);
void tree_to_aff_combination_expand (tree, tree, aff_tree *,
struct pointer_map_t **);
+bool mem_tree_to_aff_combination_expand (tree *, aff_tree *);
void get_inner_reference_aff (tree, aff_tree *, double_int *);
void free_affine_expand_cache (struct pointer_map_t **);
===================================================================
@@ -808,6 +808,7 @@
addr_space_t);
unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode, bool);
bool may_be_nonaddressable_p (tree expr);
+bool may_be_unaligned_p (tree ref, tree step);
/* In tree-ssa-threadupdate.c. */
extern bool thread_through_all_blocks (bool);
===================================================================
@@ -3,7 +3,7 @@
# Copyright (C) 1987, 1988, 1990, 1991, 1992, 1993, 1994, 1995, 1996,
# 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-# 2008, 2009, 2010 Free Software Foundation, Inc.
+# 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
#This file is part of GCC.
@@ -1389,6 +1389,7 @@
tree-sra.o \
tree-switch-conversion.o \
tree-ssa-address.o \
+ tree-ssa-lower-addr.o \
tree-ssa-alias.o \
tree-ssa-ccp.o \
tree-ssa-coalesce.o \
@@ -2591,7 +2592,7 @@
tree-affine.o : tree-affine.c tree-affine.h $(CONFIG_H) pointer-set.h \
$(SYSTEM_H) $(TREE_H) $(GIMPLE_H) \
output.h $(DIAGNOSTIC_H) coretypes.h $(TREE_DUMP_H) $(FLAGS_H) \
- tree-pretty-print.h
+ tree-pretty-print.h $(TREE_FLOW_H)
tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
$(BASIC_BLOCK_H) output.h $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) \
@@ -2603,6 +2604,10 @@
$(TREE_DUMP_H) $(TREE_PASS_H) $(FLAGS_H) $(BASIC_BLOCK_H) \
pointer-set.h tree-affine.h tree-ssa-propagate.h gimple-pretty-print.h \
tree-pretty-print.h
+tree-ssa-lower-addr.o : tree-ssa-lower-addr.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h $(TREE_H) $(BASIC_BLOCK_H) $(TREE_FLOW_H) $(TREE_DUMP_H) \
+ $(TREE_PASS_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) $(TREE_AFFINE_H) \
+ tree-pretty-print.h $(DBGCNT_H) alloc-pool.h
tree-ssa-math-opts.o : tree-ssa-math-opts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(FLAGS_H) $(TREE_H) $(TREE_FLOW_H) $(TIMEVAR_H) \
$(TREE_PASS_H) alloc-pool.h $(BASIC_BLOCK_H) $(TARGET_H) \
===================================================================
@@ -1,7 +1,7 @@
/* Top level of GCC compilers (cc1, cc1plus, etc.)
Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
- 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
- Free Software Foundation, Inc.
+ 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+ 2011 Free Software Foundation, Inc.
This file is part of GCC.
@@ -922,6 +922,7 @@
only examines PHIs to discover const/copy propagation
opportunities. */
NEXT_PASS (pass_phi_only_cprop);
+ NEXT_PASS (pass_tree_lower_addr);
NEXT_PASS (pass_cd_dce);
NEXT_PASS (pass_tracer);