Proposal for IPA init() constant propagation
diff mbox series

Message ID 1b09f7dc666d8e0ce16c47e667592076@theobroma-systems.com
State New
Headers show
Series
  • Proposal for IPA init() constant propagation
Related show

Commit Message

erick.ochoa@theobroma-systems.com Nov. 15, 2019, 11:29 p.m. UTC
Hello,

we've been working on an lto optimization and would like to receive some 
feedback on this candidate patch.
At the moment the patch compiles correctly when applied to master.
We have some initial test cases in a separate repository and have 
compiled and ran a small subset of CPU 2017 benchmarks with comparable 
results.

The proposed transformation (ipa-initcall-cp) is a variant of 
interprocedural constant propagation.
ipa-initcall-cp collects all variables with static lifetime that contain 
only a single write (as in the typical case of initialization functions) and 
propagates the written value to all reads across the whole link unit.

In order to run, apply the patch and compile with `-flto -finitcall-cp`.

In order for this transformation to be sound
* writes that can be reached from a function pointer,
* writes that can be reached from a function with outside visibility, 
and
* writes that may happen after any read
are not taken into account.

In order to determine that no read happens before the write, we have to:
* find the main function
* find the function and basic block of the write
*   for each read in another function
*     make sure that call to write dominates all callees of the read 
function
*   for each read in the same function
*     make sure that write dominates read

Some initial drawbacks:
* We've noticed that we have to disable ipa-cp in order for 
ipa-initcall-cp to run successfully.
This is most likely due to some issue with clones and we will need to 
make some design changes.
The function materialize_all_clones fails after ipa-initcall-cp.
Suggestions are welcomed.
* At the moment, I still need to clean the code a bit, since it doesn't 
pass the standards.
* I still need to add tests using the testsuite as opposed to running 
them myself.

Some future work:
* At the moment, ipa-initcall-cp only propagates values from a single 
write.
However, we could conceivably improve this work to propagate the first 
n-writes and specialize functions based on the constants.

Here is a link to the bugzilla ticket: 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92538

2019-11-15  Erick Ochoa <erick.ochoa@theobroma-systems.com>

	Proposal for IPA init() constant propagation
         * gcc/Makefile.in: Add pass to the build process
	* gcc/cgraph.c: Add assertion.
         * gcc/common.opt: Add pass
         * gcc/ipa-initcall-cp.c: new
         * gcc/passes.def: Place pass in opt plan
         * gcc/tree-pass.h: Add pass

*ctxt);
  extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt);
  extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt);
  extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt);

Patch
diff mbox series

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 597dc01..f6b11f7 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1369,6 +1369,7 @@  OBJS = \
  	init-regs.o \
  	internal-fn.o \
  	ipa-cp.o \
+	ipa-initcall-cp.o \
  	ipa-devirt.o \
  	ipa-fnsummary.o \
  	ipa-polymorphic-call.o \
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index ea8ab38..e7aa63f 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -3602,6 +3602,8 @@  cgraph_node::get_untransformed_body (void)
    struct lto_in_decl_state *decl_state
  	 = lto_get_function_in_decl_state (file_data, decl);

+  gcc_assert (decl_state != NULL);
+
    data = lto_get_section_data (file_data, LTO_section_function_body,
  			       name, &len, decl_state->compressed);
    if (!data)
diff --git a/gcc/common.opt b/gcc/common.opt
index c160538..31c98f7 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3320,4 +3320,7 @@  fipa-ra
  Common Report Var(flag_ipa_ra) Optimization
  Use caller save register across calls if possible.

+finitcall-cp
+Common Report Var(flag_initcall_cp) Optimization
+Perform interprocedural propagation of constants written once in initialization functions.
+
  ; This comment is to ensure we retain the blank line above.
diff --git a/gcc/ipa-initcall-cp.c b/gcc/ipa-initcall-cp.c
new file mode 100644
index 0000000..b3f8e0a
--- /dev/null
+++ b/gcc/ipa-initcall-cp.c
@@ -0,0 +1,1149 @@ 
+/* Initcall constant propagation pass.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "gimple-expr.h"
+#include "gimple-iterator.h"
+#include "predict.h"
+#include "alloc-pool.h"
+#include "tree-pass.h"
+#include "cgraph.h"
+#include "diagnostic.h"
+#include "fold-const.h"
+#include "gimple-fold.h"
+#include "symbol-summary.h"
+#include "tree-vrp.h"
+#include "ipa-prop.h"
+#include "tree-pretty-print.h"
+#include "tree-inline.h"
+#include "params.h"
+#include "ipa-fnsummary.h"
+#include "ipa-utils.h"
+#include "tree-ssa-ccp.h"
+#include "stringpool.h"
+#include "attribs.h"
+#include "gimple-pretty-print.h"
+#include "ssa.h"
+
+static bool bb1_dominates_bb2 (basic_block bb1, basic_block bb2, 
cgraph_node *cnode);
+static bool write_before_call (struct ipa_ref *write, struct ipa_ref 
*read);
+static bool call_before_read(struct ipa_ref *write, struct ipa_ref 
*read);
+
+/* Worker for comdat_can_be_unshared_p: check a single symtab NODE.
+   Return true if this particular node can safely be made local.  */
+static bool
+comdat_can_be_unshared_p_1 (symtab_node *node)
+{
+  /* Symbols that are not exported can always be privatized.  */
+  if (!node->externally_visible)
+    return true;
+  if (node->address_can_be_compared_p ())
+    {
+      struct ipa_ref *ref;
+
+      /* Any address-significant reference forces the symbol to stay
+         shared, since address equality must be preserved.  */
+      for (unsigned int i = 0; node->iterate_referring (i, ref); i++)
+        if (ref->address_matters_p ())
+          return false;
+    }
+
+  /* If the symbol is used in some weird way, better to not touch it.  */
+  if (node->force_output)
+    return false;
+
+  /* Explicit instantiations needs to be output when possibly
+     used externally.  */
+  if (node->forced_by_abi
+      && TREE_PUBLIC (node->decl)
+      && (node->resolution != LDPR_PREVAILING_DEF_IRONLY
+          && !flag_whole_program))
+    return false;
+
+  /* Non-readonly and volatile variables cannot be duplicated.  */
+  if (is_a <varpool_node *> (node)
+      && (!TREE_READONLY (node->decl)
+      || TREE_THIS_VOLATILE (node->decl)))
+    return false;
+  return true;
+}
+
+/* COMDAT functions must be shared only if they have address taken,
+   otherwise we can produce our own private implementation with
+   -fwhole-program.
+   Return true when turning COMDAT function static cannot lead to wrong
+   code when the resulting object links with a library defining same COMDAT.
+
+   Virtual functions do have their addresses taken from the vtables,
+   but in C++ there is no way to compare their addresses for equality.  */
+
+static bool
+comdat_can_be_unshared_p (symtab_node *node)
+{
+  /* The node itself must be unsharable.  */
+  if (!comdat_can_be_unshared_p_1 (node))
+    return false;
+
+  /* Every other member of the COMDAT group must be unsharable too:
+     a single address-taken member forces the whole group to stay
+     shared.  */
+  if (node->same_comdat_group)
+    for (symtab_node *member = node->same_comdat_group;
+	 member != node;
+	 member = member->same_comdat_group)
+      if (!comdat_can_be_unshared_p_1 (member))
+	return false;
+
+  return true;
+}
+
+/* Return true when function NODE should be considered externally
+   visible, i.e. may be called or referenced from outside the link
+   unit.  WHOLE_PROGRAM is the -fwhole-program flag value.  */
+
+static bool
+cgraph_externally_visible_p (struct cgraph_node *node,
+                             bool whole_program)
+{
+  /* Look through transparent aliases to the real definition.  */
+  while (node->transparent_alias && node->definition)
+    node = node->get_alias_target ();
+  if (!node->definition)
+    return false;
+  if (!TREE_PUBLIC (node->decl)
+      || DECL_EXTERNAL (node->decl))
+    return false;
+
+  /* Do not try to localize built-in functions yet.  One of problems is
+     that we end up mangling their asm for WHOPR that makes it
+     impossible to call them using the implicit built-in declarations
+     anymore.  Similarly this enables us to remove them as unreachable
+     before actual calls may appear during expansion or folding.  */
+  if (fndecl_built_in_p (node->decl))
+    return true;
+
+  /* If linker counts on us, we must preserve the function.  */
+  if (node->used_from_object_file_p ())
+    return true;
+  if (DECL_PRESERVE_P (node->decl))
+    return true;
+  if (lookup_attribute ("externally_visible",
+                        DECL_ATTRIBUTES (node->decl)))
+    return true;
+  if (lookup_attribute ("noipa", DECL_ATTRIBUTES (node->decl)))
+    return true;
+  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
+      && lookup_attribute ("dllexport",
+                           DECL_ATTRIBUTES (node->decl)))
+    return true;
+  if (node->resolution == LDPR_PREVAILING_DEF_IRONLY)
+    return false;
+  /* When doing LTO or whole program, we can bring COMDAT functions
+     static.  This improves code quality and we know we will duplicate
+     them at most twice (in the case that we are not using plugin and
+     link with object file implementing same COMDAT)  */
+  if (((in_lto_p || whole_program) && !flag_incremental_link)
+      && DECL_COMDAT (node->decl)
+      && comdat_can_be_unshared_p (node))
+    return false;
+
+  /* When doing link time optimizations, hidden symbols become local.  */
+  if ((in_lto_p && !flag_incremental_link)
+      && (DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN
+      || DECL_VISIBILITY (node->decl) == VISIBILITY_INTERNAL)
+      /* Be sure that node is defined in IR file, not in other object
+         file.  In that case we don't set used_from_other_object_file.  */
+      && node->definition)
+    ;
+  else if (!whole_program)
+    return true;
+
+  /* main () is always reachable from outside.  */
+  if (MAIN_NAME_P (DECL_NAME (node->decl)))
+    return true;
+
+  return false;
+}
+
+typedef vec <struct ipa_ref*> ipa_ref_vec;
+
+//XXX: Move to ipa-ref.h
+/* Return a human-readable name for the ipa_ref_use value USE, for use
+   in dump output.  (The original had an unreachable `break' after each
+   `return'; those are dropped.)  */
+static const char*
+stringify_ipa_ref_use (ipa_ref_use use)
+{
+  switch (use)
+    {
+    case IPA_REF_LOAD:
+      return "IPA_REF_LOAD";
+    case IPA_REF_STORE:
+      return "IPA_REF_STORE";
+    case IPA_REF_ADDR:
+      return "IPA_REF_ADDR";
+    case IPA_REF_ALIAS:
+      return "IPA_REF_ALIAS";
+    default:
+      return "<unknown>";
+    }
+}
+
+/* Make sure the gimple body of NODE is available in memory.  During
+   LTO, IPA passes do not have function bodies loaded by default.  */
+static void
+load_function_body_of_ipa_ref (cgraph_node* node)
+{
+  if (!in_lto_p)
+    return;
+
+  /* Resolve aliases, then walk up the clone chain to the node that
+     actually carries the body.  */
+  cgraph_node *body_owner = node->ultimate_alias_target ();
+  while (body_owner->clone_of)
+    {
+      if (dump_file)
+	fprintf (dump_file, "Copy of...well, we still need the constant!\n");
+      body_owner = body_owner->clone_of;
+    }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "%s: for function '%s'\n", __func__,
+	       body_owner->dump_asm_name ());
+      dump_node (body_owner->decl, TDF_DETAILS, dump_file);
+    }
+
+  body_owner->get_untransformed_body ();
+}
+
+/* Overload: make sure the body of the function containing reference
+   REF is loaded.  */
+static void
+load_function_body_of_ipa_ref (struct ipa_ref* ref)
+{
+  /* IPA passes don't get the function bodies during LTO.  */
+  if (!in_lto_p)
+    return;
+
+  cgraph_node *referring_fn = dyn_cast <cgraph_node *> (ref->referring);
+  load_function_body_of_ipa_ref (referring_fn);
+}
+
+
+/* Dump details about the variable reference REF to dump_file: its
+   kind, the referring function and the referencing statement.  */
+static void
+dump_vnode_ipa_ref (ipa_ref* ref)
+{
+  fprintf (dump_file, "Reference type: %s\n",
+	   stringify_ipa_ref_use (ref->use));
+
+  symtab_node *f_node = ref->referring;
+  fprintf (dump_file, "Reference function: %s\n", f_node->dump_asm_name ());
+
+  /* Print the pointer with %p instead of casting to long: long is only
+     32 bits on LLP64 hosts and would truncate the address.  */
+  fprintf (dump_file, "Gimple stmt (@%p, lto_stmt_uid: %u):\n",
+	   (void *) ref->stmt, ref->lto_stmt_uid);
+  load_function_body_of_ipa_ref (ref);
+  print_gimple_stmt (dump_file, ref->stmt, 0, TDF_NONE);
+}
+
+//XXX: Move to tree.h
+/* Return true if OP is a constant literal node.  */
+static bool
+tree_code_is_cst (tree op)
+{
+  //XXX: use cprop_constant_p() here?
+  switch (TREE_CODE (op))
+    {
+    case INTEGER_CST:
+    case REAL_CST:
+    case COMPLEX_CST:
+    case VECTOR_CST:
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Return true if all ops of the assignment STMT are constants, i.e.
+   STMT is a plain copy whose single RHS operand is a constant
+   literal and no operand is volatile.  */
+static bool
+gimple_assign_is_single_const(gimple *stmt)
+{
+  tree op;
+
+  gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN);
+
+  /* Volatile accesses must be preserved as-is.  */
+  if (gimple_has_volatile_ops (stmt))
+    {
+      if (dump_file)
+        fprintf (dump_file, "has volatile ops!\n");
+      return false;
+    }
+
+  /* Exactly two operands: the LHS and a single RHS (a plain copy).  */
+  if (gimple_num_ops (stmt) != 2)
+    {
+      if (dump_file)
+        fprintf (dump_file, "wrong num of ops: %u!\n", gimple_num_ops (stmt));
+      return false;
+    }
+
+  op = gimple_op (stmt, 1);
+  if (!tree_code_is_cst (op))
+    {
+      if (dump_file)
+        fprintf (dump_file, "op is not cst!\n");
+      return false;
+    }
+
+  return true;
+}
+
+//XXX: Move to ipa-utils.c
+//XXX: from/to could be const
+/* Depth-first search helper for path_exists: return true if TARGET is
+   reachable from CURRENT via callee edges of the call graph.  VISITED
+   guards against cycles.  */
+static bool
+path_exists_dfs (hash_set<cgraph_node*> *visited, cgraph_node* current,
+		 cgraph_node *target)
+{
+  if (current == target)
+    return true;
+
+  visited->add (current);
+
+  for (cgraph_edge *e = current->callees; e; e = e->next_callee)
+    {
+      cgraph_node *child = e->callee->function_symbol ();
+      if (visited->contains (child))
+	continue;
+      if (path_exists_dfs (visited, child, target))
+	return true;
+    }
+
+  return false;
+}
+
+//XXX: Move to ipa-utils.c
+//XXX: from/to could be const
+/* Return true if TO is reachable from FROM via call graph edges.  */
+static bool
+path_exists (cgraph_node* from, cgraph_node *to)
+{
+  /* No explicit clearing needed: VISITED is destroyed on scope exit
+     (the original cleared it redundantly just before destruction).  */
+  hash_set<cgraph_node*> visited;
+  bool found_path = path_exists_dfs (&visited, from, to);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "Found ");
+      if (!found_path)
+	fprintf (dump_file, "*no* ");
+      fprintf (dump_file, "path from %s to %s.\n", from->name (),
+	       to->name ());
+    }
+  return found_path;
+}
+
+/* Return the set of functions called from CALLER by call statements
+   that are *not* strictly dominated by the call to CALLEE, i.e. the
+   callees that may execute before CALLEE is reached.
+
+   Assumption: there is at least one callsite from CALLER to CALLEE.  */
+vec<struct cgraph_node*>
+get_nondominated_callees (cgraph_node* caller, cgraph_node* callee)
+{
+  if (in_lto_p)
+    caller->get_untransformed_body ();
+
+  function *func = DECL_STRUCT_FUNCTION (caller->decl);
+
+  /* Locate the basic block containing the call to CALLEE.  */
+  basic_block bb;
+  bool found = false;
+  FOR_EACH_BB_FN (bb, func)
+    {
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+	   gsi_next (&gsi))
+	{
+	  gimple *stmt = gsi_stmt (gsi);
+	  if (gimple_code (stmt) != GIMPLE_CALL)
+	    continue;
+
+	  /* Indirect calls have no fndecl (the original crashed here on
+	     NULL); they cannot be the direct call to CALLEE.  */
+	  tree callee_decl = gimple_call_fndecl (stmt);
+	  if (!callee_decl)
+	    continue;
+
+	  if (cgraph_node::get (callee_decl) == callee)
+	    {
+	      found = true;
+	      break;
+	    }
+	}
+      if (found)
+	break;
+    }
+
+  gcc_assert (found);
+
+  /* BB now holds the block of the call to CALLEE.  Collect calls from
+     all blocks BB does not strictly dominate; within BB itself, stop
+     at the call to CALLEE.  */
+  vec<struct cgraph_node*> callees = vNULL;
+  basic_block bb_c;
+  FOR_EACH_BB_FN (bb_c, func)
+    {
+      bool self_dominates = bb == bb_c;
+      bool bb_dominates_bbc = bb1_dominates_bb2 (bb, bb_c, caller);
+      if (bb_dominates_bbc && !self_dominates)
+	continue;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb_c); !gsi_end_p (gsi);
+	   gsi_next (&gsi))
+	{
+	  gimple *stmt = gsi_stmt (gsi);
+	  if (gimple_code (stmt) != GIMPLE_CALL)
+	    continue;
+
+	  /* Guard against indirect calls (no fndecl) and decls without
+	     a cgraph node.  NOTE(review): skipping an indirect call is
+	     optimistic -- the caller should treat unknown calls
+	     conservatively; confirm before relying on this.  */
+	  tree callee_decl = gimple_call_fndecl (stmt);
+	  cgraph_node *callee_node
+	    = callee_decl ? cgraph_node::get (callee_decl) : NULL;
+	  if (!callee_node)
+	    continue;
+
+	  if (fndecl_built_in_p (callee_node->decl))
+	    continue;
+
+	  if (self_dominates && callee_node == callee)
+	    break;
+
+	  callees.safe_push (callee_node);
+	}
+    }
+  return callees;
+}
+
+
+//XXX: Move to class symtab_node in cgraph.h
+/* Fetch the I-th reference to node N into REF; return REF iff it is
+   an address reference, NULL otherwise.  */
+static ipa_ref*
+symtab_node_iterate_address_uses (const symtab_node *n, unsigned i,
+				  ipa_ref *&ref)
+{
+  n->ref_list.referring.iterate (i, &ref);
+
+  bool is_addr_use = ref && ref->use == IPA_REF_ADDR;
+  return is_addr_use ? ref : NULL;
+}
+
+//XXX: Move to class symtab_node in cgraph.h
+/* Return true if the first reference to node N is an address use.  */
+static bool
+symtab_node_address_matters_p (const symtab_node *n)
+{
+  ipa_ref *first_ref = NULL;
+  ipa_ref *addr_use = symtab_node_iterate_address_uses (n, 0, first_ref);
+  return addr_use != NULL;
+}
+
+/* Return true if BB1 dominates BB2 inside the function of CNODE.
+   The dominance framework operates on the global cfun, so cfun is
+   temporarily switched to CNODE's function.  */
+static bool
+bb1_dominates_bb2 (basic_block bb1, basic_block bb2, cgraph_node *cnode)
+{
+   // self dominance
+   if (bb1 == bb2) return true;
+
+   push_cfun (DECL_STRUCT_FUNCTION (cnode->decl));
+
+   /* If dominator info is not available, we need to calculate it.  */
+   if (!dom_info_available_p (CDI_DOMINATORS))
+      calculate_dominance_info (CDI_DOMINATORS);
+
+   /* Check if the read is dominated by the write.  */
+   bool ret = dominated_by_p (CDI_DOMINATORS, bb2, bb1);
+
+   /* Restore cfun.  */
+   pop_cfun ();
+
+   return ret;
+}
+
+/* Return true if WRITE is guaranteed to execute before READ when both
+   are located in the same function: either WRITE's block dominates
+   READ's block, or they share a block and WRITE comes first.  */
+static bool
+write_before_read_in_function (struct ipa_ref *write, struct ipa_ref *read)
+{
+  basic_block w_bb = gimple_bb (write->stmt);
+  basic_block r_bb = gimple_bb (read->stmt);
+
+  if (w_bb != r_bb)
+    {
+      /*
+       * The dominator framework operates on cfun.
+       * bb1_dominates_bb2 switches cfun to the writer's function.
+       */
+      symtab_node *w_node = write->referring;
+      cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node);
+      return bb1_dominates_bb2 (w_bb, r_bb, w_cnode);
+    }
+
+  /* Same block: scan in statement order; whichever comes first wins.  */
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start_bb (w_bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      if (gsi_stmt (gsi) == write->stmt)
+	return true;
+      if (gsi_stmt (gsi) == read->stmt)
+	return false;
+    }
+
+  /* Neither write nor read found in their BB; this cannot happen.
+     The original `gcc_assert (1)' was a no-op that could never fire.  */
+  gcc_unreachable ();
+
+  return false;
+}
+
+/*
+ * DFS over the callees of N restricted to SET; return A or B,
+ * whichever is found first among the (transitive) callees, or NULL
+ * if neither is reachable.
+ */
+static cgraph_node*
+find_cgraph_in_callee_set (cgraph_node* n, hash_set<cgraph_node*> set,
+			   cgraph_node* a, cgraph_node* b)
+{
+  cgraph_edge *cs;
+  for (cs = n->callees; cs; cs = cs->next_callee)
+    {
+      cgraph_node *callee = cs->callee->function_symbol ();
+      if (dump_file)
+	fprintf (dump_file, "Child of %s: %s\n", n->dump_asm_name (),
+		 callee->dump_asm_name ());
+      if (callee == a)
+	return a;
+      if (callee == b)
+	return b;
+      if (!set.contains (callee))
+	continue;
+      /* Bug fix: the original returned the recursive result directly,
+	 abandoning the remaining sibling callees when the recursion
+	 found nothing.  Keep scanning unless A or B was found.  */
+      cgraph_node *found = find_cgraph_in_callee_set (callee, set, a, b);
+      if (found)
+	return found;
+    }
+  return NULL;
+}
+
+/* Walks back along caller relations until main is found.  On success
+   fills PATH with the call chain from main () down to NODE (main
+   first) and returns main's cgraph node; returns NULL otherwise.  */
+static cgraph_node*
+get_ancestor_main_dfs (hash_set<cgraph_node*>* visited,
+		       vec<cgraph_node*>* path, cgraph_node *node)
+{
+  /* Base case: reached main itself; start recording the path.  */
+  if (MAIN_NAME_P (DECL_NAME (node->decl)))
+    {
+      path->safe_push (node);
+      return node;
+    }
+
+  visited->add (node);
+
+  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
+    {
+      cgraph_node *caller = cs->caller->function_symbol ();
+      if (visited->contains (caller))
+	continue;
+      cgraph_node *main_node = get_ancestor_main_dfs (visited, path, caller);
+      if (main_node)
+	{
+	  /* Record this node while unwinding so PATH ends at NODE.  */
+	  path->safe_push (node);
+	  return main_node;
+	}
+    }
+  return NULL;
+}
+
+/* Compute into PATH the call chain from main () to NODE; return the
+   cgraph node of main (), or NULL if NODE is unreachable from main.  */
+static cgraph_node*
+get_path_from_main_to (cgraph_node *node, vec<cgraph_node*>* path)
+{
+  hash_set<cgraph_node*> seen;
+  cgraph_node *main_node = get_ancestor_main_dfs (&seen, path, node);
+  seen.empty ();
+  return main_node;
+}
+
+
+/*
+ * Verifying that a stmt s1 is dominated by a stmt s2
+ * across function borders is not trivial with the available
+ * infrastructure (dominator algorithm on bb level plus cgraph).
+ * If we rule out external calls/callbacks, then we still need
+ * to guarantee a write on each possible path to the read.
+ *
+ * The implemented solution, which is of course too strict, but also
+ * not too compute/memory intensive, is as follows:
+ *
+ * - Check if the write is reachable from main().
+ * - All call stmts between main() and the write must not possibly
+ *   reach a read.  Indirect call statements are considered as
+ *   possibly reaching a read (unless we can prove the opposite).
+ *
+ * External calls/callbacks are ruled out as follows:
+ *
+ * - all possible ancestors of the read must not be externally visible
+ * - all possible ancestors of the read must not have their address
+ *   taken (i.e. must not be callable through a function pointer)
+ */
+static bool
+write_before_read_across_function_borders (struct ipa_ref *write, struct ipa_ref *read)
+{
+  symtab_node *w_node = write->referring;
+  cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node);
+  symtab_node *r_node = read->referring;
+  cgraph_node *r_cnode = dyn_cast <cgraph_node *> (r_node);
+  cgraph_node *main;
+
+  /* Get main() function. */
+  vec<cgraph_node*> pw = vNULL;
+  main = get_path_from_main_to(w_cnode, &pw);
+  if (!main)
+    {
+      if (dump_file)
+	fprintf (dump_file, "main() is not ancestor of write -> skipping.\n");
+      return false;
+    }
+
+  vec<cgraph_node*> pr = vNULL;
+  cgraph_node *main_to_read = get_path_from_main_to(r_cnode, &pr);
+  if (!main_to_read)
+     {
+      if (dump_file)
+	fprintf (dump_file, "main() is not ancestor of read -> skipping.\n");
+      return false;
+     }
+
+  int i;
+  cgraph_node *node_in_pr;
+  FOR_EACH_VEC_ELT (pr, i, node_in_pr)
+     {
+      // I think main has to be externally visible.
+      if (node_in_pr == main) continue;
+
+      /* Assure that all paths to read are not externally visible.  */
+      if (cgraph_externally_visible_p (node_in_pr, flag_whole_program))
+        {
+         if (dump_file)
+	    fprintf (dump_file, "%s is externally visible... skipping\n", node_in_pr->dump_asm_name ());
+         return false;
+        }
+
+      /* Assure that all paths to read are not used as function pointers.  */
+      if (node_in_pr->address_taken)
+        {
+         if (dump_file)
+	    fprintf (dump_file, "%s might be function pointer... skipping\n", node_in_pr->dump_asm_name ());
+         return false;
+        }
+     }
+
+  /* Walk the path main -> ... -> writer; at each step, any call that
+     is not dominated by the step into the path must not reach the
+     read.  */
+  cgraph_node* caller = main;
+  cgraph_node* node_in_pw;
+  FOR_EACH_VEC_ELT (pw, i, node_in_pw)
+     {
+     gcc_assert (w_cnode != r_cnode);
+     /* Reader appears on the path and can reach the writer: the write
+	must happen via a call dominating the read.  */
+     if (node_in_pw == r_cnode && path_exists(r_cnode, w_cnode))
+        {
+        return call_before_read(write, read);
+        }
+
+     /* Writer appears on the path and can reach the reader: the write
+	must dominate the call leading to the read.  */
+     if (node_in_pw == w_cnode && path_exists(w_cnode, r_cnode))
+        {
+        return write_before_call(write, read);
+        }
+
+     if (node_in_pw == main)
+        {
+        continue;
+        }
+
+     if (dump_file)
+	fprintf (dump_file, "get_nondominated_callees from %s to %s\n", caller->dump_asm_name(), node_in_pw->dump_asm_name());
+
+     /* Clones do not carry the body; walk to the original node.  */
+     while(node_in_pw->clone_of) node_in_pw = node_in_pw->clone_of;
+     vec<cgraph_node *> non_dominated_callees = get_nondominated_callees(caller, node_in_pw);
+     cgraph_node* non_dominated_callee;
+     int j;
+     FOR_EACH_VEC_ELT(non_dominated_callees, j, non_dominated_callee)
+        {
+        if (dump_file)
+	   fprintf (dump_file, "callee %d %s\n", j, non_dominated_callee->dump_asm_name());
+        /* A callee that may run before the step and can reach the
+	   read defeats the proof.  */
+        if(path_exists(non_dominated_callee, r_cnode)) {
+           return false;
+           }
+        }
+
+
+        caller = node_in_pw;
+     }
+   return true;
+}
+
+
+/* WRITE and READ are in different functions and a call path exists
+   from the writer to the reader.  Return true if no call statement
+   that may execute before WRITE inside the writer can reach the
+   reader, i.e. the write happens before any call leading to READ.  */
+static bool
+write_before_call (struct ipa_ref *write, struct ipa_ref *read)
+{
+  cgraph_node *w_cnode = dyn_cast <cgraph_node *> (write->referring);
+  cgraph_node *r_cnode = dyn_cast <cgraph_node *> (read->referring);
+
+  gcc_assert (path_exists (w_cnode, r_cnode));
+  gcc_assert (w_cnode != r_cnode);
+
+  basic_block w_bb = gimple_bb (write->stmt);
+
+  if (in_lto_p)
+    {
+      /* Clones share the body of the original node.  */
+      while (w_cnode->clone_of)
+	w_cnode = w_cnode->clone_of;
+      w_cnode->get_untransformed_body ();
+    }
+
+  /* Collect every call in the writer that is not strictly dominated
+     by the write (within the write's own block: only calls before
+     the write).  */
+  function *func = DECL_STRUCT_FUNCTION (w_cnode->decl);
+  basic_block c_bb;
+  vec<struct cgraph_node*> callees = vNULL;
+  FOR_EACH_BB_FN (c_bb, func)
+    {
+      bool self_dominates = w_bb == c_bb;
+      bool w_bb_dominates_c_bb = bb1_dominates_bb2 (w_bb, c_bb, w_cnode);
+      if (w_bb_dominates_c_bb && !self_dominates)
+	continue;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); !gsi_end_p (gsi);
+	   gsi_next (&gsi))
+	{
+	  gimple *stmt = gsi_stmt (gsi);
+
+	  if (stmt == write->stmt)
+	    break;
+
+	  if (gimple_code (stmt) != GIMPLE_CALL)
+	    continue;
+
+	  /* Indirect call: no fndecl.  Conservatively assume it may
+	     reach the reader (the original crashed here on NULL).  */
+	  tree callee_decl = gimple_call_fndecl (stmt);
+	  if (!callee_decl)
+	    {
+	      callees.release ();
+	      return false;
+	    }
+
+	  cgraph_node *callee_node = cgraph_node::get (callee_decl);
+	  if (fndecl_built_in_p (callee_node->decl))
+	    continue;
+
+	  if (dump_file)
+	    fprintf (dump_file, "found callee %s\n",
+		     callee_node->dump_asm_name ());
+	  callees.safe_push (callee_node);
+	}
+    }
+
+  /* If any collected call can reach the reader, the write may come
+     too late.  */
+  bool safe = true;
+  cgraph_node *callee;
+  int i;
+  FOR_EACH_VEC_ELT (callees, i, callee)
+    if (path_exists (callee, r_cnode))
+      {
+	safe = false;
+	break;
+      }
+
+  /* Fix a leak: release the locally allocated vector.  */
+  callees.release ();
+  return safe;
+}
+
+/* WRITE and READ are in different functions and a call path exists
+   from the reader to the writer.  Return true if some call statement
+   dominating READ inside the reader can reach the writer, i.e. the
+   write is guaranteed to have happened before READ executes.  */
+static bool
+call_before_read (struct ipa_ref *write, struct ipa_ref *read)
+{
+  cgraph_node *w_cnode = dyn_cast <cgraph_node *> (write->referring);
+  cgraph_node *r_cnode = dyn_cast <cgraph_node *> (read->referring);
+
+  gcc_assert (path_exists (r_cnode, w_cnode));
+  gcc_assert (w_cnode != r_cnode);
+
+  basic_block r_bb = gimple_bb (read->stmt);
+
+  if (in_lto_p)
+    r_cnode->get_untransformed_body ();
+
+  function *func = DECL_STRUCT_FUNCTION (r_cnode->decl);
+  basic_block c_bb;
+  FOR_EACH_BB_FN (c_bb, func)
+    {
+      /* Only calls in blocks dominating the read are guaranteed to
+	 execute before it.  */
+      bool self_dominates = c_bb == r_bb;
+      bool call_dominates_read = bb1_dominates_bb2 (c_bb, r_bb, r_cnode);
+      if (!call_dominates_read && !self_dominates)
+	continue;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); !gsi_end_p (gsi);
+	   gsi_next (&gsi))
+	{
+	  gimple *stmt = gsi_stmt (gsi);
+
+	  /* In the read's own block, stop at the read statement.  */
+	  if (stmt == read->stmt)
+	    break;
+
+	  if (gimple_code (stmt) != GIMPLE_CALL)
+	    continue;
+
+	  /* Skip indirect calls; they have no fndecl (the original
+	     crashed here on NULL).  */
+	  tree callee_decl = gimple_call_fndecl (stmt);
+	  if (!callee_decl)
+	    continue;
+
+	  cgraph_node *callee_node = cgraph_node::get (callee_decl);
+	  if (callee_node && path_exists (callee_node, w_cnode))
+	    return true;
+	}
+    }
+  return false;
+}
+
+/* Dispatch: decide whether WRITE is executed before READ, choosing
+   the intra- or inter-procedural analysis.  */
+static bool
+write_before_read (struct ipa_ref *write, struct ipa_ref *read)
+{
+  cgraph_node *writer = dyn_cast <cgraph_node *> (write->referring);
+  cgraph_node *reader = dyn_cast <cgraph_node *> (read->referring);
+
+  /* Same function: plain dominance; otherwise walk the call graph.  */
+  return (writer == reader)
+	 ? write_before_read_in_function (write, read)
+	 : write_before_read_across_function_borders (write, read);
+}
+
+/* Return true if WRITE provably executes before every read in READS.  */
+static bool
+write_before_all_reads (struct ipa_ref* write, const ipa_ref_vec &reads)
+{
+  int idx;
+  struct ipa_ref* read;
+
+  FOR_EACH_VEC_ELT (reads, idx, read)
+    {
+      /* The read's function body is needed for the dominance checks.  */
+      load_function_body_of_ipa_ref (read);
+      if (!write_before_read (write, read))
+	return false;
+    }
+  return true;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* Replace the load in REF (an IPA_REF_LOAD assignment 'x = var') with
+   a direct assignment of the known constant WRITE_VAL: a fresh SSA
+   name is assigned WRITE_VAL and every use of the old LHS is rewritten
+   to it.  The old load statement itself is left in place and becomes
+   dead.  (The large block of commented-out dump code was removed.)  */
+static void
+propagate_constant_to_read (tree write_val, struct ipa_ref* ref)
+{
+  gcc_assert (ref->use == IPA_REF_LOAD);
+
+  //XXX: Already done for all reads in write_before_all_reads()
+  load_function_body_of_ipa_ref (ref);
+
+  gimple *read_stmt = ref->stmt;
+
+  gcc_assert (gimple_code (read_stmt) == GIMPLE_ASSIGN);
+  gcc_assert (gimple_num_ops (read_stmt) == 2);
+
+  tree old_lhs = gimple_op (read_stmt, 0);
+
+  /*
+   * SSA works on function level and the SSA API
+   * assumes the global 'cfun' to be set properly.
+   * Let's do so.
+   */
+  symtab_node *r_node = ref->referring;
+  cgraph_node *r_cnode = dyn_cast <cgraph_node *> (r_node);
+  /* Clones share the body of the original node.  */
+  while (r_cnode->clone_of)
+    r_cnode = r_cnode->clone_of;
+  push_cfun (DECL_STRUCT_FUNCTION (r_cnode->decl));
+
+  /* Build new stmt and insert it before the old one.  */
+  gimple_stmt_iterator gsi = gsi_for_stmt (read_stmt);
+
+  tree new_lhs = make_ssa_name (TREE_TYPE (old_lhs));
+  gimple *new_read_stmt = gimple_build_assign (new_lhs, write_val);
+  gsi_insert_before (&gsi, new_read_stmt, GSI_SAME_STMT);
+
+  /* Fixup SSA uses: redirect every use of OLD_LHS to NEW_LHS.  */
+  gimple *use_stmt;
+  imm_use_iterator use_iter;
+  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, old_lhs)
+    {
+      use_operand_p use_p;
+      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
+	SET_USE (use_p, new_lhs);
+      update_stmt (use_stmt);
+    }
+
+  pop_cfun ();
+}
+
+/* Propagate the constant WRITE_VAL into every read in READS.  */
+static void
+propagate_constant_to_reads (tree write_val, const ipa_ref_vec &reads)
+{
+  int idx;
+  struct ipa_ref* read_ref;
+
+  /* Iterate over reads and replace each by the constant.  */
+  FOR_EACH_VEC_ELT (reads, idx, read_ref)
+    propagate_constant_to_read (write_val, read_ref);
+}
+
+/* Partition all references to VNODE into WRITES (IPA_REF_STORE) and
+   READS (IPA_REF_LOAD).  Alias and address references have already
+   been excluded by the caller.  */
+static void
+ipa_initcall_get_writes_and_reads (varpool_node *vnode, ipa_ref_vec *writes,
+				   ipa_ref_vec *reads)
+{
+  struct ipa_ref *ref;
+
+  if (dump_file)
+    fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ());
+
+  /* Only IPA_REF_STORE and IPA_REF_LOAD left.  */
+  for (int i = 0; vnode->iterate_referring (i, ref); i++)
+    switch (ref->use)
+      {
+      case IPA_REF_STORE:
+	writes->safe_push (ref);
+	break;
+      case IPA_REF_LOAD:
+	reads->safe_push (ref);
+	break;
+      default:
+	break;
+      }
+}
+
+/*
+ * Extracts the assigned constant, iff the given statement
+ * is a constant assignment.  Returns NULL_TREE otherwise.
+ *
+ * WRITE is the IPA_REF_STORE reference to the variable; the write's
+ * stmt must be of the form 'var = CST' with a non-volatile var.
+ */
+static tree
+extract_constant_from_initcall_write (struct ipa_ref *write)
+{
+  symtab_node *f_node = write->referring;
+  cgraph_node *f_cnode = dyn_cast <cgraph_node *> (f_node);
+
+  tree decl = f_cnode->decl;
+  if (TREE_CODE (decl) != FUNCTION_DECL)
+    {
+      if (dump_file)
+	fprintf (dump_file, "Not function decl -> skipping.\n");
+      return NULL_TREE;
+    }
+
+  /* Diagnostics only; neither condition aborts the analysis.  */
+  if (!f_cnode->has_gimple_body_p () && dump_file)
+    fprintf (dump_file, "Does NOT have gimple body!\n");
+  if (f_cnode->global.inlined_to && dump_file)
+    fprintf (dump_file, "Inlined To\n");
+
+  /* Clones share the body of the node they were cloned from.  */
+  while (f_cnode->clone_of) {
+    if (dump_file) fprintf (dump_file, "Copy of...well, we still need the constant!\n");
+    f_cnode = f_cnode->clone_of;
+  }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "%s: for writer gimple:\n", __func__);
+      dump_vnode_ipa_ref(write);
+    }
+
+  /* Get the write function's body.  */
+  load_function_body_of_ipa_ref (write);
+
+  gimple *stmt = write->stmt;
+
+  /* Verify that we have an assignment.  */
+  if (gimple_code (stmt) != GIMPLE_ASSIGN)
+    {
+      if (dump_file)
+	fprintf (dump_file, "Write stmt not assignment -> skipping.\n");
+      return NULL_TREE;
+    }
+
+  /* Check if write's LHS (vnode) is not volatile.  */
+  tree lhs = gimple_assign_lhs (stmt);
+  if (TREE_THIS_VOLATILE (TREE_TYPE (lhs)))
+    {
+      if (dump_file)
+	fprintf (dump_file, "Variable volatile -> skipping.\n");
+      return NULL_TREE;
+    }
+
+  /* Check if RHS of write stmt is constant. */
+  if (!gimple_assign_is_single_const (stmt))
+    {
+      if (dump_file)
+	{
+	  fprintf (dump_file, "Found non-const operands.\n");
+	}
+      return NULL_TREE;
+    }
+
+  /* Extract the constant (the single RHS operand).  */
+  tree write_val = gimple_op (stmt, 1);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "Const op:\n");
+      dump_node (write_val, TDF_DETAILS, dump_file);
+    }
+
+  return write_val;
+}
+
+/* Try the initcall constant propagation for the single variable
+   VNODE: check all preconditions, verify the unique write happens
+   before every read, and if so replace all reads by the constant.  */
+static void
+ipa_initcall_cp_execute_for_var (varpool_node *vnode)
+{
+  ipa_ref_vec writes = vNULL;
+  ipa_ref_vec reads = vNULL;
+  struct ipa_ref* write;
+  tree write_val;
+  gimple_stmt_iterator gsi;
+  bool remove_permanently;
+
+  if (dump_file)
+    fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ());
+
+  if (dump_file)
+    {
+      dump_node (vnode->decl, TDF_DETAILS, dump_file);
+    }
+
+  /* Variable must not be externally visible.  */
+  if (vnode->externally_visible_p ())
+    {
+      if (dump_file)
+	fprintf (dump_file, "\texternally visible -> skipping\n");
+      return;
+    }
+
+  /* All refs must be explicit.  */
+  if (!vnode->all_refs_explicit_p ())
+    {
+      if (dump_file)
+	fprintf (dump_file, "Not explicit variable refs -> skipping.\n");
+      return;
+    }
+
+  /* Check if any ref->use is IPA_REF_ALIAS.  */
+  if (vnode->has_aliases_p ())
+    {
+      if (dump_file)
+	fprintf (dump_file, "Found IPA_REF_ALIAS -> skipping.\n");
+      return;
+    }
+
+  /* Check if any ref->use is IPA_REF_ADDR.  */
+  if (symtab_node_address_matters_p (vnode))
+    {
+      if (dump_file)
+	fprintf (dump_file, "Found IPA_REF_ADDR -> skipping.\n");
+      return;
+    }
+
+  /* We don't touch arrays.  */
+  if (TREE_CODE (TREE_TYPE (vnode->decl)) == ARRAY_TYPE)
+    {
+      if (dump_file)
+	fprintf (dump_file, "Variable is array -> skipping.\n");
+      return;
+    }
+
+  /* Get all refs (must be REF_STORE or REF_LOAD).  */
+  ipa_initcall_get_writes_and_reads (vnode, &writes, &reads);
+
+  if (writes.length () > 1)
+    {
+      /* More than one writer.  */
+      if (dump_file)
+	fprintf (dump_file, "More than one writer -> skipping.\n");
+      goto end;
+    }
+  else if (writes.length () < 1)
+    {
+      /* No writer.  */
+      if (dump_file)
+	fprintf (dump_file, "No writer -> skipping.\n");
+      goto end;
+    }
+
+  /*
+   * Note:
+   * Limiting ourselfs to only one write is not necessary in general,
+   * but good enough to address the typical init() case.
+   * Big advantage is, that it makes the following code much easier.
+   */
+
+  /* Get (only) write ref.  */
+  write = writes.pop ();
+
+  /* Check if write's RHS is a constant and get it.  */
+  write_val = extract_constant_from_initcall_write (write);
+  if (write_val == NULL_TREE)
+    {
+      if (dump_file)
+	fprintf (dump_file, "Write's RHS is not constant -> skipping.\n");
+      goto end;
+    }
+
+  /* Assure all reads are after the write.  */
+  if (!write_before_all_reads (write, reads))
+    {
+      if (dump_file)
+	fprintf (dump_file, "Write not guaranteed to be before read -> skipping.\n");
+      goto end;
+    }
+
+  /* Propagate constant to reads.  */
+  propagate_constant_to_reads (write_val, reads);
+
+  /* Finally remove the write.  */
+  /* NOTE(review): removal is disabled; gsi_remove with
+     remove_permanently == true reportedly fails -- confirm before
+     enabling.  The now-dead store is left in place.  */
+  gsi = gsi_for_stmt (write->stmt);
+  remove_permanently = false; //XXX: fails with true???
+  //gsi_remove (&gsi, remove_permanently);
+
+  if (dump_file)
+    fprintf (dump_file, "Eliminated variable '%s'.\n\n", vnode->name ());
+
+end:
+  writes.release ();
+  reads.release ();
+}
+
+/* Pass entry point: attempt the transformation for every variable
+   with static lifetime in the link unit.  */
+static unsigned int
+ipa_initcall_cp_execute (void)
+{
+  varpool_node *vnode;
+
+  FOR_EACH_VARIABLE (vnode)
+    ipa_initcall_cp_execute_for_var (vnode);
+
+  return 0;
+}
+
+namespace {
+
+/* Pass metadata.  PROP_cfg and PROP_ssa are required since the pass
+   inspects basic blocks and rewrites SSA uses.  */
+const pass_data pass_data_ipa_initcall_cp =
+{
+  SIMPLE_IPA_PASS, /* type */
+  "initcall_cp", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  ( TODO_update_ssa | TODO_cleanup_cfg | TODO_dump_symtab ), /* todo_flags_finish */
+};
+
+/* Simple IPA pass wrapper around ipa_initcall_cp_execute.  */
+class pass_ipa_initcall_cp : public simple_ipa_opt_pass
+{
+public:
+  pass_ipa_initcall_cp (gcc::context *ctxt)
+    : simple_ipa_opt_pass (pass_data_ipa_initcall_cp, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  /* Run only when -finitcall-cp was given.  */
+  virtual bool gate (function *)
+    {
+      return flag_initcall_cp;
+    }
+
+  virtual unsigned int execute (function *)
+    {
+      return ipa_initcall_cp_execute();
+    }
+
+}; // class pass_ipa_initcall_cp
+
+} // anon namespace
+
+/* Factory registered in passes.def / tree-pass.h.  */
+simple_ipa_opt_pass *
+make_pass_ipa_initcall_cp (gcc::context *ctxt)
+{
+  return new pass_ipa_initcall_cp (ctxt);
+}
diff --git a/gcc/passes.def b/gcc/passes.def
index 1a7fd14..fe699e7 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -147,7 +147,7 @@  along with GCC; see the file COPYING3.  If not see
    NEXT_PASS (pass_ipa_profile);
    NEXT_PASS (pass_ipa_icf);
    NEXT_PASS (pass_ipa_devirt);
-  NEXT_PASS (pass_ipa_cp);
+  //NEXT_PASS (pass_ipa_cp);
    NEXT_PASS (pass_ipa_cdtor_merge);
    NEXT_PASS (pass_ipa_hsa);
    NEXT_PASS (pass_ipa_fn_summary);
@@ -155,6 +155,7 @@  along with GCC; see the file COPYING3.  If not see
    NEXT_PASS (pass_ipa_pure_const);
    NEXT_PASS (pass_ipa_free_fn_summary, false /* small_p */);
    NEXT_PASS (pass_ipa_reference);
+  NEXT_PASS (pass_ipa_initcall_cp);
    /* This pass needs to be scheduled after any IP code duplication.   
*/
    NEXT_PASS (pass_ipa_single_use);
    /* Comdat privatization come last, as direct references to comdat 
local
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 1c8df3d..26a8289 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -498,6 +498,7 @@  extern ipa_opt_pass_d *make_pass_ipa_fn_summary 
(gcc::context *ctxt);
  extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt);
  extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context 
*ctxt);
  extern simple_ipa_opt_pass *make_pass_ipa_free_fn_summary (gcc::context 
*ctxt);
+extern simple_ipa_opt_pass *make_pass_ipa_initcall_cp (gcc::context