diff mbox series

Avoid ggc_alloc and push_cfun during LTO streaming

Message ID 20191011090252.akbdtyftgcojos2e@kam.mff.cuni.cz
State New
Headers show
Series Avoid ggc_alloc and push_cfun during LTO streaming | expand

Commit Message

Jan Hubicka Oct. 11, 2019, 9:02 a.m. UTC
Hi,
this patch prevents tree creation during WPA stream out (to avoid
touching pages and triggering COW).  It fixes the following
 - gimple streamer produces MEM_REF wrappings for global decls.
   This is to preserve the type of access and is not necessary for
   WPA->LTRANS streaming when decls are no longer going to be merged.
 - we renumber stmt uids during streaming WPA summaries
 - loop optimizer is initialized in output_function.
After testing the patch I noticed that output_function does one extra
renumbering of stmts. This seems quite broken and I will fix it
incrementally.

Bootstrapped/regtested x86_64-linux, committed.

	* gimple-streamer-out.c (output_gimple_stmt): Add explicit function
	parameter.
	* lto-streamer-out.c: Include tree-dfa.h.
	(output_cfg): Do not use cfun.
	(lto_prepare_function_for_streaming): New.
	(output_function): Do not push cfun; do not initialize loop optimizer.
	* lto-streamer.h (lto_prepare_function_for_streaming): Declare.
	* passes.c (ipa_write_summaries): Use it.
	(ipa_write_optimization_summaries): Do not modify bodies.
	* tree-dfa.c (renumber_gimple_stmt_uids): Add function parameter.
	* tree-dfa.h (renumber_gimple_stmt_uids): Update prototype.
	* tree-ssa-dse.c (pass_dse::execute): Update use of
	renumber_gimple_stmt_uids.
	* tree-ssa-math-opts.c (pass_optimize_widening_mul::execute): Likewise.

	* lto.c (lto_wpa_write_files): Prepare all bodies for streaming.

Comments

Richard Biener Oct. 11, 2019, 11:37 a.m. UTC | #1
On Fri, Oct 11, 2019 at 11:03 AM Jan Hubicka <hubicka@ucw.cz> wrote:
>
> Hi,
> this patch prevents tree creation druing WPA stream out (to avoid
> touching pages and triggering COW).  It fixes the following
>  - gimple streamer produces MEM_REF wrappings for global decls.
>    This is to preserve the type of access and is not necessary for
>    WPA->LTRANS streaming when decls ar eno longer going to be merged.
>  - we renumber stmt uids during streaming WPA summaries
>  - loop optimizer is initialized in output_function.
> After testing the patch I noticed that output_function does one extra
> renumbering of stmts. This seems quite broken and I will fix it
> incrementally.
>
> Bootstrapped/regtested x86_64-linux, comitted.

Huh.  Why do we stream function bodies at WPA time at all?
We should already have input sections we can copy/remap?

That is, why does gcc_assert (!flag_wpa) in output_function trip?

Richard.

>
>         * gimple-streamer-out.c (output_gimple_stmt): Add explicit function
>         parameter.
>         * lto-streamer-out.c: Include tree-dfa.h.
>         (output_cfg): Do not use cfun.
>         (lto_prepare_function_for_streaming): New.
>         (output_function): Do not push cfun; do not initialize loop optimizer.
>         * lto-streamer.h (lto_prepare_function_for_streaming): Declare.
>         * passes.c (ipa_write_summaries): Use it.
>         (ipa_write_optimization_summaries): Do not modify bodies.
>         * tree-dfa.c (renumber_gimple_stmt_uids): Add function parameter.
>         * tree.dfa.h (renumber_gimple_stmt_uids): Update prototype.
>         * tree-ssa-dse.c (pass_dse::execute): Update use of
>         renumber_gimple_stmt_uids.
>         * tree-ssa-math-opts.c (pass_optimize_widening_mul::execute): Likewise.
>
>         * lto.c (lto_wpa_write_files): Prepare all bodies for streaming.
> Index: gimple-streamer-out.c
> ===================================================================
> --- gimple-streamer-out.c       (revision 276850)
> +++ gimple-streamer-out.c       (working copy)
> @@ -57,7 +57,7 @@ output_phi (struct output_block *ob, gph
>  /* Emit statement STMT on the main stream of output block OB.  */
>
>  static void
> -output_gimple_stmt (struct output_block *ob, gimple *stmt)
> +output_gimple_stmt (struct output_block *ob, struct function *fn, gimple *stmt)
>  {
>    unsigned i;
>    enum gimple_code code;
> @@ -80,7 +80,7 @@ output_gimple_stmt (struct output_block
>                      as_a <gassign *> (stmt)),
>                    1);
>    bp_pack_value (&bp, gimple_has_volatile_ops (stmt), 1);
> -  hist = gimple_histogram_value (cfun, stmt);
> +  hist = gimple_histogram_value (fn, stmt);
>    bp_pack_value (&bp, hist != NULL, 1);
>    bp_pack_var_len_unsigned (&bp, stmt->subcode);
>
> @@ -139,7 +139,7 @@ output_gimple_stmt (struct output_block
>              so that we do not have to deal with type mismatches on
>              merged symbols during IL read in.  The first operand
>              of GIMPLE_DEBUG must be a decl, not MEM_REF, though.  */
> -         if (op && (i || !is_gimple_debug (stmt)))
> +         if (!flag_wpa && op && (i || !is_gimple_debug (stmt)))
>             {
>               basep = &op;
>               if (TREE_CODE (*basep) == ADDR_EXPR)
> @@ -147,7 +147,7 @@ output_gimple_stmt (struct output_block
>               while (handled_component_p (*basep))
>                 basep = &TREE_OPERAND (*basep, 0);
>               if (VAR_P (*basep)
> -                 && !auto_var_in_fn_p (*basep, current_function_decl)
> +                 && !auto_var_in_fn_p (*basep, fn->decl)
>                   && !DECL_REGISTER (*basep))
>                 {
>                   bool volatilep = TREE_THIS_VOLATILE (*basep);
> @@ -228,7 +228,7 @@ output_bb (struct output_block *ob, basi
>               print_gimple_stmt (streamer_dump_file, stmt, 0, TDF_SLIM);
>             }
>
> -         output_gimple_stmt (ob, stmt);
> +         output_gimple_stmt (ob, fn, stmt);
>
>           /* Emit the EH region holding STMT.  */
>           region = lookup_stmt_eh_lp_fn (fn, stmt);
> Index: lto/lto.c
> ===================================================================
> --- lto/lto.c   (revision 276850)
> +++ lto/lto.c   (working copy)
> @@ -304,6 +304,13 @@ lto_wpa_write_files (void)
>
>    timevar_push (TV_WHOPR_WPA_IO);
>
> +  cgraph_node *node;
> +  /* Do body modifications needed for streaming before we fork out
> +     worker processes.  */
> +  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
> +    if (gimple_has_body_p (node->decl))
> +      lto_prepare_function_for_streaming (node);
> +
>    /* Generate a prefix for the LTRANS unit files.  */
>    blen = strlen (ltrans_output_list);
>    temp_filename = (char *) xmalloc (blen + sizeof ("2147483648.o"));
> Index: lto-streamer-out.c
> ===================================================================
> --- lto-streamer-out.c  (revision 276850)
> +++ lto-streamer-out.c  (working copy)
> @@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.
>  #include "debug.h"
>  #include "omp-offload.h"
>  #include "print-tree.h"
> +#include "tree-dfa.h"
>
>
>  static void lto_write_tree (struct output_block*, tree, bool);
> @@ -1893,7 +1894,7 @@ output_cfg (struct output_block *ob, str
>
>    streamer_write_hwi (ob, -1);
>
> -  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
> +  bb = ENTRY_BLOCK_PTR_FOR_FN (fn);
>    while (bb->next_bb)
>      {
>        streamer_write_hwi (ob, bb->next_bb->index);
> @@ -1902,9 +1903,6 @@ output_cfg (struct output_block *ob, str
>
>    streamer_write_hwi (ob, -1);
>
> -  /* ???  The cfgloop interface is tied to cfun.  */
> -  gcc_assert (cfun == fn);
> -
>    /* Output the number of loops.  */
>    streamer_write_uhwi (ob, number_of_loops (fn));
>
> @@ -2062,6 +2060,22 @@ collect_block_tree_leafs (tree root, vec
>        collect_block_tree_leafs (BLOCK_SUBBLOCKS (root), leafs);
>  }
>
> +/* This performs function body modifications that are needed for streaming
> +   to work.  */
> +
> +void
> +lto_prepare_function_for_streaming (struct cgraph_node *node)
> +{
> +  if (number_of_loops (DECL_STRUCT_FUNCTION (node->decl)))
> +    {
> +      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
> +      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
> +      loop_optimizer_finalize ();
> +      pop_cfun ();
> +    }
> +  renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (node->decl));
> +}
> +
>  /* Output the body of function NODE->DECL.  */
>
>  static void
> @@ -2085,9 +2099,6 @@ output_function (struct cgraph_node *nod
>
>    gcc_assert (current_function_decl == NULL_TREE && cfun == NULL);
>
> -  /* Set current_function_decl and cfun.  */
> -  push_cfun (fn);
> -
>    /* Make string 0 be a NULL string.  */
>    streamer_write_char_stream (ob->string_stream, 0);
>
> @@ -2124,9 +2135,6 @@ output_function (struct cgraph_node *nod
>       debug info.  */
>    if (gimple_has_body_p (function))
>      {
> -      /* Fixup loops if required to match discovery done in the reader.  */
> -      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
> -
>        streamer_write_uhwi (ob, 1);
>        output_struct_function_base (ob, fn);
>
> @@ -2143,8 +2151,8 @@ output_function (struct cgraph_node *nod
>          statement numbers.  We do not assign UIDs to PHIs here because
>          virtual PHIs get re-computed on-the-fly which would make numbers
>          inconsistent.  */
> -      set_gimple_stmt_max_uid (cfun, 0);
> -      FOR_ALL_BB_FN (bb, cfun)
> +      set_gimple_stmt_max_uid (fn, 0);
> +      FOR_ALL_BB_FN (bb, fn)
>         {
>           for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
>                gsi_next (&gsi))
> @@ -2153,25 +2161,25 @@ output_function (struct cgraph_node *nod
>
>               /* Virtual PHIs are not going to be streamed.  */
>               if (!virtual_operand_p (gimple_phi_result (stmt)))
> -               gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> +               gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
>             }
>           for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
>                gsi_next (&gsi))
>             {
>               gimple *stmt = gsi_stmt (gsi);
> -             gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> +             gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
>             }
>         }
>        /* To avoid keeping duplicate gimple IDs in the statements, renumber
>          virtual phis now.  */
> -      FOR_ALL_BB_FN (bb, cfun)
> +      FOR_ALL_BB_FN (bb, fn)
>         {
>           for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
>                gsi_next (&gsi))
>             {
>               gphi *stmt = gsi.phi ();
>               if (virtual_operand_p (gimple_phi_result (stmt)))
> -               gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> +               gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
>             }
>         }
>
> @@ -2183,9 +2191,6 @@ output_function (struct cgraph_node *nod
>        streamer_write_record_start (ob, LTO_null);
>
>        output_cfg (ob, fn);
> -
> -      loop_optimizer_finalize ();
> -      pop_cfun ();
>     }
>    else
>      streamer_write_uhwi (ob, 0);
> Index: lto-streamer.h
> ===================================================================
> --- lto-streamer.h      (revision 276850)
> +++ lto-streamer.h      (working copy)
> @@ -909,6 +909,7 @@ void lto_output_decl_state_refs (struct
>                                  struct lto_out_decl_state *);
>  void lto_output_location (struct output_block *, struct bitpack_d *, location_t);
>  void lto_output_init_mode_table (void);
> +void lto_prepare_function_for_streaming (cgraph_node *);
>
>
>  /* In lto-cgraph.c  */
> Index: passes.c
> ===================================================================
> --- passes.c    (revision 276850)
> +++ passes.c    (working copy)
> @@ -2705,20 +2705,12 @@ ipa_write_summaries (void)
>      {
>        struct cgraph_node *node = order[i];
>
> -      if (gimple_has_body_p (node->decl))
> +      if (node->definition && node->need_lto_streaming)
>         {
> -         /* When streaming out references to statements as part of some IPA
> -            pass summary, the statements need to have uids assigned and the
> -            following does that for all the IPA passes here. Naturally, this
> -            ordering then matches the one IPA-passes get in their stmt_fixup
> -            hooks.  */
> -
> -         push_cfun (DECL_STRUCT_FUNCTION (node->decl));
> -         renumber_gimple_stmt_uids ();
> -         pop_cfun ();
> +         if (gimple_has_body_p (node->decl))
> +           lto_prepare_function_for_streaming (node);
> +         lto_set_symtab_encoder_in_partition (encoder, node);
>         }
> -      if (node->definition && node->need_lto_streaming)
> -        lto_set_symtab_encoder_in_partition (encoder, node);
>      }
>
>    FOR_EACH_DEFINED_FUNCTION (node)
> @@ -2786,28 +2778,13 @@ void
>  ipa_write_optimization_summaries (lto_symtab_encoder_t encoder)
>  {
>    struct lto_out_decl_state *state = lto_new_out_decl_state ();
> -  lto_symtab_encoder_iterator lsei;
>    state->symtab_node_encoder = encoder;
>
>    lto_output_init_mode_table ();
>    lto_push_out_decl_state (state);
> -  for (lsei = lsei_start_function_in_partition (encoder);
> -       !lsei_end_p (lsei); lsei_next_function_in_partition (&lsei))
> -    {
> -      struct cgraph_node *node = lsei_cgraph_node (lsei);
> -      /* When streaming out references to statements as part of some IPA
> -        pass summary, the statements need to have uids assigned.
> -
> -        For functions newly born at WPA stage we need to initialize
> -        the uids here.  */
> -      if (node->definition
> -         && gimple_has_body_p (node->decl))
> -       {
> -         push_cfun (DECL_STRUCT_FUNCTION (node->decl));
> -         renumber_gimple_stmt_uids ();
> -         pop_cfun ();
> -       }
> -    }
> +
> +  /* Be sure that we did not forget to renumber stmt uids.  */
> +  gcc_checking_assert (flag_wpa);
>
>    gcc_assert (flag_wpa);
>    pass_manager *passes = g->get_passes ();
> Index: tree-dfa.c
> ===================================================================
> --- tree-dfa.c  (revision 276850)
> +++ tree-dfa.c  (working copy)
> @@ -61,23 +61,23 @@ static void collect_dfa_stats (struct df
>  /* Renumber all of the gimple stmt uids.  */
>
>  void
> -renumber_gimple_stmt_uids (void)
> +renumber_gimple_stmt_uids (struct function *fun)
>  {
>    basic_block bb;
>
> -  set_gimple_stmt_max_uid (cfun, 0);
> -  FOR_ALL_BB_FN (bb, cfun)
> +  set_gimple_stmt_max_uid (fun, 0);
> +  FOR_ALL_BB_FN (bb, fun)
>      {
>        gimple_stmt_iterator bsi;
>        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
>         {
>           gimple *stmt = gsi_stmt (bsi);
> -         gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> +         gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fun));
>         }
>        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
>         {
>           gimple *stmt = gsi_stmt (bsi);
> -         gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> +         gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fun));
>         }
>      }
>  }
> Index: tree-dfa.h
> ===================================================================
> --- tree-dfa.h  (revision 276850)
> +++ tree-dfa.h  (working copy)
> @@ -20,7 +20,7 @@ along with GCC; see the file COPYING3.
>  #ifndef GCC_TREE_DFA_H
>  #define GCC_TREE_DFA_H
>
> -extern void renumber_gimple_stmt_uids (void);
> +extern void renumber_gimple_stmt_uids (struct function *);
>  extern void renumber_gimple_stmt_uids_in_blocks (basic_block *, int);
>  extern void dump_variable (FILE *, tree);
>  extern void debug_variable (tree);
> Index: tree-ssa-dse.c
> ===================================================================
> --- tree-ssa-dse.c      (revision 276850)
> +++ tree-ssa-dse.c      (working copy)
> @@ -1113,7 +1113,7 @@ pass_dse::execute (function *fun)
>  {
>    need_eh_cleanup = BITMAP_ALLOC (NULL);
>
> -  renumber_gimple_stmt_uids ();
> +  renumber_gimple_stmt_uids (cfun);
>
>    /* We might consider making this a property of each pass so that it
>       can be [re]computed on an as-needed basis.  Particularly since
> Index: tree-ssa-math-opts.c
> ===================================================================
> --- tree-ssa-math-opts.c        (revision 276850)
> +++ tree-ssa-math-opts.c        (working copy)
> @@ -3850,7 +3850,7 @@ pass_optimize_widening_mul::execute (fun
>
>    memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
>    calculate_dominance_info (CDI_DOMINATORS);
> -  renumber_gimple_stmt_uids ();
> +  renumber_gimple_stmt_uids (cfun);
>
>    math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
>
Jan Hubicka Oct. 11, 2019, 11:47 a.m. UTC | #2
> On Fri, Oct 11, 2019 at 11:03 AM Jan Hubicka <hubicka@ucw.cz> wrote:
> >
> > Hi,
> > this patch prevents tree creation druing WPA stream out (to avoid
> > touching pages and triggering COW).  It fixes the following
> >  - gimple streamer produces MEM_REF wrappings for global decls.
> >    This is to preserve the type of access and is not necessary for
> >    WPA->LTRANS streaming when decls ar eno longer going to be merged.
> >  - we renumber stmt uids during streaming WPA summaries
> >  - loop optimizer is initialized in output_function.
> > After testing the patch I noticed that output_function does one extra
> > renumbering of stmts. This seems quite broken and I will fix it
> > incrementally.
> >
> > Bootstrapped/regtested x86_64-linux, comitted.
> 
> Huh.  Why do we stream function bodies at WPA time at all?
> We should already have input sections we can copy/remap?
> 
> That is, why does gcc_assert (!flag_wpa) in output_function trip?

Because we produce new bodies (such as merged ctors) and also bodies
read by ipa-ICF to memory are then streamed from in-memory copies rather
than the pickled sections.

The overall idea is to keep things flexible enough so IPA passes can
produce new functions and also load bodies to memory and modify them.
The second is useful for development where one can first get pass
working modifying at WPA time and then work on optimization summaries.
I also think we may end up with passes that do more involved
modifications on small portions of program (perhaps matrix reorg could).
I am also using this to play with re-running early opts at WPA time and
seeing how many optimizations are missed by not early
inlining/propagating dead code via nothrow etc etc.

Honza
diff mbox series

Patch

Index: gimple-streamer-out.c
===================================================================
--- gimple-streamer-out.c	(revision 276850)
+++ gimple-streamer-out.c	(working copy)
@@ -57,7 +57,7 @@  output_phi (struct output_block *ob, gph
 /* Emit statement STMT on the main stream of output block OB.  */
 
 static void
-output_gimple_stmt (struct output_block *ob, gimple *stmt)
+output_gimple_stmt (struct output_block *ob, struct function *fn, gimple *stmt)
 {
   unsigned i;
   enum gimple_code code;
@@ -80,7 +80,7 @@  output_gimple_stmt (struct output_block
 		     as_a <gassign *> (stmt)),
 		   1);
   bp_pack_value (&bp, gimple_has_volatile_ops (stmt), 1);
-  hist = gimple_histogram_value (cfun, stmt);
+  hist = gimple_histogram_value (fn, stmt);
   bp_pack_value (&bp, hist != NULL, 1);
   bp_pack_var_len_unsigned (&bp, stmt->subcode);
 
@@ -139,7 +139,7 @@  output_gimple_stmt (struct output_block
 	     so that we do not have to deal with type mismatches on
 	     merged symbols during IL read in.  The first operand
 	     of GIMPLE_DEBUG must be a decl, not MEM_REF, though.  */
-	  if (op && (i || !is_gimple_debug (stmt)))
+	  if (!flag_wpa && op && (i || !is_gimple_debug (stmt)))
 	    {
 	      basep = &op;
 	      if (TREE_CODE (*basep) == ADDR_EXPR)
@@ -147,7 +147,7 @@  output_gimple_stmt (struct output_block
 	      while (handled_component_p (*basep))
 		basep = &TREE_OPERAND (*basep, 0);
 	      if (VAR_P (*basep)
-		  && !auto_var_in_fn_p (*basep, current_function_decl)
+		  && !auto_var_in_fn_p (*basep, fn->decl)
 		  && !DECL_REGISTER (*basep))
 		{
 		  bool volatilep = TREE_THIS_VOLATILE (*basep);
@@ -228,7 +228,7 @@  output_bb (struct output_block *ob, basi
 	      print_gimple_stmt (streamer_dump_file, stmt, 0, TDF_SLIM);
 	    }
 
-	  output_gimple_stmt (ob, stmt);
+	  output_gimple_stmt (ob, fn, stmt);
 
 	  /* Emit the EH region holding STMT.  */
 	  region = lookup_stmt_eh_lp_fn (fn, stmt);
Index: lto/lto.c
===================================================================
--- lto/lto.c	(revision 276850)
+++ lto/lto.c	(working copy)
@@ -304,6 +304,13 @@  lto_wpa_write_files (void)
 
   timevar_push (TV_WHOPR_WPA_IO);
 
+  cgraph_node *node;
+  /* Do body modifications needed for streaming before we fork out
+     worker processes.  */
+  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
+    if (gimple_has_body_p (node->decl))
+      lto_prepare_function_for_streaming (node);
+
   /* Generate a prefix for the LTRANS unit files.  */
   blen = strlen (ltrans_output_list);
   temp_filename = (char *) xmalloc (blen + sizeof ("2147483648.o"));
Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c	(revision 276850)
+++ lto-streamer-out.c	(working copy)
@@ -43,6 +43,7 @@  along with GCC; see the file COPYING3.
 #include "debug.h"
 #include "omp-offload.h"
 #include "print-tree.h"
+#include "tree-dfa.h"
 
 
 static void lto_write_tree (struct output_block*, tree, bool);
@@ -1893,7 +1894,7 @@  output_cfg (struct output_block *ob, str
 
   streamer_write_hwi (ob, -1);
 
-  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
+  bb = ENTRY_BLOCK_PTR_FOR_FN (fn);
   while (bb->next_bb)
     {
       streamer_write_hwi (ob, bb->next_bb->index);
@@ -1902,9 +1903,6 @@  output_cfg (struct output_block *ob, str
 
   streamer_write_hwi (ob, -1);
 
-  /* ???  The cfgloop interface is tied to cfun.  */
-  gcc_assert (cfun == fn);
-
   /* Output the number of loops.  */
   streamer_write_uhwi (ob, number_of_loops (fn));
 
@@ -2062,6 +2060,22 @@  collect_block_tree_leafs (tree root, vec
       collect_block_tree_leafs (BLOCK_SUBBLOCKS (root), leafs);
 }
 
+/* This performs function body modifications that are needed for streaming
+   to work.  */
+
+void
+lto_prepare_function_for_streaming (struct cgraph_node *node)
+{
+  if (number_of_loops (DECL_STRUCT_FUNCTION (node->decl)))
+    {
+      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+      loop_optimizer_finalize ();
+      pop_cfun ();
+    }
+  renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (node->decl));
+}
+
 /* Output the body of function NODE->DECL.  */
 
 static void
@@ -2085,9 +2099,6 @@  output_function (struct cgraph_node *nod
 
   gcc_assert (current_function_decl == NULL_TREE && cfun == NULL);
 
-  /* Set current_function_decl and cfun.  */
-  push_cfun (fn);
-
   /* Make string 0 be a NULL string.  */
   streamer_write_char_stream (ob->string_stream, 0);
 
@@ -2124,9 +2135,6 @@  output_function (struct cgraph_node *nod
      debug info.  */
   if (gimple_has_body_p (function))
     {
-      /* Fixup loops if required to match discovery done in the reader.  */
-      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
-
       streamer_write_uhwi (ob, 1);
       output_struct_function_base (ob, fn);
 
@@ -2143,8 +2151,8 @@  output_function (struct cgraph_node *nod
 	 statement numbers.  We do not assign UIDs to PHIs here because
 	 virtual PHIs get re-computed on-the-fly which would make numbers
 	 inconsistent.  */
-      set_gimple_stmt_max_uid (cfun, 0);
-      FOR_ALL_BB_FN (bb, cfun)
+      set_gimple_stmt_max_uid (fn, 0);
+      FOR_ALL_BB_FN (bb, fn)
 	{
 	  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 	       gsi_next (&gsi))
@@ -2153,25 +2161,25 @@  output_function (struct cgraph_node *nod
 
 	      /* Virtual PHIs are not going to be streamed.  */
 	      if (!virtual_operand_p (gimple_phi_result (stmt)))
-	        gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
+	        gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
 	    }
 	  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
 	       gsi_next (&gsi))
 	    {
 	      gimple *stmt = gsi_stmt (gsi);
-	      gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
+	      gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
 	    }
 	}
       /* To avoid keeping duplicate gimple IDs in the statements, renumber
 	 virtual phis now.  */
-      FOR_ALL_BB_FN (bb, cfun)
+      FOR_ALL_BB_FN (bb, fn)
 	{
 	  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 	       gsi_next (&gsi))
 	    {
 	      gphi *stmt = gsi.phi ();
 	      if (virtual_operand_p (gimple_phi_result (stmt)))
-	        gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
+	        gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
 	    }
 	}
 
@@ -2183,9 +2191,6 @@  output_function (struct cgraph_node *nod
       streamer_write_record_start (ob, LTO_null);
 
       output_cfg (ob, fn);
-
-      loop_optimizer_finalize ();
-      pop_cfun ();
    }
   else
     streamer_write_uhwi (ob, 0);
Index: lto-streamer.h
===================================================================
--- lto-streamer.h	(revision 276850)
+++ lto-streamer.h	(working copy)
@@ -909,6 +909,7 @@  void lto_output_decl_state_refs (struct
 			         struct lto_out_decl_state *);
 void lto_output_location (struct output_block *, struct bitpack_d *, location_t);
 void lto_output_init_mode_table (void);
+void lto_prepare_function_for_streaming (cgraph_node *);
 
 
 /* In lto-cgraph.c  */
Index: passes.c
===================================================================
--- passes.c	(revision 276850)
+++ passes.c	(working copy)
@@ -2705,20 +2705,12 @@  ipa_write_summaries (void)
     {
       struct cgraph_node *node = order[i];
 
-      if (gimple_has_body_p (node->decl))
+      if (node->definition && node->need_lto_streaming)
 	{
-	  /* When streaming out references to statements as part of some IPA
-	     pass summary, the statements need to have uids assigned and the
-	     following does that for all the IPA passes here. Naturally, this
-	     ordering then matches the one IPA-passes get in their stmt_fixup
-	     hooks.  */
-
-	  push_cfun (DECL_STRUCT_FUNCTION (node->decl));
-	  renumber_gimple_stmt_uids ();
-	  pop_cfun ();
+	  if (gimple_has_body_p (node->decl))
+	    lto_prepare_function_for_streaming (node);
+	  lto_set_symtab_encoder_in_partition (encoder, node);
 	}
-      if (node->definition && node->need_lto_streaming)
-        lto_set_symtab_encoder_in_partition (encoder, node);
     }
 
   FOR_EACH_DEFINED_FUNCTION (node)
@@ -2786,28 +2778,13 @@  void
 ipa_write_optimization_summaries (lto_symtab_encoder_t encoder)
 {
   struct lto_out_decl_state *state = lto_new_out_decl_state ();
-  lto_symtab_encoder_iterator lsei;
   state->symtab_node_encoder = encoder;
 
   lto_output_init_mode_table ();
   lto_push_out_decl_state (state);
-  for (lsei = lsei_start_function_in_partition (encoder);
-       !lsei_end_p (lsei); lsei_next_function_in_partition (&lsei))
-    {
-      struct cgraph_node *node = lsei_cgraph_node (lsei);
-      /* When streaming out references to statements as part of some IPA
-	 pass summary, the statements need to have uids assigned.
-
-	 For functions newly born at WPA stage we need to initialize
-	 the uids here.  */
-      if (node->definition
-	  && gimple_has_body_p (node->decl))
-	{
-	  push_cfun (DECL_STRUCT_FUNCTION (node->decl));
-	  renumber_gimple_stmt_uids ();
-	  pop_cfun ();
-	}
-    }
+
+  /* Be sure that we did not forget to renumber stmt uids.  */
+  gcc_checking_assert (flag_wpa);
 
   gcc_assert (flag_wpa);
   pass_manager *passes = g->get_passes ();
Index: tree-dfa.c
===================================================================
--- tree-dfa.c	(revision 276850)
+++ tree-dfa.c	(working copy)
@@ -61,23 +61,23 @@  static void collect_dfa_stats (struct df
 /* Renumber all of the gimple stmt uids.  */
 
 void
-renumber_gimple_stmt_uids (void)
+renumber_gimple_stmt_uids (struct function *fun)
 {
   basic_block bb;
 
-  set_gimple_stmt_max_uid (cfun, 0);
-  FOR_ALL_BB_FN (bb, cfun)
+  set_gimple_stmt_max_uid (fun, 0);
+  FOR_ALL_BB_FN (bb, fun)
     {
       gimple_stmt_iterator bsi;
       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
 	{
 	  gimple *stmt = gsi_stmt (bsi);
-	  gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
+	  gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fun));
 	}
       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
 	{
 	  gimple *stmt = gsi_stmt (bsi);
-	  gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
+	  gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fun));
 	}
     }
 }
Index: tree-dfa.h
===================================================================
--- tree-dfa.h	(revision 276850)
+++ tree-dfa.h	(working copy)
@@ -20,7 +20,7 @@  along with GCC; see the file COPYING3.
 #ifndef GCC_TREE_DFA_H
 #define GCC_TREE_DFA_H
 
-extern void renumber_gimple_stmt_uids (void);
+extern void renumber_gimple_stmt_uids (struct function *);
 extern void renumber_gimple_stmt_uids_in_blocks (basic_block *, int);
 extern void dump_variable (FILE *, tree);
 extern void debug_variable (tree);
Index: tree-ssa-dse.c
===================================================================
--- tree-ssa-dse.c	(revision 276850)
+++ tree-ssa-dse.c	(working copy)
@@ -1113,7 +1113,7 @@  pass_dse::execute (function *fun)
 {
   need_eh_cleanup = BITMAP_ALLOC (NULL);
 
-  renumber_gimple_stmt_uids ();
+  renumber_gimple_stmt_uids (cfun);
 
   /* We might consider making this a property of each pass so that it
      can be [re]computed on an as-needed basis.  Particularly since
Index: tree-ssa-math-opts.c
===================================================================
--- tree-ssa-math-opts.c	(revision 276850)
+++ tree-ssa-math-opts.c	(working copy)
@@ -3850,7 +3850,7 @@  pass_optimize_widening_mul::execute (fun
 
   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
   calculate_dominance_info (CDI_DOMINATORS);
-  renumber_gimple_stmt_uids ();
+  renumber_gimple_stmt_uids (cfun);
 
   math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));