Message ID | 20130828104752.GD8260@kam.mff.cuni.cz |
---|---|
State | New |
Headers | show |
On Wed, 28 Aug 2013, Jan Hubicka wrote: > Hi, > this patch saves almost 1Gb of GGC reachable memory for firefox. > Currently we point to all streamed in trees from global decl in state and from > function specific states. The states are big and they dangle pointers. > This patch gets rid of states we do not need. > > We run GGC collect just once during usual WPA compilation. Currently > it is just after tree loading that introduce very little garbage. This > patch moves it after unreachable code removal when freeing happen. > > Still we do not suceed in much of reuse in between GGC and heap, so peak memory > use in TOP reduces by only 300MB. It may be interesting to allow random access > into per-function decl in states and read them only after decl merging (and > possibly unreachable function removal). > > Bootstrapped/regtested x86_64-linux, OK? > > * lto-symtab.c (lto_cgraph_replace_node): Free decl_in_state. > * cgraph.c (cgraph_release_function_body): Free decl_in_state. > * lto-section-in.c (lto_free_function_in_decl_state): New function. > (lto_free_function_in_decl_state_for_node): New function. > > * lto.c (read_cgraph_and_symbols): Remove ggc_collect; > clear section node; add comment why we do not collect. > Index: lto-symtab.c > =================================================================== > --- lto-symtab.c (revision 202047) > +++ lto-symtab.c (working copy) > @@ -80,6 +80,8 @@ lto_cgraph_replace_node (struct cgraph_n > /* Redirect incomming references. */ > ipa_clone_referring ((symtab_node)prevailing_node, &node->symbol.ref_list); > > + lto_free_function_in_decl_state_for_node ((symtab_node)node); > + > if (node->symbol.decl != prevailing_node->symbol.decl) > cgraph_release_function_body (node); > > Index: cgraph.c > =================================================================== > --- cgraph.c (revision 202047) > +++ cgraph.c (working copy) > @@ -1663,6 +1663,8 @@ cgraph_release_function_body (struct cgr > if (!node->used_as_abstract_origin && DECL_INITIAL (node->symbol.decl)) > DECL_INITIAL (node->symbol.decl) = error_mark_node; > release_function_body (node->symbol.decl); > + if (node->symbol.lto_file_data) > + lto_free_function_in_decl_state_for_node ((symtab_node) node); > } > > /* Remove the node from cgraph. */ > Index: lto-section-in.c > =================================================================== > --- lto-section-in.c (revision 202047) > +++ lto-section-in.c (working copy) > @@ -414,6 +414,41 @@ lto_get_function_in_decl_state (struct l > return slot? ((struct lto_in_decl_state*) *slot) : NULL; > } > > +/* Free decl_states. */ > + > +void > +lto_free_function_in_decl_state (struct lto_in_decl_state *state) > +{ > + int i; > + for (i = 0; i < LTO_N_DECL_STREAMS; i++) > + ggc_free (state->streams[i].trees); We likely also waste a lot of memory by means of GC overhead here. Moving this array out of GC space would be better (we know the exact size upfront) - should be doable with making lto_tree_ref_able GTY((user)) and a custom marker. > + ggc_free (state); > +} > + > +/* Free decl_states associated with NODE. This makes it possible to furhter > + release trees needed by the NODE's body. */ > + > +void > +lto_free_function_in_decl_state_for_node (symtab_node node) > +{ > + struct lto_in_decl_state temp; > + void **slot; > + > + if (!node->symbol.lto_file_data) > + return; > + > + temp.fn_decl = node->symbol.decl; > + slot = htab_find_slot (node->symbol.lto_file_data->function_decl_states, > + &temp, NO_INSERT); > + if (slot && *slot) > + { > + lto_free_function_in_decl_state ((struct lto_in_decl_state*) *slot); > + htab_clear_slot (node->symbol.lto_file_data->function_decl_states, > + slot); > + } > + node->symbol.lto_file_data = NULL; > +} > + > > /* Report read pass end of the section. */ > > Index: ipa.c > =================================================================== > --- ipa.c (revision 202047) > +++ ipa.c (working copy) > @@ -873,6 +873,17 @@ function_and_variable_visibility (bool w > segfault though. */ > symtab_dissolve_same_comdat_group_list ((symtab_node) node); > } > + if (node->symbol.externally_visible > + && DECL_COMDAT (node->symbol.decl) > + && comdat_can_be_unshared_p ((symtab_node) node)) > + { > + if (dump_file > + && DECL_VISIBILITY (node->symbol.decl) != VISIBILITY_HIDDEN) > + fprintf (dump_file, "Promoting visibility to hidden: %s/%i\n", > + cgraph_node_name (node), node->symbol.order); > + DECL_VISIBILITY (node->symbol.decl) = VISIBILITY_HIDDEN; > + DECL_VISIBILITY_SPECIFIED (node->symbol.decl) = true; > + } > > if (node->thunk.thunk_p > && TREE_PUBLIC (node->symbol.decl)) > @@ -980,6 +991,17 @@ function_and_variable_visibility (bool w > symtab_dissolve_same_comdat_group_list ((symtab_node) vnode); > vnode->symbol.resolution = LDPR_PREVAILING_DEF_IRONLY; > } > + if (vnode->symbol.externally_visible > + && DECL_COMDAT (vnode->symbol.decl) > + && comdat_can_be_unshared_p ((symtab_node) vnode)) > + { > + if (dump_file > + && DECL_VISIBILITY (vnode->symbol.decl) == VISIBILITY_HIDDEN) > + fprintf (dump_file, "Promoting visibility to hidden: %s/%i\n", > + varpool_node_name (vnode), vnode->symbol.order); > + DECL_VISIBILITY (vnode->symbol.decl) = VISIBILITY_HIDDEN; > + DECL_VISIBILITY_SPECIFIED (vnode->symbol.decl) = true; > + } > } > > if (dump_file) Stray changes? > Index: lto/lto.c > =================================================================== > --- lto/lto.c (revision 202047) > +++ lto/lto.c (working copy) > @@ -3429,7 +3429,6 @@ read_cgraph_and_symbols (unsigned nfiles > tree_scc_hash.dispose (); > obstack_free (&tree_scc_hash_obstack, NULL); > free_gimple_type_tables (); > - ggc_collect (); > > /* Set the hooks so that all of the ipa passes can read in their data. */ > lto_set_in_hooks (all_file_decl_data, get_section_data, free_section_data); > @@ -3484,7 +3483,6 @@ read_cgraph_and_symbols (unsigned nfiles > } > htab_delete (tree_with_vars); > tree_with_vars = NULL; > - ggc_collect (); Just leave the ggc_collect ()s there, they are free. > timevar_pop (TV_IPA_LTO_DECL_MERGE); > /* Each pass will set the appropriate timer. */ > @@ -3503,6 +3501,9 @@ read_cgraph_and_symbols (unsigned nfiles > gcc_assert (all_file_decl_data[i]->symtab_node_encoder); > lto_symtab_encoder_delete (all_file_decl_data[i]->symtab_node_encoder); > all_file_decl_data[i]->symtab_node_encoder = NULL; > + lto_free_function_in_decl_state (all_file_decl_data[i]->global_decl_state); > + all_file_decl_data[i]->global_decl_state = NULL; > + all_file_decl_data[i]->current_decl_state = NULL; > } > > /* Finally merge the cgraph according to the decl merging decisions. */ > @@ -3513,7 +3514,12 @@ read_cgraph_and_symbols (unsigned nfiles > dump_symtab (cgraph_dump_file); > } > lto_symtab_merge_symbols (); > - ggc_collect (); > + > + /* Do not GGC collect here; streaming in should not produce garbage. > + Be sure we first collect after merging symbols, setting up visibilities > + and removing unreachable nodes. This will happen after whole program > + visibility pass. This should release more memory back to the system > + and possibly allow us to re-use it for heap. */ But it's not wrong to collect here, no? See above, it should be mostly free. Thanks, Richard.
> > Index: lto-section-in.c > > =================================================================== > > --- lto-section-in.c (revision 202047) > > +++ lto-section-in.c (working copy) > > @@ -414,6 +414,41 @@ lto_get_function_in_decl_state (struct l > > return slot? ((struct lto_in_decl_state*) *slot) : NULL; > > } > > > > +/* Free decl_states. */ > > + > > +void > > +lto_free_function_in_decl_state (struct lto_in_decl_state *state) > > +{ > > + int i; > > + for (i = 0; i < LTO_N_DECL_STREAMS; i++) > > + ggc_free (state->streams[i].trees); > > We likely also waste a lot of memory by means of GC overhead here. > Moving this array out of GC space would be better (we know the > exact size upfront) - should be doable with making lto_tree_ref_able > GTY((user)) and a custom marker. Possibly, yes. It should be way how to get this memory back for malloc stuff without convincing ggc to do something useful ;) I will be happy to play with this incrementally. (It bit makes me feel bit worried about maintenance costs when user markes starts to do smart things everywhere, but this one is important enough special case for sure). > > Stray changes? Yes, sorry, I was running both tests at one machine. Removed from the patch now. > > Just leave the ggc_collect ()s there, they are free. First ggc_collect after type reading executes garbage collector and the other ggc_collects do nothing (since we never allocate enough of ggc memory to trigger them). Currently we trigger ggc_collect just after tree streaming. It spend time to walk memory, free almost nothing, produce debug output so I know how much memory we need, and mark the bounds high enough so it ggc_collect won't happen again. All in all it is pointless for overall memory use of WPA. I want the first run to happen only after things are merged & dead code removed. Then it frees about 0.5-1GB of garbage that is result of decl state streams disappearing and some trees actually being rendered dead. This way ggc allocations of IPA optimizers can use the memory freed. I can drop this change from the patch removing states now and we can discuss these incrementally. Honza > > > timevar_pop (TV_IPA_LTO_DECL_MERGE); > > /* Each pass will set the appropriate timer. */ > > @@ -3503,6 +3501,9 @@ read_cgraph_and_symbols (unsigned nfiles > > gcc_assert (all_file_decl_data[i]->symtab_node_encoder); > > lto_symtab_encoder_delete (all_file_decl_data[i]->symtab_node_encoder); > > all_file_decl_data[i]->symtab_node_encoder = NULL; > > + lto_free_function_in_decl_state (all_file_decl_data[i]->global_decl_state); > > + all_file_decl_data[i]->global_decl_state = NULL; > > + all_file_decl_data[i]->current_decl_state = NULL; > > } > > > > /* Finally merge the cgraph according to the decl merging decisions. */ > > @@ -3513,7 +3514,12 @@ read_cgraph_and_symbols (unsigned nfiles > > dump_symtab (cgraph_dump_file); > > } > > lto_symtab_merge_symbols (); > > - ggc_collect (); > > + > > + /* Do not GGC collect here; streaming in should not produce garbage. > > + Be sure we first collect after merging symbols, setting up visibilities > > + and removing unreachable nodes. This will happen after whole program > > + visibility pass. This should release more memory back to the system > > + and possibly allow us to re-use it for heap. */ > > But it's not wrong to collect here, no? See above, it should be > mostly free. > > Thanks, > Richard.
Index: lto-symtab.c =================================================================== --- lto-symtab.c (revision 202047) +++ lto-symtab.c (working copy) @@ -80,6 +80,8 @@ lto_cgraph_replace_node (struct cgraph_n /* Redirect incomming references. */ ipa_clone_referring ((symtab_node)prevailing_node, &node->symbol.ref_list); + lto_free_function_in_decl_state_for_node ((symtab_node)node); + if (node->symbol.decl != prevailing_node->symbol.decl) cgraph_release_function_body (node); Index: cgraph.c =================================================================== --- cgraph.c (revision 202047) +++ cgraph.c (working copy) @@ -1663,6 +1663,8 @@ cgraph_release_function_body (struct cgr if (!node->used_as_abstract_origin && DECL_INITIAL (node->symbol.decl)) DECL_INITIAL (node->symbol.decl) = error_mark_node; release_function_body (node->symbol.decl); + if (node->symbol.lto_file_data) + lto_free_function_in_decl_state_for_node ((symtab_node) node); } /* Remove the node from cgraph. */ Index: lto-section-in.c =================================================================== --- lto-section-in.c (revision 202047) +++ lto-section-in.c (working copy) @@ -414,6 +414,41 @@ lto_get_function_in_decl_state (struct l return slot? ((struct lto_in_decl_state*) *slot) : NULL; } +/* Free decl_states. */ + +void +lto_free_function_in_decl_state (struct lto_in_decl_state *state) +{ + int i; + for (i = 0; i < LTO_N_DECL_STREAMS; i++) + ggc_free (state->streams[i].trees); + ggc_free (state); +} + +/* Free decl_states associated with NODE. This makes it possible to furhter + release trees needed by the NODE's body. */ + +void +lto_free_function_in_decl_state_for_node (symtab_node node) +{ + struct lto_in_decl_state temp; + void **slot; + + if (!node->symbol.lto_file_data) + return; + + temp.fn_decl = node->symbol.decl; + slot = htab_find_slot (node->symbol.lto_file_data->function_decl_states, + &temp, NO_INSERT); + if (slot && *slot) + { + lto_free_function_in_decl_state ((struct lto_in_decl_state*) *slot); + htab_clear_slot (node->symbol.lto_file_data->function_decl_states, + slot); + } + node->symbol.lto_file_data = NULL; +} + /* Report read pass end of the section. */ Index: ipa.c =================================================================== --- ipa.c (revision 202047) +++ ipa.c (working copy) @@ -873,6 +873,17 @@ function_and_variable_visibility (bool w segfault though. */ symtab_dissolve_same_comdat_group_list ((symtab_node) node); } + if (node->symbol.externally_visible + && DECL_COMDAT (node->symbol.decl) + && comdat_can_be_unshared_p ((symtab_node) node)) + { + if (dump_file + && DECL_VISIBILITY (node->symbol.decl) != VISIBILITY_HIDDEN) + fprintf (dump_file, "Promoting visibility to hidden: %s/%i\n", + cgraph_node_name (node), node->symbol.order); + DECL_VISIBILITY (node->symbol.decl) = VISIBILITY_HIDDEN; + DECL_VISIBILITY_SPECIFIED (node->symbol.decl) = true; + } if (node->thunk.thunk_p && TREE_PUBLIC (node->symbol.decl)) @@ -980,6 +991,17 @@ function_and_variable_visibility (bool w symtab_dissolve_same_comdat_group_list ((symtab_node) vnode); vnode->symbol.resolution = LDPR_PREVAILING_DEF_IRONLY; } + if (vnode->symbol.externally_visible + && DECL_COMDAT (vnode->symbol.decl) + && comdat_can_be_unshared_p ((symtab_node) vnode)) + { + if (dump_file + && DECL_VISIBILITY (vnode->symbol.decl) == VISIBILITY_HIDDEN) + fprintf (dump_file, "Promoting visibility to hidden: %s/%i\n", + varpool_node_name (vnode), vnode->symbol.order); + DECL_VISIBILITY (vnode->symbol.decl) = VISIBILITY_HIDDEN; + DECL_VISIBILITY_SPECIFIED (vnode->symbol.decl) = true; + } } if (dump_file) Index: lto/lto.c =================================================================== --- lto/lto.c (revision 202047) +++ lto/lto.c (working copy) @@ -3429,7 +3429,6 @@ read_cgraph_and_symbols (unsigned nfiles tree_scc_hash.dispose (); obstack_free (&tree_scc_hash_obstack, NULL); free_gimple_type_tables (); - ggc_collect (); /* Set the hooks so that all of the ipa passes can read in their data. */ lto_set_in_hooks (all_file_decl_data, get_section_data, free_section_data); @@ -3484,7 +3483,6 @@ read_cgraph_and_symbols (unsigned nfiles } htab_delete (tree_with_vars); tree_with_vars = NULL; - ggc_collect (); timevar_pop (TV_IPA_LTO_DECL_MERGE); /* Each pass will set the appropriate timer. */ @@ -3503,6 +3501,9 @@ read_cgraph_and_symbols (unsigned nfiles gcc_assert (all_file_decl_data[i]->symtab_node_encoder); lto_symtab_encoder_delete (all_file_decl_data[i]->symtab_node_encoder); all_file_decl_data[i]->symtab_node_encoder = NULL; + lto_free_function_in_decl_state (all_file_decl_data[i]->global_decl_state); + all_file_decl_data[i]->global_decl_state = NULL; + all_file_decl_data[i]->current_decl_state = NULL; } /* Finally merge the cgraph according to the decl merging decisions. */ @@ -3513,7 +3514,12 @@ read_cgraph_and_symbols (unsigned nfiles dump_symtab (cgraph_dump_file); } lto_symtab_merge_symbols (); - ggc_collect (); + + /* Do not GGC collect here; streaming in should not produce garbage. + Be sure we first collect after merging symbols, setting up visibilities + and removing unreachable nodes. This will happen after whole program + visibility pass. This should release more memory back to the system + and possibly allow us to re-use it for heap. */ cgraph_state = CGRAPH_STATE_IPA_SSA; timevar_pop (TV_IPA_LTO_CGRAPH_MERGE); Index: lto-streamer.h =================================================================== --- lto-streamer.h (revision 202047) +++ lto-streamer.h (working copy) @@ -774,6 +774,8 @@ extern hashval_t lto_hash_in_decl_state extern int lto_eq_in_decl_state (const void *, const void *); extern struct lto_in_decl_state *lto_get_function_in_decl_state ( struct lto_file_decl_data *, tree); +extern void lto_free_function_in_decl_state (struct lto_in_decl_state *); +extern void lto_free_function_in_decl_state_for_node (symtab_node); extern void lto_section_overrun (struct lto_input_block *) ATTRIBUTE_NORETURN; extern void lto_value_range_error (const char *, HOST_WIDE_INT, HOST_WIDE_INT,