Message ID | 20141116071537.GB27166@kam.mff.cuni.cz |
---|---|
State | New |
Headers | show |
On November 16, 2014 8:15:37 AM CET, Jan Hubicka <hubicka@ucw.cz> wrote: >Hi, >late in GCC 4.9 development we broke the feature that ltrans stages do >not read all >functions in ahead. This is because of late IPA passes that do not >like to see functions >without IPA transformations applied. I was originally OK with the >solution based >on fact that we have only IPA-PTA as late IPA pass that is disabled by >default and >eventually probably should become part of WPA in some form. >SIMD streaming was however added and this causes us to stream in all >function bodies >and apply all inlining decisions at very beginning of optimization >queue. > >Fixed by this patch. get_body is now responsible for applying >transformations >on demand and late IPA passes need to call get_body on functions that >they >are interested in + are advised to not be interested in every single >function in >the program. > >The patch also hits a bug in i386's ix86_set_current_function. It is >responsible >for initializing backend and it does so lazily remembering the previous >options >backend was initialized for. Pragma parsing however clears the cache >that leads >to wrong settings being used for subsequent functions. > >Bootstrapped/regtested x86_64-linux, will commit it tomorrow after bit >of more testing. But for example for IPA pta this means we apply all IPA transforms without any garbage collection run? Richard. 
>Index: gcc/cgraphclones.c >=================================================================== >--- gcc/cgraphclones.c (revision 217612) >+++ gcc/cgraphclones.c (working copy) >@@ -307,7 +307,7 @@ duplicate_thunk_for_node (cgraph_node *t > node = duplicate_thunk_for_node (thunk_of, node); > > if (!DECL_ARGUMENTS (thunk->decl)) >- thunk->get_body (); >+ thunk->get_untransformed_body (); > > cgraph_edge *cs; > for (cs = node->callers; cs; cs = cs->next_caller) >@@ -1067,7 +1067,7 @@ symbol_table::materialize_all_clones (vo > && !gimple_has_body_p (node->decl)) > { > if (!node->clone_of->clone_of) >- node->clone_of->get_body (); >+ node->clone_of->get_untransformed_body (); > if (gimple_has_body_p (node->clone_of->decl)) > { > if (symtab->dump_file) >Index: gcc/ipa-icf.c >=================================================================== >--- gcc/ipa-icf.c (revision 217612) >+++ gcc/ipa-icf.c (working copy) >@@ -706,7 +706,7 @@ void > sem_function::init (void) > { > if (in_lto_p) >- get_node ()->get_body (); >+ get_node ()->get_untransformed_body (); > > tree fndecl = node->decl; > function *func = DECL_STRUCT_FUNCTION (fndecl); >Index: gcc/passes.c >=================================================================== >--- gcc/passes.c (revision 217612) >+++ gcc/passes.c (working copy) >@@ -2214,36 +2214,6 @@ execute_one_pass (opt_pass *pass) > executed. */ > invoke_plugin_callbacks (PLUGIN_PASS_EXECUTION, pass); > >- /* SIPLE IPA passes do not handle callgraphs with IPA transforms in >it. >- Apply all trnasforms first. 
*/ >- if (pass->type == SIMPLE_IPA_PASS) >- { >- struct cgraph_node *node; >- bool applied = false; >- FOR_EACH_DEFINED_FUNCTION (node) >- if (node->analyzed >- && node->has_gimple_body_p () >- && (!node->clone_of || node->decl != node->clone_of->decl)) >- { >- if (!node->global.inlined_to >- && node->ipa_transforms_to_apply.exists ()) >- { >- node->get_body (); >- push_cfun (DECL_STRUCT_FUNCTION (node->decl)); >- execute_all_ipa_transforms (); >- cgraph_edge::rebuild_edges (); >- free_dominance_info (CDI_DOMINATORS); >- free_dominance_info (CDI_POST_DOMINATORS); >- pop_cfun (); >- applied = true; >- } >- } >- if (applied) >- symtab->remove_unreachable_nodes (false, dump_file); >- /* Restore current_pass. */ >- current_pass = pass; >- } >- > if (!quiet_flag && !cfun) > fprintf (stderr, " <%s>", pass->name ? pass->name : ""); > >Index: gcc/cgraphunit.c >=================================================================== >--- gcc/cgraphunit.c (revision 217612) >+++ gcc/cgraphunit.c (working copy) >@@ -197,7 +197,6 @@ along with GCC; see the file COPYING3. > #include "target.h" > #include "diagnostic.h" > #include "params.h" >-#include "fibheap.h" > #include "intl.h" > #include "hash-map.h" > #include "plugin-api.h" >@@ -1469,7 +1468,7 @@ cgraph_node::expand_thunk (bool output_a > } > > if (in_lto_p) >- get_body (); >+ get_untransformed_body (); > a = DECL_ARGUMENTS (thunk_fndecl); > > current_function_decl = thunk_fndecl; >@@ -1522,7 +1521,7 @@ cgraph_node::expand_thunk (bool output_a > gimple ret; > > if (in_lto_p) >- get_body (); >+ get_untransformed_body (); > a = DECL_ARGUMENTS (thunk_fndecl); > > current_function_decl = thunk_fndecl; >@@ -1744,7 +1743,7 @@ cgraph_node::expand (void) > announce_function (decl); > process = 0; > gcc_assert (lowered); >- get_body (); >+ get_untransformed_body (); > > /* Generate RTL for the body of DECL. 
*/ > >Index: gcc/cgraph.c >=================================================================== >--- gcc/cgraph.c (revision 217612) >+++ gcc/cgraph.c (working copy) >@@ -1664,29 +1664,33 @@ release_function_body (tree decl) > { > if (DECL_STRUCT_FUNCTION (decl)) > { >- push_cfun (DECL_STRUCT_FUNCTION (decl)); >- if (cfun->cfg >- && current_loops) >- { >- cfun->curr_properties &= ~PROP_loops; >- loop_optimizer_finalize (); >- } >- if (cfun->gimple_df) >+ if (DECL_STRUCT_FUNCTION (decl)->cfg >+ || DECL_STRUCT_FUNCTION (decl)->gimple_df) > { >- delete_tree_ssa (); >- delete_tree_cfg_annotations (); >- cfun->eh = NULL; >- } >- if (cfun->cfg) >- { >- gcc_assert (!dom_info_available_p (CDI_DOMINATORS)); >- gcc_assert (!dom_info_available_p (CDI_POST_DOMINATORS)); >- clear_edges (); >- cfun->cfg = NULL; >+ push_cfun (DECL_STRUCT_FUNCTION (decl)); >+ if (cfun->cfg >+ && current_loops) >+ { >+ cfun->curr_properties &= ~PROP_loops; >+ loop_optimizer_finalize (); >+ } >+ if (cfun->gimple_df) >+ { >+ delete_tree_ssa (); >+ delete_tree_cfg_annotations (); >+ cfun->eh = NULL; >+ } >+ if (cfun->cfg) >+ { >+ gcc_assert (!dom_info_available_p (CDI_DOMINATORS)); >+ gcc_assert (!dom_info_available_p (CDI_POST_DOMINATORS)); >+ clear_edges (); >+ cfun->cfg = NULL; >+ } >+ if (cfun->value_histograms) >+ free_histograms (); >+ pop_cfun (); > } >- if (cfun->value_histograms) >- free_histograms (); >- pop_cfun (); > gimple_set_body (decl, NULL); > /* Struct function hangs a lot of data that would leak if we didn't > removed all pointers to it. */ >@@ -3138,7 +3142,7 @@ cgraph_node::function_symbol (enum avail > present. */ > > bool >-cgraph_node::get_body (void) >+cgraph_node::get_untransformed_body (void) > { > lto_file_decl_data *file_data; > const char *data, *name; >@@ -3178,6 +3182,44 @@ cgraph_node::get_body (void) > return true; > } > >+/* Prepare function body. When doing LTO, read cgraph_node's body >from disk >+ if it is not already present. 
When some IPA transformations are >scheduled, >+ apply them. */ >+ >+bool >+cgraph_node::get_body (void) >+{ >+ bool updated; >+ >+ updated = get_untransformed_body (); >+ >+ /* Getting transformed body makes no sense for inline clones; >+ we should never use this on real clones becuase they are >materialized >+ early. >+ TODO: Materializing clones here will likely lead to smaller >LTRANS >+ footprint. */ >+ gcc_assert (!global.inlined_to && !clone_of); >+ if (ipa_transforms_to_apply.exists ()) >+ { >+ opt_pass *saved_current_pass = current_pass; >+ FILE *saved_dump_file = dump_file; >+ int saved_dump_flags = dump_flags; >+ >+ push_cfun (DECL_STRUCT_FUNCTION (decl)); >+ execute_all_ipa_transforms (); >+ cgraph_edge::rebuild_edges (); >+ free_dominance_info (CDI_DOMINATORS); >+ free_dominance_info (CDI_POST_DOMINATORS); >+ pop_cfun (); >+ updated = true; >+ >+ current_pass = saved_current_pass; >+ dump_file = saved_dump_file; >+ dump_flags = saved_dump_flags; >+ } >+ return updated; >+} >+ > /* Return the DECL_STRUCT_FUNCTION of the function. */ > > struct function * >Index: gcc/cgraph.h >=================================================================== >--- gcc/cgraph.h (revision 217612) >+++ gcc/cgraph.h (working copy) >@@ -933,6 +933,11 @@ public: > >/* When doing LTO, read cgraph_node's body from disk if it is not >already > present. */ >+ bool get_untransformed_body (void); >+ >+ /* Prepare function body. When doing LTO, read cgraph_node's body >from disk >+ if it is not already present. When some IPA transformations are >scheduled, >+ apply them. */ > bool get_body (void); > > /* Release memory used to represent body of function. >Index: gcc/config/i386/i386.c >=================================================================== >--- gcc/config/i386/i386.c (revision 217612) >+++ gcc/config/i386/i386.c (working copy) >@@ -5029,10 +5029,35 @@ ix86_can_inline_p (tree caller, tree cal > /* Remember the last target of ix86_set_current_function. 
*/ > static GTY(()) tree ix86_previous_fndecl; > >+/* Set target globals to default. */ >+ >+static void >+ix86_reset_to_default_globals (void) >+{ >+ tree old_tree = (ix86_previous_fndecl >+ ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) >+ : NULL_TREE); >+ >+ if (old_tree) >+ { >+ tree new_tree = target_option_current_node; >+ cl_target_option_restore (&global_options, >+ TREE_TARGET_OPTION (new_tree)); >+ if (TREE_TARGET_GLOBALS (new_tree)) >+ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); >+ else if (new_tree == target_option_default_node) >+ restore_target_globals (&default_target_globals); >+ else >+ TREE_TARGET_GLOBALS (new_tree) >+ = save_target_globals_default_opts (); >+ } >+} >+ > /* Invalidate ix86_previous_fndecl cache. */ > void > ix86_reset_previous_fndecl (void) > { >+ ix86_reset_to_default_globals (); > ix86_previous_fndecl = NULL_TREE; > } > >@@ -5071,18 +5096,7 @@ ix86_set_current_function (tree fndecl) > } > > else if (old_tree) >- { >- new_tree = target_option_current_node; >- cl_target_option_restore (&global_options, >- TREE_TARGET_OPTION (new_tree)); >- if (TREE_TARGET_GLOBALS (new_tree)) >- restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); >- else if (new_tree == target_option_default_node) >- restore_target_globals (&default_target_globals); >- else >- TREE_TARGET_GLOBALS (new_tree) >- = save_target_globals_default_opts (); >- } >+ ix86_reset_to_default_globals (); > } > } > >@@ -50972,7 +50986,7 @@ ix86_simd_clone_adjust (struct cgraph_no > bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0); > gcc_assert (ok); > pop_cfun (); >- ix86_previous_fndecl = NULL_TREE; >+ ix86_reset_previous_fndecl (); > ix86_set_current_function (node->decl); > } > >Index: gcc/tree-inline.c >=================================================================== >--- gcc/tree-inline.c (revision 217612) >+++ gcc/tree-inline.c (working copy) >@@ -4338,7 +4338,7 @@ expand_call_inline (basic_block bb, gimp > goto egress; > } > 
fn = cg_edge->callee->decl; >- cg_edge->callee->get_body (); >+ cg_edge->callee->get_untransformed_body (); > > #ifdef ENABLE_CHECKING > if (cg_edge->callee->decl != id->dst_node->decl) >Index: gcc/tree-ssa-structalias.c >=================================================================== >--- gcc/tree-ssa-structalias.c (revision 217612) >+++ gcc/tree-ssa-structalias.c (working copy) >@@ -7086,7 +7086,7 @@ ipa_pta_execute (void) > /* Nodes without a body are not interesting. Especially do not > visit clones at this point for now - we get duplicate decls > there for inline clones at least. */ >- if (!node->has_gimple_body_p () || node->clone_of) >+ if (!node->has_gimple_body_p () || node->global.inlined_to) > continue; > node->get_body (); >
> >The patch also hits a bug in i386's ix86_set_current_function. It is > >responsible > >for initializing backend and it does so lazily remembering the previous > >options > >backend was initialized for. Pragma parsing however clears the cache > >that leads > >to wrong settings being used for subsetquent functions. > > > >Bootstrapped/regtested x86_64-linux, will commit it tomorrow after bit > >of more testing. > > But for example for IPA pta this means we apply all IPA transforms without any garbage collection run? The original loop also did not contain ggc_collect calls. Can we call ggc_collect from ipa-pta's data collection loop? (in general I think -fipa-pta is kind of -fplease-explode-on-large-programs :)) Honza > > Richard. > > >Index: gcc/cgraphclones.c > >=================================================================== > >--- gcc/cgraphclones.c (revision 217612) > >+++ gcc/cgraphclones.c (working copy) > >@@ -307,7 +307,7 @@ duplicate_thunk_for_node (cgraph_node *t > > node = duplicate_thunk_for_node (thunk_of, node); > > > > if (!DECL_ARGUMENTS (thunk->decl)) > >- thunk->get_body (); > >+ thunk->get_untransformed_body (); > > > > cgraph_edge *cs; > > for (cs = node->callers; cs; cs = cs->next_caller) > >@@ -1067,7 +1067,7 @@ symbol_table::materialize_all_clones (vo > > && !gimple_has_body_p (node->decl)) > > { > > if (!node->clone_of->clone_of) > >- node->clone_of->get_body (); > >+ node->clone_of->get_untransformed_body (); > > if (gimple_has_body_p (node->clone_of->decl)) > > { > > if (symtab->dump_file) > >Index: gcc/ipa-icf.c > >=================================================================== > >--- gcc/ipa-icf.c (revision 217612) > >+++ gcc/ipa-icf.c (working copy) > >@@ -706,7 +706,7 @@ void > > sem_function::init (void) > > { > > if (in_lto_p) > >- get_node ()->get_body (); > >+ get_node ()->get_untransformed_body (); > > > > tree fndecl = node->decl; > > function *func = DECL_STRUCT_FUNCTION (fndecl); > >Index: gcc/passes.c > 
>=================================================================== > >--- gcc/passes.c (revision 217612) > >+++ gcc/passes.c (working copy) > >@@ -2214,36 +2214,6 @@ execute_one_pass (opt_pass *pass) > > executed. */ > > invoke_plugin_callbacks (PLUGIN_PASS_EXECUTION, pass); > > > >- /* SIPLE IPA passes do not handle callgraphs with IPA transforms in > >it. > >- Apply all trnasforms first. */ > >- if (pass->type == SIMPLE_IPA_PASS) > >- { > >- struct cgraph_node *node; > >- bool applied = false; > >- FOR_EACH_DEFINED_FUNCTION (node) > >- if (node->analyzed > >- && node->has_gimple_body_p () > >- && (!node->clone_of || node->decl != node->clone_of->decl)) > >- { > >- if (!node->global.inlined_to > >- && node->ipa_transforms_to_apply.exists ()) > >- { > >- node->get_body (); > >- push_cfun (DECL_STRUCT_FUNCTION (node->decl)); > >- execute_all_ipa_transforms (); > >- cgraph_edge::rebuild_edges (); > >- free_dominance_info (CDI_DOMINATORS); > >- free_dominance_info (CDI_POST_DOMINATORS); > >- pop_cfun (); > >- applied = true; > >- } > >- } > >- if (applied) > >- symtab->remove_unreachable_nodes (false, dump_file); > >- /* Restore current_pass. */ > >- current_pass = pass; > >- } > >- > > if (!quiet_flag && !cfun) > > fprintf (stderr, " <%s>", pass->name ? pass->name : ""); > > > >Index: gcc/cgraphunit.c > >=================================================================== > >--- gcc/cgraphunit.c (revision 217612) > >+++ gcc/cgraphunit.c (working copy) > >@@ -197,7 +197,6 @@ along with GCC; see the file COPYING3. 
> > #include "target.h" > > #include "diagnostic.h" > > #include "params.h" > >-#include "fibheap.h" > > #include "intl.h" > > #include "hash-map.h" > > #include "plugin-api.h" > >@@ -1469,7 +1468,7 @@ cgraph_node::expand_thunk (bool output_a > > } > > > > if (in_lto_p) > >- get_body (); > >+ get_untransformed_body (); > > a = DECL_ARGUMENTS (thunk_fndecl); > > > > current_function_decl = thunk_fndecl; > >@@ -1522,7 +1521,7 @@ cgraph_node::expand_thunk (bool output_a > > gimple ret; > > > > if (in_lto_p) > >- get_body (); > >+ get_untransformed_body (); > > a = DECL_ARGUMENTS (thunk_fndecl); > > > > current_function_decl = thunk_fndecl; > >@@ -1744,7 +1743,7 @@ cgraph_node::expand (void) > > announce_function (decl); > > process = 0; > > gcc_assert (lowered); > >- get_body (); > >+ get_untransformed_body (); > > > > /* Generate RTL for the body of DECL. */ > > > >Index: gcc/cgraph.c > >=================================================================== > >--- gcc/cgraph.c (revision 217612) > >+++ gcc/cgraph.c (working copy) > >@@ -1664,29 +1664,33 @@ release_function_body (tree decl) > > { > > if (DECL_STRUCT_FUNCTION (decl)) > > { > >- push_cfun (DECL_STRUCT_FUNCTION (decl)); > >- if (cfun->cfg > >- && current_loops) > >- { > >- cfun->curr_properties &= ~PROP_loops; > >- loop_optimizer_finalize (); > >- } > >- if (cfun->gimple_df) > >+ if (DECL_STRUCT_FUNCTION (decl)->cfg > >+ || DECL_STRUCT_FUNCTION (decl)->gimple_df) > > { > >- delete_tree_ssa (); > >- delete_tree_cfg_annotations (); > >- cfun->eh = NULL; > >- } > >- if (cfun->cfg) > >- { > >- gcc_assert (!dom_info_available_p (CDI_DOMINATORS)); > >- gcc_assert (!dom_info_available_p (CDI_POST_DOMINATORS)); > >- clear_edges (); > >- cfun->cfg = NULL; > >+ push_cfun (DECL_STRUCT_FUNCTION (decl)); > >+ if (cfun->cfg > >+ && current_loops) > >+ { > >+ cfun->curr_properties &= ~PROP_loops; > >+ loop_optimizer_finalize (); > >+ } > >+ if (cfun->gimple_df) > >+ { > >+ delete_tree_ssa (); > >+ 
delete_tree_cfg_annotations (); > >+ cfun->eh = NULL; > >+ } > >+ if (cfun->cfg) > >+ { > >+ gcc_assert (!dom_info_available_p (CDI_DOMINATORS)); > >+ gcc_assert (!dom_info_available_p (CDI_POST_DOMINATORS)); > >+ clear_edges (); > >+ cfun->cfg = NULL; > >+ } > >+ if (cfun->value_histograms) > >+ free_histograms (); > >+ pop_cfun (); > > } > >- if (cfun->value_histograms) > >- free_histograms (); > >- pop_cfun (); > > gimple_set_body (decl, NULL); > > /* Struct function hangs a lot of data that would leak if we didn't > > removed all pointers to it. */ > >@@ -3138,7 +3142,7 @@ cgraph_node::function_symbol (enum avail > > present. */ > > > > bool > >-cgraph_node::get_body (void) > >+cgraph_node::get_untransformed_body (void) > > { > > lto_file_decl_data *file_data; > > const char *data, *name; > >@@ -3178,6 +3182,44 @@ cgraph_node::get_body (void) > > return true; > > } > > > >+/* Prepare function body. When doing LTO, read cgraph_node's body > >from disk > >+ if it is not already present. When some IPA transformations are > >scheduled, > >+ apply them. */ > >+ > >+bool > >+cgraph_node::get_body (void) > >+{ > >+ bool updated; > >+ > >+ updated = get_untransformed_body (); > >+ > >+ /* Getting transformed body makes no sense for inline clones; > >+ we should never use this on real clones becuase they are > >materialized > >+ early. > >+ TODO: Materializing clones here will likely lead to smaller > >LTRANS > >+ footprint. 
*/ > >+ gcc_assert (!global.inlined_to && !clone_of); > >+ if (ipa_transforms_to_apply.exists ()) > >+ { > >+ opt_pass *saved_current_pass = current_pass; > >+ FILE *saved_dump_file = dump_file; > >+ int saved_dump_flags = dump_flags; > >+ > >+ push_cfun (DECL_STRUCT_FUNCTION (decl)); > >+ execute_all_ipa_transforms (); > >+ cgraph_edge::rebuild_edges (); > >+ free_dominance_info (CDI_DOMINATORS); > >+ free_dominance_info (CDI_POST_DOMINATORS); > >+ pop_cfun (); > >+ updated = true; > >+ > >+ current_pass = saved_current_pass; > >+ dump_file = saved_dump_file; > >+ dump_flags = saved_dump_flags; > >+ } > >+ return updated; > >+} > >+ > > /* Return the DECL_STRUCT_FUNCTION of the function. */ > > > > struct function * > >Index: gcc/cgraph.h > >=================================================================== > >--- gcc/cgraph.h (revision 217612) > >+++ gcc/cgraph.h (working copy) > >@@ -933,6 +933,11 @@ public: > > > >/* When doing LTO, read cgraph_node's body from disk if it is not > >already > > present. */ > >+ bool get_untransformed_body (void); > >+ > >+ /* Prepare function body. When doing LTO, read cgraph_node's body > >from disk > >+ if it is not already present. When some IPA transformations are > >scheduled, > >+ apply them. */ > > bool get_body (void); > > > > /* Release memory used to represent body of function. > >Index: gcc/config/i386/i386.c > >=================================================================== > >--- gcc/config/i386/i386.c (revision 217612) > >+++ gcc/config/i386/i386.c (working copy) > >@@ -5029,10 +5029,35 @@ ix86_can_inline_p (tree caller, tree cal > > /* Remember the last target of ix86_set_current_function. */ > > static GTY(()) tree ix86_previous_fndecl; > > > >+/* Set target globals to default. */ > >+ > >+static void > >+ix86_reset_to_default_globals (void) > >+{ > >+ tree old_tree = (ix86_previous_fndecl > >+ ? 
DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) > >+ : NULL_TREE); > >+ > >+ if (old_tree) > >+ { > >+ tree new_tree = target_option_current_node; > >+ cl_target_option_restore (&global_options, > >+ TREE_TARGET_OPTION (new_tree)); > >+ if (TREE_TARGET_GLOBALS (new_tree)) > >+ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); > >+ else if (new_tree == target_option_default_node) > >+ restore_target_globals (&default_target_globals); > >+ else > >+ TREE_TARGET_GLOBALS (new_tree) > >+ = save_target_globals_default_opts (); > >+ } > >+} > >+ > > /* Invalidate ix86_previous_fndecl cache. */ > > void > > ix86_reset_previous_fndecl (void) > > { > >+ ix86_reset_to_default_globals (); > > ix86_previous_fndecl = NULL_TREE; > > } > > > >@@ -5071,18 +5096,7 @@ ix86_set_current_function (tree fndecl) > > } > > > > else if (old_tree) > >- { > >- new_tree = target_option_current_node; > >- cl_target_option_restore (&global_options, > >- TREE_TARGET_OPTION (new_tree)); > >- if (TREE_TARGET_GLOBALS (new_tree)) > >- restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); > >- else if (new_tree == target_option_default_node) > >- restore_target_globals (&default_target_globals); > >- else > >- TREE_TARGET_GLOBALS (new_tree) > >- = save_target_globals_default_opts (); > >- } > >+ ix86_reset_to_default_globals (); > > } > > } > > > >@@ -50972,7 +50986,7 @@ ix86_simd_clone_adjust (struct cgraph_no > > bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0); > > gcc_assert (ok); > > pop_cfun (); > >- ix86_previous_fndecl = NULL_TREE; > >+ ix86_reset_previous_fndecl (); > > ix86_set_current_function (node->decl); > > } > > > >Index: gcc/tree-inline.c > >=================================================================== > >--- gcc/tree-inline.c (revision 217612) > >+++ gcc/tree-inline.c (working copy) > >@@ -4338,7 +4338,7 @@ expand_call_inline (basic_block bb, gimp > > goto egress; > > } > > fn = cg_edge->callee->decl; > >- cg_edge->callee->get_body (); > >+ 
cg_edge->callee->get_untransformed_body (); > > > > #ifdef ENABLE_CHECKING > > if (cg_edge->callee->decl != id->dst_node->decl) > >Index: gcc/tree-ssa-structalias.c > >=================================================================== > >--- gcc/tree-ssa-structalias.c (revision 217612) > >+++ gcc/tree-ssa-structalias.c (working copy) > >@@ -7086,7 +7086,7 @@ ipa_pta_execute (void) > > /* Nodes without a body are not interesting. Especially do not > > visit clones at this point for now - we get duplicate decls > > there for inline clones at least. */ > >- if (!node->has_gimple_body_p () || node->clone_of) > >+ if (!node->has_gimple_body_p () || node->global.inlined_to) > > continue; > > node->get_body (); > > >
On Sun, Nov 16, 2014 at 7:00 PM, Jan Hubicka <hubicka@ucw.cz> wrote: >> >The patch also hits a bug in i386's ix86_set_current_function. It is >> >responsible >> >for initializing backend and it does so lazily remembering the previous >> >options >> >backend was initialized for. Pragma parsing however clears the cache >> >that leads >> >to wrong settings being used for subsetquent functions. >> > >> >Bootstrapped/regtested x86_64-linux, will commit it tomorrow after bit >> >of more testing. >> >> But for example for IPA pta this means we apply all IPA transforms without any garbage collection run? > > The original loop also did not contain ggc_collect calls. Can we call ggc_collect from ipa-pta's > data collection loop? I suppose we could. We can also "unload" bodies if that is possible? How much garbage do we accumulate during the IPA transform apply phase? > (in general I think -fipa-pta is kind of -fplease-explode-on-large-programs :)) True... Richard. > Honza >> >> Richard. >> >> >Index: gcc/cgraphclones.c >> >=================================================================== >> >--- gcc/cgraphclones.c (revision 217612) >> >+++ gcc/cgraphclones.c (working copy) >> >@@ -307,7 +307,7 @@ duplicate_thunk_for_node (cgraph_node *t >> > node = duplicate_thunk_for_node (thunk_of, node); >> > >> > if (!DECL_ARGUMENTS (thunk->decl)) >> >- thunk->get_body (); >> >+ thunk->get_untransformed_body (); >> > >> > cgraph_edge *cs; >> > for (cs = node->callers; cs; cs = cs->next_caller) >> >@@ -1067,7 +1067,7 @@ symbol_table::materialize_all_clones (vo >> > && !gimple_has_body_p (node->decl)) >> > { >> > if (!node->clone_of->clone_of) >> >- node->clone_of->get_body (); >> >+ node->clone_of->get_untransformed_body (); >> > if (gimple_has_body_p (node->clone_of->decl)) >> > { >> > if (symtab->dump_file) >> >Index: gcc/ipa-icf.c >> >=================================================================== >> >--- gcc/ipa-icf.c (revision 217612) >> >+++ gcc/ipa-icf.c (working copy) >> >@@ 
-706,7 +706,7 @@ void >> > sem_function::init (void) >> > { >> > if (in_lto_p) >> >- get_node ()->get_body (); >> >+ get_node ()->get_untransformed_body (); >> > >> > tree fndecl = node->decl; >> > function *func = DECL_STRUCT_FUNCTION (fndecl); >> >Index: gcc/passes.c >> >=================================================================== >> >--- gcc/passes.c (revision 217612) >> >+++ gcc/passes.c (working copy) >> >@@ -2214,36 +2214,6 @@ execute_one_pass (opt_pass *pass) >> > executed. */ >> > invoke_plugin_callbacks (PLUGIN_PASS_EXECUTION, pass); >> > >> >- /* SIPLE IPA passes do not handle callgraphs with IPA transforms in >> >it. >> >- Apply all trnasforms first. */ >> >- if (pass->type == SIMPLE_IPA_PASS) >> >- { >> >- struct cgraph_node *node; >> >- bool applied = false; >> >- FOR_EACH_DEFINED_FUNCTION (node) >> >- if (node->analyzed >> >- && node->has_gimple_body_p () >> >- && (!node->clone_of || node->decl != node->clone_of->decl)) >> >- { >> >- if (!node->global.inlined_to >> >- && node->ipa_transforms_to_apply.exists ()) >> >- { >> >- node->get_body (); >> >- push_cfun (DECL_STRUCT_FUNCTION (node->decl)); >> >- execute_all_ipa_transforms (); >> >- cgraph_edge::rebuild_edges (); >> >- free_dominance_info (CDI_DOMINATORS); >> >- free_dominance_info (CDI_POST_DOMINATORS); >> >- pop_cfun (); >> >- applied = true; >> >- } >> >- } >> >- if (applied) >> >- symtab->remove_unreachable_nodes (false, dump_file); >> >- /* Restore current_pass. */ >> >- current_pass = pass; >> >- } >> >- >> > if (!quiet_flag && !cfun) >> > fprintf (stderr, " <%s>", pass->name ? pass->name : ""); >> > >> >Index: gcc/cgraphunit.c >> >=================================================================== >> >--- gcc/cgraphunit.c (revision 217612) >> >+++ gcc/cgraphunit.c (working copy) >> >@@ -197,7 +197,6 @@ along with GCC; see the file COPYING3. 
>> > #include "target.h" >> > #include "diagnostic.h" >> > #include "params.h" >> >-#include "fibheap.h" >> > #include "intl.h" >> > #include "hash-map.h" >> > #include "plugin-api.h" >> >@@ -1469,7 +1468,7 @@ cgraph_node::expand_thunk (bool output_a >> > } >> > >> > if (in_lto_p) >> >- get_body (); >> >+ get_untransformed_body (); >> > a = DECL_ARGUMENTS (thunk_fndecl); >> > >> > current_function_decl = thunk_fndecl; >> >@@ -1522,7 +1521,7 @@ cgraph_node::expand_thunk (bool output_a >> > gimple ret; >> > >> > if (in_lto_p) >> >- get_body (); >> >+ get_untransformed_body (); >> > a = DECL_ARGUMENTS (thunk_fndecl); >> > >> > current_function_decl = thunk_fndecl; >> >@@ -1744,7 +1743,7 @@ cgraph_node::expand (void) >> > announce_function (decl); >> > process = 0; >> > gcc_assert (lowered); >> >- get_body (); >> >+ get_untransformed_body (); >> > >> > /* Generate RTL for the body of DECL. */ >> > >> >Index: gcc/cgraph.c >> >=================================================================== >> >--- gcc/cgraph.c (revision 217612) >> >+++ gcc/cgraph.c (working copy) >> >@@ -1664,29 +1664,33 @@ release_function_body (tree decl) >> > { >> > if (DECL_STRUCT_FUNCTION (decl)) >> > { >> >- push_cfun (DECL_STRUCT_FUNCTION (decl)); >> >- if (cfun->cfg >> >- && current_loops) >> >- { >> >- cfun->curr_properties &= ~PROP_loops; >> >- loop_optimizer_finalize (); >> >- } >> >- if (cfun->gimple_df) >> >+ if (DECL_STRUCT_FUNCTION (decl)->cfg >> >+ || DECL_STRUCT_FUNCTION (decl)->gimple_df) >> > { >> >- delete_tree_ssa (); >> >- delete_tree_cfg_annotations (); >> >- cfun->eh = NULL; >> >- } >> >- if (cfun->cfg) >> >- { >> >- gcc_assert (!dom_info_available_p (CDI_DOMINATORS)); >> >- gcc_assert (!dom_info_available_p (CDI_POST_DOMINATORS)); >> >- clear_edges (); >> >- cfun->cfg = NULL; >> >+ push_cfun (DECL_STRUCT_FUNCTION (decl)); >> >+ if (cfun->cfg >> >+ && current_loops) >> >+ { >> >+ cfun->curr_properties &= ~PROP_loops; >> >+ loop_optimizer_finalize (); >> >+ } >> >+ if 
(cfun->gimple_df) >> >+ { >> >+ delete_tree_ssa (); >> >+ delete_tree_cfg_annotations (); >> >+ cfun->eh = NULL; >> >+ } >> >+ if (cfun->cfg) >> >+ { >> >+ gcc_assert (!dom_info_available_p (CDI_DOMINATORS)); >> >+ gcc_assert (!dom_info_available_p (CDI_POST_DOMINATORS)); >> >+ clear_edges (); >> >+ cfun->cfg = NULL; >> >+ } >> >+ if (cfun->value_histograms) >> >+ free_histograms (); >> >+ pop_cfun (); >> > } >> >- if (cfun->value_histograms) >> >- free_histograms (); >> >- pop_cfun (); >> > gimple_set_body (decl, NULL); >> > /* Struct function hangs a lot of data that would leak if we didn't >> > removed all pointers to it. */ >> >@@ -3138,7 +3142,7 @@ cgraph_node::function_symbol (enum avail >> > present. */ >> > >> > bool >> >-cgraph_node::get_body (void) >> >+cgraph_node::get_untransformed_body (void) >> > { >> > lto_file_decl_data *file_data; >> > const char *data, *name; >> >@@ -3178,6 +3182,44 @@ cgraph_node::get_body (void) >> > return true; >> > } >> > >> >+/* Prepare function body. When doing LTO, read cgraph_node's body >> >from disk >> >+ if it is not already present. When some IPA transformations are >> >scheduled, >> >+ apply them. */ >> >+ >> >+bool >> >+cgraph_node::get_body (void) >> >+{ >> >+ bool updated; >> >+ >> >+ updated = get_untransformed_body (); >> >+ >> >+ /* Getting transformed body makes no sense for inline clones; >> >+ we should never use this on real clones becuase they are >> >materialized >> >+ early. >> >+ TODO: Materializing clones here will likely lead to smaller >> >LTRANS >> >+ footprint. 
*/ >> >+ gcc_assert (!global.inlined_to && !clone_of); >> >+ if (ipa_transforms_to_apply.exists ()) >> >+ { >> >+ opt_pass *saved_current_pass = current_pass; >> >+ FILE *saved_dump_file = dump_file; >> >+ int saved_dump_flags = dump_flags; >> >+ >> >+ push_cfun (DECL_STRUCT_FUNCTION (decl)); >> >+ execute_all_ipa_transforms (); >> >+ cgraph_edge::rebuild_edges (); >> >+ free_dominance_info (CDI_DOMINATORS); >> >+ free_dominance_info (CDI_POST_DOMINATORS); >> >+ pop_cfun (); >> >+ updated = true; >> >+ >> >+ current_pass = saved_current_pass; >> >+ dump_file = saved_dump_file; >> >+ dump_flags = saved_dump_flags; >> >+ } >> >+ return updated; >> >+} >> >+ >> > /* Return the DECL_STRUCT_FUNCTION of the function. */ >> > >> > struct function * >> >Index: gcc/cgraph.h >> >=================================================================== >> >--- gcc/cgraph.h (revision 217612) >> >+++ gcc/cgraph.h (working copy) >> >@@ -933,6 +933,11 @@ public: >> > >> >/* When doing LTO, read cgraph_node's body from disk if it is not >> >already >> > present. */ >> >+ bool get_untransformed_body (void); >> >+ >> >+ /* Prepare function body. When doing LTO, read cgraph_node's body >> >from disk >> >+ if it is not already present. When some IPA transformations are >> >scheduled, >> >+ apply them. */ >> > bool get_body (void); >> > >> > /* Release memory used to represent body of function. >> >Index: gcc/config/i386/i386.c >> >=================================================================== >> >--- gcc/config/i386/i386.c (revision 217612) >> >+++ gcc/config/i386/i386.c (working copy) >> >@@ -5029,10 +5029,35 @@ ix86_can_inline_p (tree caller, tree cal >> > /* Remember the last target of ix86_set_current_function. */ >> > static GTY(()) tree ix86_previous_fndecl; >> > >> >+/* Set target globals to default. */ >> >+ >> >+static void >> >+ix86_reset_to_default_globals (void) >> >+{ >> >+ tree old_tree = (ix86_previous_fndecl >> >+ ? 
DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) >> >+ : NULL_TREE); >> >+ >> >+ if (old_tree) >> >+ { >> >+ tree new_tree = target_option_current_node; >> >+ cl_target_option_restore (&global_options, >> >+ TREE_TARGET_OPTION (new_tree)); >> >+ if (TREE_TARGET_GLOBALS (new_tree)) >> >+ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); >> >+ else if (new_tree == target_option_default_node) >> >+ restore_target_globals (&default_target_globals); >> >+ else >> >+ TREE_TARGET_GLOBALS (new_tree) >> >+ = save_target_globals_default_opts (); >> >+ } >> >+} >> >+ >> > /* Invalidate ix86_previous_fndecl cache. */ >> > void >> > ix86_reset_previous_fndecl (void) >> > { >> >+ ix86_reset_to_default_globals (); >> > ix86_previous_fndecl = NULL_TREE; >> > } >> > >> >@@ -5071,18 +5096,7 @@ ix86_set_current_function (tree fndecl) >> > } >> > >> > else if (old_tree) >> >- { >> >- new_tree = target_option_current_node; >> >- cl_target_option_restore (&global_options, >> >- TREE_TARGET_OPTION (new_tree)); >> >- if (TREE_TARGET_GLOBALS (new_tree)) >> >- restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); >> >- else if (new_tree == target_option_default_node) >> >- restore_target_globals (&default_target_globals); >> >- else >> >- TREE_TARGET_GLOBALS (new_tree) >> >- = save_target_globals_default_opts (); >> >- } >> >+ ix86_reset_to_default_globals (); >> > } >> > } >> > >> >@@ -50972,7 +50986,7 @@ ix86_simd_clone_adjust (struct cgraph_no >> > bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0); >> > gcc_assert (ok); >> > pop_cfun (); >> >- ix86_previous_fndecl = NULL_TREE; >> >+ ix86_reset_previous_fndecl (); >> > ix86_set_current_function (node->decl); >> > } >> > >> >Index: gcc/tree-inline.c >> >=================================================================== >> >--- gcc/tree-inline.c (revision 217612) >> >+++ gcc/tree-inline.c (working copy) >> >@@ -4338,7 +4338,7 @@ expand_call_inline (basic_block bb, gimp >> > goto egress; >> > } >> > fn = 
cg_edge->callee->decl; >> >- cg_edge->callee->get_body (); >> >+ cg_edge->callee->get_untransformed_body (); >> > >> > #ifdef ENABLE_CHECKING >> > if (cg_edge->callee->decl != id->dst_node->decl) >> >Index: gcc/tree-ssa-structalias.c >> >=================================================================== >> >--- gcc/tree-ssa-structalias.c (revision 217612) >> >+++ gcc/tree-ssa-structalias.c (working copy) >> >@@ -7086,7 +7086,7 @@ ipa_pta_execute (void) >> > /* Nodes without a body are not interesting. Especially do not >> > visit clones at this point for now - we get duplicate decls >> > there for inline clones at least. */ >> >- if (!node->has_gimple_body_p () || node->clone_of) >> >+ if (!node->has_gimple_body_p () || node->global.inlined_to) >> > continue; >> > node->get_body (); >> > >>
On Sat, Nov 15, 2014 at 11:15 PM, Jan Hubicka <hubicka@ucw.cz> wrote: > Hi, > late in GCC 4.9 development we broke the feature that ltrans stages do not read all > functions in ahead. This is because of late IPA passes that do not like to see functions > without IPA transformations applied. I was originally OK with the solution based > on fact that we have only IPA-PTA as late IPA pass that is disabled by default and > eventually probably should become part of WPA in some form. > SIMD streaming was however added and this causes us to stream in all function bodies > and apply all inlining decisions at very beginning of optimization queue. > > Fixed by this patch. get_body is now responsible for applying transformations > on demand and late IPA passes needs to call get_body on functions that they > are interested in + are advised to not be interested in every single function in > the program. > > The patch also hits a bug in i386's ix86_set_current_function. It is responsible > for initializing backend and it does so lazily remembering the previous options > backend was initialized for. Pragma parsing however clears the cache that leads > to wrong settings being used for subsequent functions. > > Bootstrapped/regtested x86_64-linux, will commit it tomorrow after bit of more testing. > This caused: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63967 H.J.
Index: gcc/cgraphclones.c =================================================================== --- gcc/cgraphclones.c (revision 217612) +++ gcc/cgraphclones.c (working copy) @@ -307,7 +307,7 @@ duplicate_thunk_for_node (cgraph_node *t node = duplicate_thunk_for_node (thunk_of, node); if (!DECL_ARGUMENTS (thunk->decl)) - thunk->get_body (); + thunk->get_untransformed_body (); cgraph_edge *cs; for (cs = node->callers; cs; cs = cs->next_caller) @@ -1067,7 +1067,7 @@ symbol_table::materialize_all_clones (vo && !gimple_has_body_p (node->decl)) { if (!node->clone_of->clone_of) - node->clone_of->get_body (); + node->clone_of->get_untransformed_body (); if (gimple_has_body_p (node->clone_of->decl)) { if (symtab->dump_file) Index: gcc/ipa-icf.c =================================================================== --- gcc/ipa-icf.c (revision 217612) +++ gcc/ipa-icf.c (working copy) @@ -706,7 +706,7 @@ void sem_function::init (void) { if (in_lto_p) - get_node ()->get_body (); + get_node ()->get_untransformed_body (); tree fndecl = node->decl; function *func = DECL_STRUCT_FUNCTION (fndecl); Index: gcc/passes.c =================================================================== --- gcc/passes.c (revision 217612) +++ gcc/passes.c (working copy) @@ -2214,36 +2214,6 @@ execute_one_pass (opt_pass *pass) executed. */ invoke_plugin_callbacks (PLUGIN_PASS_EXECUTION, pass); - /* SIPLE IPA passes do not handle callgraphs with IPA transforms in it. - Apply all trnasforms first. 
*/ - if (pass->type == SIMPLE_IPA_PASS) - { - struct cgraph_node *node; - bool applied = false; - FOR_EACH_DEFINED_FUNCTION (node) - if (node->analyzed - && node->has_gimple_body_p () - && (!node->clone_of || node->decl != node->clone_of->decl)) - { - if (!node->global.inlined_to - && node->ipa_transforms_to_apply.exists ()) - { - node->get_body (); - push_cfun (DECL_STRUCT_FUNCTION (node->decl)); - execute_all_ipa_transforms (); - cgraph_edge::rebuild_edges (); - free_dominance_info (CDI_DOMINATORS); - free_dominance_info (CDI_POST_DOMINATORS); - pop_cfun (); - applied = true; - } - } - if (applied) - symtab->remove_unreachable_nodes (false, dump_file); - /* Restore current_pass. */ - current_pass = pass; - } - if (!quiet_flag && !cfun) fprintf (stderr, " <%s>", pass->name ? pass->name : ""); Index: gcc/cgraphunit.c =================================================================== --- gcc/cgraphunit.c (revision 217612) +++ gcc/cgraphunit.c (working copy) @@ -197,7 +197,6 @@ along with GCC; see the file COPYING3. #include "target.h" #include "diagnostic.h" #include "params.h" -#include "fibheap.h" #include "intl.h" #include "hash-map.h" #include "plugin-api.h" @@ -1469,7 +1468,7 @@ cgraph_node::expand_thunk (bool output_a } if (in_lto_p) - get_body (); + get_untransformed_body (); a = DECL_ARGUMENTS (thunk_fndecl); current_function_decl = thunk_fndecl; @@ -1522,7 +1521,7 @@ cgraph_node::expand_thunk (bool output_a gimple ret; if (in_lto_p) - get_body (); + get_untransformed_body (); a = DECL_ARGUMENTS (thunk_fndecl); current_function_decl = thunk_fndecl; @@ -1744,7 +1743,7 @@ cgraph_node::expand (void) announce_function (decl); process = 0; gcc_assert (lowered); - get_body (); + get_untransformed_body (); /* Generate RTL for the body of DECL. 
*/ Index: gcc/cgraph.c =================================================================== --- gcc/cgraph.c (revision 217612) +++ gcc/cgraph.c (working copy) @@ -1664,29 +1664,33 @@ release_function_body (tree decl) { if (DECL_STRUCT_FUNCTION (decl)) { - push_cfun (DECL_STRUCT_FUNCTION (decl)); - if (cfun->cfg - && current_loops) - { - cfun->curr_properties &= ~PROP_loops; - loop_optimizer_finalize (); - } - if (cfun->gimple_df) + if (DECL_STRUCT_FUNCTION (decl)->cfg + || DECL_STRUCT_FUNCTION (decl)->gimple_df) { - delete_tree_ssa (); - delete_tree_cfg_annotations (); - cfun->eh = NULL; - } - if (cfun->cfg) - { - gcc_assert (!dom_info_available_p (CDI_DOMINATORS)); - gcc_assert (!dom_info_available_p (CDI_POST_DOMINATORS)); - clear_edges (); - cfun->cfg = NULL; + push_cfun (DECL_STRUCT_FUNCTION (decl)); + if (cfun->cfg + && current_loops) + { + cfun->curr_properties &= ~PROP_loops; + loop_optimizer_finalize (); + } + if (cfun->gimple_df) + { + delete_tree_ssa (); + delete_tree_cfg_annotations (); + cfun->eh = NULL; + } + if (cfun->cfg) + { + gcc_assert (!dom_info_available_p (CDI_DOMINATORS)); + gcc_assert (!dom_info_available_p (CDI_POST_DOMINATORS)); + clear_edges (); + cfun->cfg = NULL; + } + if (cfun->value_histograms) + free_histograms (); + pop_cfun (); } - if (cfun->value_histograms) - free_histograms (); - pop_cfun (); gimple_set_body (decl, NULL); /* Struct function hangs a lot of data that would leak if we didn't removed all pointers to it. */ @@ -3138,7 +3142,7 @@ cgraph_node::function_symbol (enum avail present. */ bool -cgraph_node::get_body (void) +cgraph_node::get_untransformed_body (void) { lto_file_decl_data *file_data; const char *data, *name; @@ -3178,6 +3182,44 @@ cgraph_node::get_body (void) return true; } +/* Prepare function body. When doing LTO, read cgraph_node's body from disk + if it is not already present. When some IPA transformations are scheduled, + apply them. 
*/ + +bool +cgraph_node::get_body (void) +{ + bool updated; + + updated = get_untransformed_body (); + + /* Getting transformed body makes no sense for inline clones; + we should never use this on real clones becuase they are materialized + early. + TODO: Materializing clones here will likely lead to smaller LTRANS + footprint. */ + gcc_assert (!global.inlined_to && !clone_of); + if (ipa_transforms_to_apply.exists ()) + { + opt_pass *saved_current_pass = current_pass; + FILE *saved_dump_file = dump_file; + int saved_dump_flags = dump_flags; + + push_cfun (DECL_STRUCT_FUNCTION (decl)); + execute_all_ipa_transforms (); + cgraph_edge::rebuild_edges (); + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); + pop_cfun (); + updated = true; + + current_pass = saved_current_pass; + dump_file = saved_dump_file; + dump_flags = saved_dump_flags; + } + return updated; +} + /* Return the DECL_STRUCT_FUNCTION of the function. */ struct function * Index: gcc/cgraph.h =================================================================== --- gcc/cgraph.h (revision 217612) +++ gcc/cgraph.h (working copy) @@ -933,6 +933,11 @@ public: /* When doing LTO, read cgraph_node's body from disk if it is not already present. */ + bool get_untransformed_body (void); + + /* Prepare function body. When doing LTO, read cgraph_node's body from disk + if it is not already present. When some IPA transformations are scheduled, + apply them. */ bool get_body (void); /* Release memory used to represent body of function. Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (revision 217612) +++ gcc/config/i386/i386.c (working copy) @@ -5029,10 +5029,35 @@ ix86_can_inline_p (tree caller, tree cal /* Remember the last target of ix86_set_current_function. */ static GTY(()) tree ix86_previous_fndecl; +/* Set target globals to default. 
*/ + +static void +ix86_reset_to_default_globals (void) +{ + tree old_tree = (ix86_previous_fndecl + ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) + : NULL_TREE); + + if (old_tree) + { + tree new_tree = target_option_current_node; + cl_target_option_restore (&global_options, + TREE_TARGET_OPTION (new_tree)); + if (TREE_TARGET_GLOBALS (new_tree)) + restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); + else if (new_tree == target_option_default_node) + restore_target_globals (&default_target_globals); + else + TREE_TARGET_GLOBALS (new_tree) + = save_target_globals_default_opts (); + } +} + /* Invalidate ix86_previous_fndecl cache. */ void ix86_reset_previous_fndecl (void) { + ix86_reset_to_default_globals (); ix86_previous_fndecl = NULL_TREE; } @@ -5071,18 +5096,7 @@ ix86_set_current_function (tree fndecl) } else if (old_tree) - { - new_tree = target_option_current_node; - cl_target_option_restore (&global_options, - TREE_TARGET_OPTION (new_tree)); - if (TREE_TARGET_GLOBALS (new_tree)) - restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); - else if (new_tree == target_option_default_node) - restore_target_globals (&default_target_globals); - else - TREE_TARGET_GLOBALS (new_tree) - = save_target_globals_default_opts (); - } + ix86_reset_to_default_globals (); } } @@ -50972,7 +50986,7 @@ ix86_simd_clone_adjust (struct cgraph_no bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0); gcc_assert (ok); pop_cfun (); - ix86_previous_fndecl = NULL_TREE; + ix86_reset_previous_fndecl (); ix86_set_current_function (node->decl); } Index: gcc/tree-inline.c =================================================================== --- gcc/tree-inline.c (revision 217612) +++ gcc/tree-inline.c (working copy) @@ -4338,7 +4338,7 @@ expand_call_inline (basic_block bb, gimp goto egress; } fn = cg_edge->callee->decl; - cg_edge->callee->get_body (); + cg_edge->callee->get_untransformed_body (); #ifdef ENABLE_CHECKING if (cg_edge->callee->decl != 
id->dst_node->decl) Index: gcc/tree-ssa-structalias.c =================================================================== --- gcc/tree-ssa-structalias.c (revision 217612) +++ gcc/tree-ssa-structalias.c (working copy) @@ -7086,7 +7086,7 @@ ipa_pta_execute (void) /* Nodes without a body are not interesting. Especially do not visit clones at this point for now - we get duplicate decls there for inline clones at least. */ - if (!node->has_gimple_body_p () || node->clone_of) + if (!node->has_gimple_body_p () || node->global.inlined_to) continue; node->get_body ();