Message ID | 20200820220019.3131804-4-giuliano.belinassi@usp.br |
---|---|
State | New |
Headers | show |
Series | Parallelize Intra-Procedural Optimizations using the LTO Engine. | expand |
> We also implemented a GNU Make Jobserver integration to this mechanism, > as implemented in jobserver.cc. This works as follows: > diff --git a/gcc/jobserver.cc b/gcc/jobserver.cc > new file mode 100644 > index 00000000000..8cb374de86e > --- /dev/null > +++ b/gcc/jobserver.cc I wonder if this can go in separately and be first used to trottle down the number of streaming processes in WPA? See TODO at the beggining of do_whole_program_analysis and the logic in stream_out_partitions. Adding your API to take tokens for every new partition being streamed (with exception of first one) is probably very easy. Note that there is also logic for detecting jobserv in toplev.c that may be merged with your logic. In longer run I think your jobserv.cc fits more to liberty, but we could have it for GCC only until other tools will want to integreate. Honza
> diff --git a/gcc/cgraph.c b/gcc/cgraph.c > index c0b45795059..22405098dc5 100644 > --- a/gcc/cgraph.c > +++ b/gcc/cgraph.c > @@ -226,6 +226,22 @@ cgraph_node::delete_function_version_by_decl (tree decl) > decl_node->remove (); > } > > +/* Release function dominator info if present. */ > + > +void > +cgraph_node::maybe_release_dominators (void) > +{ > + struct function *fun = DECL_STRUCT_FUNCTION (decl); > + > + if (fun && fun->cfg) > + { > + if (dom_info_available_p (fun, CDI_DOMINATORS)) > + free_dominance_info (fun, CDI_DOMINATORS); > + if (dom_info_available_p (fun, CDI_POST_DOMINATORS)) > + free_dominance_info (fun, CDI_POST_DOMINATORS); > + } > +} I am not sure if that needs to be member function, but if so we want to merge it with other places in cgraph.c and cgraphunit.c where dominators are freed. I do not think you need to check avalability. > + > /* Record that DECL1 and DECL2 are semantically identical function > versions. */ > void > diff --git a/gcc/cgraph.h b/gcc/cgraph.h > index b4a7871bd3d..72ac19f9672 100644 > --- a/gcc/cgraph.h > +++ b/gcc/cgraph.h > @@ -463,6 +463,15 @@ public: > Return NULL if there's no such node. */ > static symtab_node *get_for_asmname (const_tree asmname); > > + /* Get symtab node by order. */ > + static symtab_node *find_by_order (int order); This is quadratic and moreover seems unused. Why do you add it? > + > + /* Get symtab_node by its name. */ > + static symtab_node *find_by_name (const char *); Similarly here, note that names are not really very meaningful as lookup things, since they get duplicated. > + > + /* Get symtab_node by its ASM name. */ > + static symtab_node *find_by_asm_name (const char *); For this we have get_for_asmname (which also populates asm name hash as needed and is not quadratic) > diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c > index d10d635e942..73e4bed3b61 100644 > --- a/gcc/cgraphunit.c > +++ b/gcc/cgraphunit.c > @@ -2258,6 +2258,11 @@ cgraph_node::expand (void) > { > location_t saved_loc; > > + /* FIXME: Find out why body-removed nodes are marked for output. */ > + if (body_removed) > + return; Indeed, we should know :) > + > + > /* We ought to not compile any inline clones. */ > gcc_assert (!inlined_to); > > @@ -2658,6 +2663,7 @@ ipa_passes (void) > > execute_ipa_summary_passes > ((ipa_opt_pass_d *) passes->all_regular_ipa_passes); > + This seems accidental. > } > > /* Some targets need to handle LTO assembler output specially. */ > @@ -2687,10 +2693,17 @@ ipa_passes (void) > if (flag_generate_lto || flag_generate_offload) > targetm.asm_out.lto_end (); > > - if (!flag_ltrans > + if (split_outputs) > + flag_ltrans = true; > + > + if ((!flag_ltrans || split_outputs) > && ((in_lto_p && flag_incremental_link != INCREMENTAL_LINK_LTO) > || !flag_lto || flag_fat_lto_objects)) > execute_ipa_pass_list (passes->all_regular_ipa_passes); > + > + if (split_outputs) > + flag_ltrans = false; > + > invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_END, NULL); > > bitmap_obstack_release (NULL); > @@ -2742,6 +2755,185 @@ symbol_table::output_weakrefs (void) > } > } > > +static bool is_number (const char *str) > +{ > + while (*str != '\0') > + switch (*str++) > + { > + case '0': > + case '1': > + case '2': > + case '3': > + case '4': > + case '5': > + case '6': > + case '7': > + case '8': > + case '9': > + continue; > + default: > + return false; > + } > + > + return true; > +} This looks odd, we have other places where we parse number from command line :) > + > +/* If forked, which child am I? */ > + > +static int childno = -1; > + > +static bool > +maybe_compile_in_parallel (void) > +{ > + struct symtab_node *node; > + int partitions, i, j; > + int *pids; > + > + bool promote_statics = param_promote_statics; > + bool balance = param_balance_partitions; > + bool jobserver = false; > + bool job_auto = false; > + int num_jobs = -1; > + > + if (!flag_parallel_jobs || !split_outputs) > + return false; > + > + if (!strcmp (flag_parallel_jobs, "auto")) > + { > + jobserver = jobserver_initialize (); > + job_auto = true; > + } > + else if (!strcmp (flag_parallel_jobs, "jobserver")) > + jobserver = jobserver_initialize (); > + else if (is_number (flag_parallel_jobs)) > + num_jobs = atoi (flag_parallel_jobs); > + else > + gcc_unreachable (); > + > + if (job_auto && !jobserver) > + { > + num_jobs = sysconf (_SC_NPROCESSORS_CONF); > + if (num_jobs > 2) > + num_jobs = 2; > + } > + > + if (num_jobs < 0 && !jobserver) > + { > + inform (UNKNOWN_LOCATION, > + "-fparallel-jobs=jobserver, but no GNU Jobserver found"); > + return false; > + } > + > + if (jobserver) > + num_jobs = 2; > + > + if (num_jobs == 0) > + { > + inform (UNKNOWN_LOCATION, "-fparallel-jobs=0 makes no sense"); > + return false; > + } > + > + /* Trick the compiler to think that we are in WPA. */ > + flag_wpa = ""; > + symtab_node::checking_verify_symtab_nodes (); Messing with WPA/ltrans flags is not good idea. You already have split_output for parallel build. I sort of see why you set ltrans, but why WPA? > + > + /* Partition the program so that COMDATs get mapped to the same > + partition. If promote_statics is true, it also maps statics > + to the same partition. If balance is true, try to balance the > + partitions for compilation performance. */ > + lto_merge_comdat_map (balance, promote_statics, num_jobs); > + > + /* AUX pointers are used by partitioning code to bookkeep number of > + partitions symbol is in. This is no longer needed. */ > + FOR_EACH_SYMBOL (node) > + node->aux = NULL; > + > + /* We decided that partitioning is a bad idea. In this case, just > + proceed with the default compilation method. */ > + if (ltrans_partitions.length () <= 1) > + { > + flag_wpa = NULL; > + jobserver_finalize (); > + return false; > + } > + > + /* Find out statics that need to be promoted > + to globals with hidden visibility because they are accessed from > + multiple partitions. */ > + lto_promote_cross_file_statics (promote_statics); > + > + /* Check if we have variables being referenced across partitions. */ > + lto_check_usage_from_other_partitions (); > + > + /* Trick the compiler to think we are not in WPA anymore. */ > + flag_wpa = NULL; > + > + partitions = ltrans_partitions.length (); > + pids = XALLOCAVEC (pid_t, partitions); > + > + /* There is no point in launching more jobs than we have partitions. */ > + if (num_jobs > partitions) > + num_jobs = partitions; > + > + /* Trick the compiler to think we are in LTRANS mode. */ > + flag_ltrans = true; > + > + init_additional_asm_names_file (); > + > + /* Flush asm file, so we don't get repeated output as we fork. */ > + fflush (asm_out_file); > + > + /* Insert a token for child to consume. */ > + if (jobserver) > + { > + num_jobs = partitions; > + jobserver_return_token ('p'); > + } > + > + /* Spawn processes. Spawn as soon as there is a free slot. */ > + for (j = 0, i = -num_jobs; i < partitions; i++, j++) > + { > + if (i >= 0) > + { > + int wstatus, ret; > + ret = waitpid (pids[i], &wstatus, 0); > + > + if (ret < 0) > + internal_error ("Unable to wait child %d to finish", i); > + else if (WIFEXITED (wstatus)) > + { > + if (WEXITSTATUS (wstatus) != 0) > + error ("Child %d exited with error", i); > + } > + else if (WIFSIGNALED (wstatus)) > + error ("Child %d aborted with error", i); > + } > + > + if (j < partitions) > + { > + gcc_assert (ltrans_partitions[j]->symbols > 0); > + > + if (jobserver) > + jobserver_get_token (); > + > + pids[j] = fork (); > + if (pids[j] == 0) > + { > + childno = j; > + lto_apply_partition_mask (ltrans_partitions[j]); > + return true; > + } > + } > + } > + > + /* Get the token which parent inserted for the childs, which they returned by > + now. */ > + if (jobserver) > + jobserver_get_token (); > + exit (0); > +} > + > + > /* Perform simple optimizations based on callgraph. */ > > void > @@ -2768,6 +2960,7 @@ symbol_table::compile (void) > { > timevar_start (TV_CGRAPH_IPA_PASSES); > ipa_passes (); > + maybe_compile_in_parallel (); > timevar_stop (TV_CGRAPH_IPA_PASSES); > } > /* Do nothing else if any IPA pass found errors or if we are just streaming LTO. */ > @@ -2790,6 +2983,9 @@ symbol_table::compile (void) > timevar_pop (TV_CGRAPHOPT); > > /* Output everything. */ > + if (split_outputs) > + handle_additional_asm (childno); What this is doin? > + > switch_to_section (text_section); > (*debug_hooks->assembly_start) (); > if (!quiet_flag) > diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c > index 2cfab40156e..bc500df4853 100644 > --- a/gcc/ipa-fnsummary.c > +++ b/gcc/ipa-fnsummary.c > @@ -4610,7 +4610,7 @@ public: > gcc_assert (n == 0); > small_p = param; > } > - virtual bool gate (function *) { return true; } > + virtual bool gate (function *) { return !(flag_ltrans && split_outputs); } > virtual unsigned int execute (function *) > { > ipa_free_fn_summary (); > diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c > index 069de9d82fb..6a5657c7507 100644 > --- a/gcc/ipa-icf.c > +++ b/gcc/ipa-icf.c > @@ -2345,7 +2345,8 @@ sem_item_optimizer::filter_removed_items (void) > { > cgraph_node *cnode = static_cast <sem_function *>(item)->get_node (); > > - if (in_lto_p && (cnode->alias || cnode->body_removed)) > + if ((in_lto_p || split_outputs) > + && (cnode->alias || cnode->body_removed)) And I wonder why you need these. IPA passes are run before we split, right? > remove_item (item); > else > filtered.safe_push (item); > diff --git a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c > index 7c854f471e8..4d9e11482d3 100644 > --- a/gcc/ipa-visibility.c > +++ b/gcc/ipa-visibility.c > @@ -540,7 +540,8 @@ optimize_weakref (symtab_node *node) > static void > localize_node (bool whole_program, symtab_node *node) > { > - gcc_assert (whole_program || in_lto_p || !TREE_PUBLIC (node->decl)); > + gcc_assert (split_outputs || whole_program || in_lto_p > + || !TREE_PUBLIC (node->decl)); > > /* It is possible that one comdat group contains both hidden and non-hidden > symbols. In this case we can privatize all hidden symbol but we need > diff --git a/gcc/ipa.c b/gcc/ipa.c > index 288b58cf73d..b397ea2fed8 100644 > --- a/gcc/ipa.c > +++ b/gcc/ipa.c > @@ -350,7 +350,7 @@ symbol_table::remove_unreachable_nodes (FILE *file) > > /* Mark variables that are obviously needed. */ > FOR_EACH_DEFINED_VARIABLE (vnode) > - if (!vnode->can_remove_if_no_refs_p() > + if (!vnode->can_remove_if_no_refs_p () > && !vnode->in_other_partition) > { > reachable.add (vnode); > @@ -564,7 +564,7 @@ symbol_table::remove_unreachable_nodes (FILE *file) > } > else > gcc_assert (node->clone_of || !node->has_gimple_body_p () > - || in_lto_p || DECL_RESULT (node->decl)); > + || in_lto_p || split_outputs || DECL_RESULT (node->decl)); > } > > /* Inline clones might be kept around so their materializing allows further > diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c > index 93a99f3465b..12be8546d9c 100644 > --- a/gcc/lto-cgraph.c > +++ b/gcc/lto-cgraph.c > @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see > #include "omp-offload.h" > #include "stringpool.h" > #include "attribs.h" > +#include "lto-partition.h" > > /* True when asm nodes has been output. */ > bool asm_nodes_output = false; > @@ -2065,3 +2066,174 @@ input_cgraph_opt_summary (vec<symtab_node *> nodes) > input_cgraph_opt_section (file_data, data, len, nodes); > } > } > + > +/* When analysing function for removal, we have mainly three states, as > + defined below. */ > + > +enum node_partition_state > +{ > + CAN_REMOVE, /* This node can be removed, or is still to be > + analysed. */ > + IN_CURRENT_PARTITION, /* This node is in current partition and should not be > + touched. */ > + IN_BOUNDARY, /* This node is in boundary, therefore being in other > + partition or is a external symbol, and its body can > + be released. */ > + IN_BOUNDARY_KEEP_BODY /* This symbol is in other partition but we may need its > + body for inlining, for instance. */ > +}; > + > +/* Handle node that are in the LTRANS boundary, releasing its body and > + other informations if necessary. */ > + > +static void > +handle_node_in_boundary (symtab_node *node, bool keep_body) > +{ > + if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node)) > + { > + if (cnode->inlined_to && cnode->inlined_to->aux2 != IN_CURRENT_PARTITION) > + { > + /* If marked to be inlined into a node not in current partition, > + then undo the inline. */ > + > + if (cnode->callers) /* This edge could be removed. */ > + cnode->callers->inline_failed = CIF_UNSPECIFIED; > + cnode->inlined_to = NULL; > + } > + > + if (cnode->has_gimple_body_p ()) > + { > + if (!keep_body) > + { > + cnode->maybe_release_dominators (); > + cnode->remove_callees (); > + cnode->remove_all_references (); > + > + /* FIXME: Releasing body of clones can release bodies of functions > + in current partition. */ > + > + /* cnode->release_body (); */ > + cnode->body_removed = true; > + cnode->definition = false; > + cnode->analyzed = false; > + } > + cnode->cpp_implicit_alias = false; > + cnode->alias = false; > + cnode->transparent_alias = false; > + cnode->thunk.thunk_p = false; > + cnode->weakref = false; > + /* After early inlining we drop always_inline attributes on > + bodies of functions that are still referenced (have their > + address taken). */ > + DECL_ATTRIBUTES (cnode->decl) > + = remove_attribute ("always_inline", > + DECL_ATTRIBUTES (node->decl)); > + > + cnode->in_other_partition = true; > + } > + } > + else if (is_a <varpool_node *> (node) && !DECL_EXTERNAL (node->decl)) > + { > + DECL_EXTERNAL (node->decl) = true; > + node->in_other_partition = true; > + } > +} > + > +/* Check the boundary and expands it if necessary, including more nodes or > + promoting then to a state where their body is required. */ > + > +static void > +compute_boundary (ltrans_partition partition) > +{ > + vec<lto_encoder_entry> &nodes = partition->encoder->nodes; > + symtab_node *node; > + cgraph_node *cnode; > + auto_vec<symtab_node *, 16> mark_to_remove; > + unsigned int i; > + > + FOR_EACH_SYMBOL (node) > + node->aux2 = CAN_REMOVE; There is boundary computation in lto-cgraph.c so we should merge the logic... If you keep the lto-partition datastructures it will compute boundary for you and you can just remove the rest (I got that working at one point). I am also not sure about copy-on-write effect of this. It may be better to keep things around and just teach late passes to not compile things in other partitions, but that is definitly for incremental change. > +void > +init_additional_asm_names_file (void) > +{ > + gcc_assert (split_outputs); > + > + additional_asm_filenames = fopen (split_outputs, "w"); > + if (!additional_asm_filenames) > + error ("Unable to create a temporary write-only file."); > + > + fclose (additional_asm_filenames); > +} Aha, that is what it does :) I wonder if creating the file conditionally (and late) can not lead to race condition where we will use same tmp file name for other build executed in parallel by make. > + > +/* Reinitialize the assembler file and store it in the additional asm file. */ > + > +void > +handle_additional_asm (int childno) > +{ > + gcc_assert (split_outputs); > + > + if (childno < 0) > + return; > + > + const char *temp_asm_name = make_temp_file (".s"); > + asm_file_name = temp_asm_name; > + > + if (asm_out_file == stdout) > + fatal_error (UNKNOWN_LOCATION, "Unexpected asm output to stdout"); > + > + fclose (asm_out_file); > + > + asm_out_file = fopen (temp_asm_name, "w"); > + if (!asm_out_file) > + fatal_error (UNKNOWN_LOCATION, "Unable to create asm output file"); > + > + /* Reopen file as append mode. Here we assume that write to append file is > + atomic, as it is in Linux. */ > + additional_asm_filenames = fopen (split_outputs, "a"); > + if (!additional_asm_filenames) > + fatal_error (UNKNOWN_LOCATION, > + "Unable to open the temporary asm files container"); > + > + fprintf (additional_asm_filenames, "%d %s\n", childno, asm_file_name); > + fclose (additional_asm_filenames); > +} > + > /* A helper function; used as the reallocator function for cpp's line > table. */ > static void * > @@ -2311,7 +2359,7 @@ do_compile () > > timevar_stop (TV_PHASE_SETUP); > > - compile_file (); > + compile_file (); > } > else > { > @@ -2477,6 +2525,12 @@ toplev::main (int argc, char **argv) > > finalize_plugins (); > > + if (jobserver_initialized) > + { > + jobserver_return_token (JOBSERVER_NULL_TOKEN); > + jobserver_finalize (); > + } > + > after_memory_report = true; > > if (seen_error () || werrorcount) > diff --git a/gcc/toplev.h b/gcc/toplev.h > index d6c316962b0..3abbf74cd02 100644 > --- a/gcc/toplev.h > +++ b/gcc/toplev.h > @@ -103,4 +103,7 @@ extern void parse_alignment_opts (void); > > extern void initialize_rtl (void); > > +extern void init_additional_asm_names_file (void); > +extern void handle_additional_asm (int); > + > #endif /* ! GCC_TOPLEV_H */ > diff --git a/gcc/varasm.c b/gcc/varasm.c > index 4070f9c17e8..84df52013d7 100644 > --- a/gcc/varasm.c > +++ b/gcc/varasm.c > @@ -110,7 +110,7 @@ static void decode_addr_const (tree, class addr_const *); > static hashval_t const_hash_1 (const tree); > static int compare_constant (const tree, const tree); > static void output_constant_def_contents (rtx); > -static void output_addressed_constants (tree); > +static void output_addressed_constants (tree, int); > static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT, > unsigned int, bool, bool); > static void globalize_decl (tree); > @@ -2272,7 +2272,7 @@ assemble_variable (tree decl, int top_level ATTRIBUTE_UNUSED, > > /* Output any data that we will need to use the address of. */ > if (DECL_INITIAL (decl) && DECL_INITIAL (decl) != error_mark_node) > - output_addressed_constants (DECL_INITIAL (decl)); > + output_addressed_constants (DECL_INITIAL (decl), 0); > > /* dbxout.c needs to know this. */ > if (sect && (sect->common.flags & SECTION_CODE) != 0) > @@ -3426,11 +3426,11 @@ build_constant_desc (tree exp) > already have labels. */ > > static constant_descriptor_tree * > -add_constant_to_table (tree exp) > +add_constant_to_table (tree exp, int defer) > { > /* The hash table methods may call output_constant_def for addressed > constants, so handle them first. */ > - output_addressed_constants (exp); > + output_addressed_constants (exp, defer); > > /* Sanity check to catch recursive insertion. */ > static bool inserting; > @@ -3474,7 +3474,7 @@ add_constant_to_table (tree exp) > rtx > output_constant_def (tree exp, int defer) > { > - struct constant_descriptor_tree *desc = add_constant_to_table (exp); > + struct constant_descriptor_tree *desc = add_constant_to_table (exp, defer); > maybe_output_constant_def_contents (desc, defer); > return desc->rtl; > } > @@ -3544,7 +3544,7 @@ output_constant_def_contents (rtx symbol) > > /* Make sure any other constants whose addresses appear in EXP > are assigned label numbers. */ > - output_addressed_constants (exp); > + output_addressed_constants (exp, 0); > > /* We are no longer deferring this constant. */ > TREE_ASM_WRITTEN (decl) = TREE_ASM_WRITTEN (exp) = 1; > @@ -3608,7 +3608,7 @@ lookup_constant_def (tree exp) > tree > tree_output_constant_def (tree exp) > { > - struct constant_descriptor_tree *desc = add_constant_to_table (exp); > + struct constant_descriptor_tree *desc = add_constant_to_table (exp, 1); > tree decl = SYMBOL_REF_DECL (XEXP (desc->rtl, 0)); > varpool_node::finalize_decl (decl); > return decl; > @@ -4327,7 +4327,7 @@ compute_reloc_for_constant (tree exp) > Indicate whether an ADDR_EXPR has been encountered. */ > > static void > -output_addressed_constants (tree exp) > +output_addressed_constants (tree exp, int defer) > { > tree tem; > > @@ -4347,21 +4347,21 @@ output_addressed_constants (tree exp) > tem = DECL_INITIAL (tem); > > if (CONSTANT_CLASS_P (tem) || TREE_CODE (tem) == CONSTRUCTOR) > - output_constant_def (tem, 0); > + output_constant_def (tem, defer); > > if (TREE_CODE (tem) == MEM_REF) > - output_addressed_constants (TREE_OPERAND (tem, 0)); > + output_addressed_constants (TREE_OPERAND (tem, 0), defer); > break; > > case PLUS_EXPR: > case POINTER_PLUS_EXPR: > case MINUS_EXPR: > - output_addressed_constants (TREE_OPERAND (exp, 1)); > + output_addressed_constants (TREE_OPERAND (exp, 1), defer); > gcc_fallthrough (); > > CASE_CONVERT: > case VIEW_CONVERT_EXPR: > - output_addressed_constants (TREE_OPERAND (exp, 0)); > + output_addressed_constants (TREE_OPERAND (exp, 0), defer); > break; > > case CONSTRUCTOR: > @@ -4369,7 +4369,7 @@ output_addressed_constants (tree exp) > unsigned HOST_WIDE_INT idx; > FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), idx, tem) > if (tem != 0) > - output_addressed_constants (tem); > + output_addressed_constants (tem, defer); > } > break; Nice job :) Honza
Hi, Honza. Thank you for your detailed review! On 08/27, Jan Hubicka wrote: > > diff --git a/gcc/cgraph.c b/gcc/cgraph.c > > index c0b45795059..22405098dc5 100644 > > --- a/gcc/cgraph.c > > +++ b/gcc/cgraph.c > > @@ -226,6 +226,22 @@ cgraph_node::delete_function_version_by_decl (tree decl) > > decl_node->remove (); > > } > > > > +/* Release function dominator info if present. */ > > + > > +void > > +cgraph_node::maybe_release_dominators (void) > > +{ > > + struct function *fun = DECL_STRUCT_FUNCTION (decl); > > + > > + if (fun && fun->cfg) > > + { > > + if (dom_info_available_p (fun, CDI_DOMINATORS)) > > + free_dominance_info (fun, CDI_DOMINATORS); > > + if (dom_info_available_p (fun, CDI_POST_DOMINATORS)) > > + free_dominance_info (fun, CDI_POST_DOMINATORS); > > + } > > +} > > I am not sure if that needs to be member function, but if so we want to > merge it with other places in cgraph.c and cgraphunit.c where dominators > are freed. I do not think you need to check avalability. This is necessary to remove some nodes from the callgraph. For some reason, if I node->remove () and it still have the dominance info available, it will fail some assertions on the compiler. However, with regard to code layout, this can be moved to lto-cgraph.c, as it is only used there. > > + > > /* Record that DECL1 and DECL2 are semantically identical function > > versions. */ > > void > > diff --git a/gcc/cgraph.h b/gcc/cgraph.h > > index b4a7871bd3d..72ac19f9672 100644 > > --- a/gcc/cgraph.h > > +++ b/gcc/cgraph.h > > @@ -463,6 +463,15 @@ public: > > Return NULL if there's no such node. */ > > static symtab_node *get_for_asmname (const_tree asmname); > > > > + /* Get symtab node by order. */ > > + static symtab_node *find_by_order (int order); > > This is quadratic and moreover seems unused. Why do you add it? I added this for debugging, since I used this a lot inside GDB. Sure, I can remove this without any problems, or print a warning for the developer to avoid calling this in production code. > > + > > + /* Get symtab_node by its name. */ > > + static symtab_node *find_by_name (const char *); > > Similarly here, note that names are not really very meaningful as lookup > things, since they get duplicated. > > + > > + /* Get symtab_node by its ASM name. */ > > + static symtab_node *find_by_asm_name (const char *); > > For this we have get_for_asmname (which also populates asm name hash as > needed and is not quadratic) Cool. I will surely remove this then :) > > diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c > > index d10d635e942..73e4bed3b61 100644 > > --- a/gcc/cgraphunit.c > > +++ b/gcc/cgraphunit.c > > @@ -2258,6 +2258,11 @@ cgraph_node::expand (void) > > { > > location_t saved_loc; > > > > + /* FIXME: Find out why body-removed nodes are marked for output. */ > > + if (body_removed) > > + return; > > Indeed, we should know :) Looks like this was due an early problem. I removed this and bootstrap is working OK. > > + > > + > > /* We ought to not compile any inline clones. */ > > gcc_assert (!inlined_to); > > > > @@ -2658,6 +2663,7 @@ ipa_passes (void) > > > > execute_ipa_summary_passes > > ((ipa_opt_pass_d *) passes->all_regular_ipa_passes); > > + > This seems accidental. Yes. > > } > > > > /* Some targets need to handle LTO assembler output specially. */ > > @@ -2687,10 +2693,17 @@ ipa_passes (void) > > if (flag_generate_lto || flag_generate_offload) > > targetm.asm_out.lto_end (); > > > > - if (!flag_ltrans > > + if (split_outputs) > > + flag_ltrans = true; > > + > > + if ((!flag_ltrans || split_outputs) > > && ((in_lto_p && flag_incremental_link != INCREMENTAL_LINK_LTO) > > || !flag_lto || flag_fat_lto_objects)) > > execute_ipa_pass_list (passes->all_regular_ipa_passes); > > + > > + if (split_outputs) > > + flag_ltrans = false; > > + > > invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_END, NULL); > > > > bitmap_obstack_release (NULL); > > @@ -2742,6 +2755,185 @@ symbol_table::output_weakrefs (void) > > } > > } > > > > +static bool is_number (const char *str) > > +{ > > + while (*str != '\0') > > + switch (*str++) > > + { > > + case '0': > > + case '1': > > + case '2': > > + case '3': > > + case '4': > > + case '5': > > + case '6': > > + case '7': > > + case '8': > > + case '9': > > + continue; > > + default: > > + return false; > > + } > > + > > + return true; > > +} > > This looks odd, we have other places where we parse number from command > line :) isdigit () is poisoned in GCC. But I guess I should look how -flto= does this. > > + > > +/* If forked, which child am I? */ > > + > > +static int childno = -1; > > + > > +static bool > > +maybe_compile_in_parallel (void) > > +{ > > + struct symtab_node *node; > > + int partitions, i, j; > > + int *pids; > > + > > + bool promote_statics = param_promote_statics; > > + bool balance = param_balance_partitions; > > + bool jobserver = false; > > + bool job_auto = false; > > + int num_jobs = -1; > > + > > + if (!flag_parallel_jobs || !split_outputs) > > + return false; > > + > > + if (!strcmp (flag_parallel_jobs, "auto")) > > + { > > + jobserver = jobserver_initialize (); > > + job_auto = true; > > + } > > + else if (!strcmp (flag_parallel_jobs, "jobserver")) > > + jobserver = jobserver_initialize (); > > + else if (is_number (flag_parallel_jobs)) > > + num_jobs = atoi (flag_parallel_jobs); > > + else > > + gcc_unreachable (); > > + > > + if (job_auto && !jobserver) > > + { > > + num_jobs = sysconf (_SC_NPROCESSORS_CONF); > > + if (num_jobs > 2) > > + num_jobs = 2; > > + } > > + > > + if (num_jobs < 0 && !jobserver) > > + { > > + inform (UNKNOWN_LOCATION, > > + "-fparallel-jobs=jobserver, but no GNU Jobserver found"); > > + return false; > > + } > > + > > + if (jobserver) > > + num_jobs = 2; > > + > > + if (num_jobs == 0) > > + { > > + inform (UNKNOWN_LOCATION, "-fparallel-jobs=0 makes no sense"); > > + return false; > > + } > > + > > + /* Trick the compiler to think that we are in WPA. */ > > + flag_wpa = ""; > > + symtab_node::checking_verify_symtab_nodes (); > > Messing with WPA/ltrans flags is not good idea. You already have > split_output for parallel build. I sort of see why you set ltrans, but > why WPA? Some assertions expected flag_wpa to be present. Sure, I can add split_outputs in these assertions. > > + > > + /* Partition the program so that COMDATs get mapped to the same > > + partition. If promote_statics is true, it also maps statics > > + to the same partition. If balance is true, try to balance the > > + partitions for compilation performance. */ > > + lto_merge_comdat_map (balance, promote_statics, num_jobs); > > + > > + /* AUX pointers are used by partitioning code to bookkeep number of > > + partitions symbol is in. This is no longer needed. */ > > + FOR_EACH_SYMBOL (node) > > + node->aux = NULL; > > + > > + /* We decided that partitioning is a bad idea. In this case, just > > + proceed with the default compilation method. */ > > + if (ltrans_partitions.length () <= 1) > > + { > > + flag_wpa = NULL; > > + jobserver_finalize (); > > + return false; > > + } > > + > > + /* Find out statics that need to be promoted > > + to globals with hidden visibility because they are accessed from > > + multiple partitions. */ > > + lto_promote_cross_file_statics (promote_statics); > > + > > + /* Check if we have variables being referenced across partitions. */ > > + lto_check_usage_from_other_partitions (); > > + > > + /* Trick the compiler to think we are not in WPA anymore. */ > > + flag_wpa = NULL; > > + > > + partitions = ltrans_partitions.length (); > > + pids = XALLOCAVEC (pid_t, partitions); > > + > > + /* There is no point in launching more jobs than we have partitions. */ > > + if (num_jobs > partitions) > > + num_jobs = partitions; > > + > > + /* Trick the compiler to think we are in LTRANS mode. */ > > + flag_ltrans = true; > > + > > + init_additional_asm_names_file (); > > + > > + /* Flush asm file, so we don't get repeated output as we fork. */ > > + fflush (asm_out_file); > > + > > + /* Insert a token for child to consume. */ > > + if (jobserver) > > + { > > + num_jobs = partitions; > > + jobserver_return_token ('p'); > > + } > > + > > + /* Spawn processes. Spawn as soon as there is a free slot. */ > > + for (j = 0, i = -num_jobs; i < partitions; i++, j++) > > + { > > + if (i >= 0) > > + { > > + int wstatus, ret; > > + ret = waitpid (pids[i], &wstatus, 0); > > + > > + if (ret < 0) > > + internal_error ("Unable to wait child %d to finish", i); > > + else if (WIFEXITED (wstatus)) > > + { > > + if (WEXITSTATUS (wstatus) != 0) > > + error ("Child %d exited with error", i); > > + } > > + else if (WIFSIGNALED (wstatus)) > > + error ("Child %d aborted with error", i); > > + } > > + > > + if (j < partitions) > > + { > > + gcc_assert (ltrans_partitions[j]->symbols > 0); > > + > > + if (jobserver) > > + jobserver_get_token (); > > + > > + pids[j] = fork (); > > + if (pids[j] == 0) > > + { > > + childno = j; > > + lto_apply_partition_mask (ltrans_partitions[j]); > > + return true; > > + } > > + } > > + } > > + > > + /* Get the token which parent inserted for the childs, which they returned by > > + now. */ > > + if (jobserver) > > + jobserver_get_token (); > > + exit (0); > > +} > > + > > + > > /* Perform simple optimizations based on callgraph. */ > > > > void > > @@ -2768,6 +2960,7 @@ symbol_table::compile (void) > > { > > timevar_start (TV_CGRAPH_IPA_PASSES); > > ipa_passes (); > > + maybe_compile_in_parallel (); > > timevar_stop (TV_CGRAPH_IPA_PASSES); > > } > > /* Do nothing else if any IPA pass found errors or if we are just streaming LTO. */ > > @@ -2790,6 +2983,9 @@ symbol_table::compile (void) > > timevar_pop (TV_CGRAPHOPT); > > > > /* Output everything. */ > > + if (split_outputs) > > + handle_additional_asm (childno); > What this is doin? Create an auxiliary file in which we will write the name of every assembler output. This will change, as it is a better idea to write them using the main process rather than the child, as Richi already pointed out. > > + > > switch_to_section (text_section); > > (*debug_hooks->assembly_start) (); > > if (!quiet_flag) > > diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c > > index 2cfab40156e..bc500df4853 100644 > > --- a/gcc/ipa-fnsummary.c > > +++ b/gcc/ipa-fnsummary.c > > @@ -4610,7 +4610,7 @@ public: > > gcc_assert (n == 0); > > small_p = param; > > } > > - virtual bool gate (function *) { return true; } > > + virtual bool gate (function *) { return !(flag_ltrans && split_outputs); } > > virtual unsigned int execute (function *) > > { > > ipa_free_fn_summary (); > > diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c > > index 069de9d82fb..6a5657c7507 100644 > > --- a/gcc/ipa-icf.c > > +++ b/gcc/ipa-icf.c > > @@ -2345,7 +2345,8 @@ sem_item_optimizer::filter_removed_items (void) > > { > > cgraph_node *cnode = static_cast <sem_function *>(item)->get_node (); > > > > - if (in_lto_p && (cnode->alias || cnode->body_removed)) > > + if ((in_lto_p || split_outputs) > > + && (cnode->alias || cnode->body_removed)) > > > And I wonder why you need these. IPA passes are run before we split, > right? This was due to an early problem. I removed this and bootstrap is still working. > > remove_item (item); > > else > > filtered.safe_push (item); > > diff --git a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c > > index 7c854f471e8..4d9e11482d3 100644 > > --- a/gcc/ipa-visibility.c > > +++ b/gcc/ipa-visibility.c > > @@ -540,7 +540,8 @@ optimize_weakref (symtab_node *node) > > static void > > localize_node (bool whole_program, symtab_node *node) > > { > > - gcc_assert (whole_program || in_lto_p || !TREE_PUBLIC (node->decl)); > > + gcc_assert (split_outputs || whole_program || in_lto_p > > + || !TREE_PUBLIC (node->decl)); > > > > /* It is possible that one comdat group contains both hidden and non-hidden > > symbols. In this case we can privatize all hidden symbol but we need > > diff --git a/gcc/ipa.c b/gcc/ipa.c > > index 288b58cf73d..b397ea2fed8 100644 > > --- a/gcc/ipa.c > > +++ b/gcc/ipa.c > > @@ -350,7 +350,7 @@ symbol_table::remove_unreachable_nodes (FILE *file) > > > > /* Mark variables that are obviously needed. */ > > FOR_EACH_DEFINED_VARIABLE (vnode) > > - if (!vnode->can_remove_if_no_refs_p() > > + if (!vnode->can_remove_if_no_refs_p () > > && !vnode->in_other_partition) > > { > > reachable.add (vnode); > > @@ -564,7 +564,7 @@ symbol_table::remove_unreachable_nodes (FILE *file) > > } > > else > > gcc_assert (node->clone_of || !node->has_gimple_body_p () > > - || in_lto_p || DECL_RESULT (node->decl)); > > + || in_lto_p || split_outputs || DECL_RESULT (node->decl)); > > } > > > > /* Inline clones might be kept around so their materializing allows further > > diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c > > index 93a99f3465b..12be8546d9c 100644 > > --- a/gcc/lto-cgraph.c > > +++ b/gcc/lto-cgraph.c > > @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see > > #include "omp-offload.h" > > #include "stringpool.h" > > #include "attribs.h" > > +#include "lto-partition.h" > > > > /* True when asm nodes has been output. */ > > bool asm_nodes_output = false; > > @@ -2065,3 +2066,174 @@ input_cgraph_opt_summary (vec<symtab_node *> nodes) > > input_cgraph_opt_section (file_data, data, len, nodes); > > } > > } > > + > > +/* When analysing function for removal, we have mainly three states, as > > + defined below. */ > > + > > +enum node_partition_state > > +{ > > + CAN_REMOVE, /* This node can be removed, or is still to be > > + analysed. */ > > + IN_CURRENT_PARTITION, /* This node is in current partition and should not be > > + touched. */ > > + IN_BOUNDARY, /* This node is in boundary, therefore being in other > > + partition or is a external symbol, and its body can > > + be released. */ > > + IN_BOUNDARY_KEEP_BODY /* This symbol is in other partition but we may need its > > + body for inlining, for instance. */ > > +}; > > + > > +/* Handle node that are in the LTRANS boundary, releasing its body and > > + other informations if necessary. */ > > + > > +static void > > +handle_node_in_boundary (symtab_node *node, bool keep_body) > > +{ > > + if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node)) > > + { > > + if (cnode->inlined_to && cnode->inlined_to->aux2 != IN_CURRENT_PARTITION) > > + { > > + /* If marked to be inlined into a node not in current partition, > > + then undo the inline. */ > > + > > + if (cnode->callers) /* This edge could be removed. */ > > + cnode->callers->inline_failed = CIF_UNSPECIFIED; > > + cnode->inlined_to = NULL; > > + } > > + > > + if (cnode->has_gimple_body_p ()) > > + { > > + if (!keep_body) > > + { > > + cnode->maybe_release_dominators (); > > + cnode->remove_callees (); > > + cnode->remove_all_references (); > > + > > + /* FIXME: Releasing body of clones can release bodies of functions > > + in current partition. */ > > + > > + /* cnode->release_body (); */ > > + cnode->body_removed = true; > > + cnode->definition = false; > > + cnode->analyzed = false; > > + } > > + cnode->cpp_implicit_alias = false; > > + cnode->alias = false; > > + cnode->transparent_alias = false; > > + cnode->thunk.thunk_p = false; > > + cnode->weakref = false; > > + /* After early inlining we drop always_inline attributes on > > + bodies of functions that are still referenced (have their > > + address taken). */ > > + DECL_ATTRIBUTES (cnode->decl) > > + = remove_attribute ("always_inline", > > + DECL_ATTRIBUTES (node->decl)); > > + > > + cnode->in_other_partition = true; > > + } > > + } > > + else if (is_a <varpool_node *> (node) && !DECL_EXTERNAL (node->decl)) > > + { > > + DECL_EXTERNAL (node->decl) = true; > > + node->in_other_partition = true; > > + } > > +} > > + > > +/* Check the boundary and expands it if necessary, including more nodes or > > + promoting then to a state where their body is required. */ > > + > > +static void > > +compute_boundary (ltrans_partition partition) > > +{ > > + vec<lto_encoder_entry> &nodes = partition->encoder->nodes; > > + symtab_node *node; > > + cgraph_node *cnode; > > + auto_vec<symtab_node *, 16> mark_to_remove; > > + unsigned int i; > > + > > + FOR_EACH_SYMBOL (node) > > + node->aux2 = CAN_REMOVE; > > There is boundary computation in lto-cgraph.c so we should merge the > logic... Agree. > If you keep the lto-partition datastructures it will compute boundary > for you and you can just remove the rest (I got that working at one > point). This is interesting, because I could not get that working out of the box. The lto_promote_cross_statics did not provide a fully working boundary that I could simple remove everything else. If you take a closer look, you will see that I am using the already computed boundary as a base, and incrementing that with the extra stuff required (mainly inline clones body). > > I am also not sure about copy-on-write effect of this. It may be better > to keep things around and just teach late passes to not compile things > in other partitions, but that is definitly for incremental change. Well, this looks like a lot of work :) > > +void > > +init_additional_asm_names_file (void) > > +{ > > + gcc_assert (split_outputs); > > + > > + additional_asm_filenames = fopen (split_outputs, "w"); > > + if (!additional_asm_filenames) > > + error ("Unable to create a temporary write-only file."); > > + > > + fclose (additional_asm_filenames); > > +} > > Aha, that is what it does :) > I wonder if creating the file conditionally (and late) can not lead to > race condition where we will use same tmp file name for other build > executed in parallel by make. Hummm. True. Added to my TODO list :) Well, I never had any sort of issues with race condition here, even after stressing it, but this certainly is not proof that it is free of race conditition :) > > + > > +/* Reinitialize the assembler file and store it in the additional asm file. */ > > + > > +void > > +handle_additional_asm (int childno) > > +{ > > + gcc_assert (split_outputs); > > + > > + if (childno < 0) > > + return; > > + > > + const char *temp_asm_name = make_temp_file (".s"); > > + asm_file_name = temp_asm_name; > > + > > + if (asm_out_file == stdout) > > + fatal_error (UNKNOWN_LOCATION, "Unexpected asm output to stdout"); > > + > > + fclose (asm_out_file); > > + > > + asm_out_file = fopen (temp_asm_name, "w"); > > + if (!asm_out_file) > > + fatal_error (UNKNOWN_LOCATION, "Unable to create asm output file"); > > + > > + /* Reopen file as append mode. Here we assume that write to append file is > > + atomic, as it is in Linux. */ > > + additional_asm_filenames = fopen (split_outputs, "a"); > > + if (!additional_asm_filenames) > > + fatal_error (UNKNOWN_LOCATION, > > + "Unable to open the temporary asm files container"); > > + > > + fprintf (additional_asm_filenames, "%d %s\n", childno, asm_file_name); > > + fclose (additional_asm_filenames); > > +} > > + > > /* A helper function; used as the reallocator function for cpp's line > > table. */ > > static void * > > @@ -2311,7 +2359,7 @@ do_compile () > > > > timevar_stop (TV_PHASE_SETUP); > > > > - compile_file (); > > + compile_file (); > > } > > else > > { > > @@ -2477,6 +2525,12 @@ toplev::main (int argc, char **argv) > > > > finalize_plugins (); > > > > + if (jobserver_initialized) > > + { > > + jobserver_return_token (JOBSERVER_NULL_TOKEN); > > + jobserver_finalize (); > > + } > > + > > after_memory_report = true; > > > > if (seen_error () || werrorcount) > > diff --git a/gcc/toplev.h b/gcc/toplev.h > > index d6c316962b0..3abbf74cd02 100644 > > --- a/gcc/toplev.h > > +++ b/gcc/toplev.h > > @@ -103,4 +103,7 @@ extern void parse_alignment_opts (void); > > > > extern void initialize_rtl (void); > > > > +extern void init_additional_asm_names_file (void); > > +extern void handle_additional_asm (int); > > + > > #endif /* ! GCC_TOPLEV_H */ > > diff --git a/gcc/varasm.c b/gcc/varasm.c > > index 4070f9c17e8..84df52013d7 100644 > > --- a/gcc/varasm.c > > +++ b/gcc/varasm.c > > @@ -110,7 +110,7 @@ static void decode_addr_const (tree, class addr_const *); > > static hashval_t const_hash_1 (const tree); > > static int compare_constant (const tree, const tree); > > static void output_constant_def_contents (rtx); > > -static void output_addressed_constants (tree); > > +static void output_addressed_constants (tree, int); > > static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT, > > unsigned int, bool, bool); > > static void globalize_decl (tree); > > @@ -2272,7 +2272,7 @@ assemble_variable (tree decl, int top_level ATTRIBUTE_UNUSED, > > > > /* Output any data that we will need to use the address of. */ > > if (DECL_INITIAL (decl) && DECL_INITIAL (decl) != error_mark_node) > > - output_addressed_constants (DECL_INITIAL (decl)); > > + output_addressed_constants (DECL_INITIAL (decl), 0); > > > > /* dbxout.c needs to know this. */ > > if (sect && (sect->common.flags & SECTION_CODE) != 0) > > @@ -3426,11 +3426,11 @@ build_constant_desc (tree exp) > > already have labels. */ > > > > static constant_descriptor_tree * > > -add_constant_to_table (tree exp) > > +add_constant_to_table (tree exp, int defer) > > { > > /* The hash table methods may call output_constant_def for addressed > > constants, so handle them first. */ > > - output_addressed_constants (exp); > > + output_addressed_constants (exp, defer); > > > > /* Sanity check to catch recursive insertion. */ > > static bool inserting; > > @@ -3474,7 +3474,7 @@ add_constant_to_table (tree exp) > > rtx > > output_constant_def (tree exp, int defer) > > { > > - struct constant_descriptor_tree *desc = add_constant_to_table (exp); > > + struct constant_descriptor_tree *desc = add_constant_to_table (exp, defer); > > maybe_output_constant_def_contents (desc, defer); > > return desc->rtl; > > } > > @@ -3544,7 +3544,7 @@ output_constant_def_contents (rtx symbol) > > > > /* Make sure any other constants whose addresses appear in EXP > > are assigned label numbers. */ > > - output_addressed_constants (exp); > > + output_addressed_constants (exp, 0); > > > > /* We are no longer deferring this constant. */ > > TREE_ASM_WRITTEN (decl) = TREE_ASM_WRITTEN (exp) = 1; > > @@ -3608,7 +3608,7 @@ lookup_constant_def (tree exp) > > tree > > tree_output_constant_def (tree exp) > > { > > - struct constant_descriptor_tree *desc = add_constant_to_table (exp); > > + struct constant_descriptor_tree *desc = add_constant_to_table (exp, 1); > > tree decl = SYMBOL_REF_DECL (XEXP (desc->rtl, 0)); > > varpool_node::finalize_decl (decl); > > return decl; > > @@ -4327,7 +4327,7 @@ compute_reloc_for_constant (tree exp) > > Indicate whether an ADDR_EXPR has been encountered. */ > > > > static void > > -output_addressed_constants (tree exp) > > +output_addressed_constants (tree exp, int defer) > > { > > tree tem; > > > > @@ -4347,21 +4347,21 @@ output_addressed_constants (tree exp) > > tem = DECL_INITIAL (tem); > > > > if (CONSTANT_CLASS_P (tem) || TREE_CODE (tem) == CONSTRUCTOR) > > - output_constant_def (tem, 0); > > + output_constant_def (tem, defer); > > > > if (TREE_CODE (tem) == MEM_REF) > > - output_addressed_constants (TREE_OPERAND (tem, 0)); > > + output_addressed_constants (TREE_OPERAND (tem, 0), defer); > > break; > > > > case PLUS_EXPR: > > case POINTER_PLUS_EXPR: > > case MINUS_EXPR: > > - output_addressed_constants (TREE_OPERAND (exp, 1)); > > + output_addressed_constants (TREE_OPERAND (exp, 1), defer); > > gcc_fallthrough (); > > > > CASE_CONVERT: > > case VIEW_CONVERT_EXPR: > > - output_addressed_constants (TREE_OPERAND (exp, 0)); > > + output_addressed_constants (TREE_OPERAND (exp, 0), defer); > > break; > > > > case CONSTRUCTOR: > > @@ -4369,7 +4369,7 @@ output_addressed_constants (tree exp) > > unsigned HOST_WIDE_INT idx; > > FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), idx, tem) > > if (tem != 0) > > - output_addressed_constants (tem); > > + output_addressed_constants (tem, defer); > > } > > break; > > Nice job :) > Honza Thank you, Giuliano.
> Hi, Honza. > > Thank you for your detailed review! > > On 08/27, Jan Hubicka wrote: > > > diff --git a/gcc/cgraph.c b/gcc/cgraph.c > > > index c0b45795059..22405098dc5 100644 > > > --- a/gcc/cgraph.c > > > +++ b/gcc/cgraph.c > > > @@ -226,6 +226,22 @@ cgraph_node::delete_function_version_by_decl (tree decl) > > > decl_node->remove (); > > > } > > > > > > +/* Release function dominator info if present. */ > > > + > > > +void > > > +cgraph_node::maybe_release_dominators (void) > > > +{ > > > + struct function *fun = DECL_STRUCT_FUNCTION (decl); > > > + > > > + if (fun && fun->cfg) > > > + { > > > + if (dom_info_available_p (fun, CDI_DOMINATORS)) > > > + free_dominance_info (fun, CDI_DOMINATORS); > > > + if (dom_info_available_p (fun, CDI_POST_DOMINATORS)) > > > + free_dominance_info (fun, CDI_POST_DOMINATORS); > > > + } > > > +} > > > > I am not sure if that needs to be member function, but if so we want to > > merge it with other places in cgraph.c and cgraphunit.c where dominators > > are freed. I do not think you need to check avalability. > > This is necessary to remove some nodes from the callgraph. For some > reason, if I node->remove () and it still have the dominance info > available, it will fail some assertions on the compiler. > > However, with regard to code layout, this can be moved to lto-cgraph.c, > as it is only used there. node->remove () is supposed to work here. I relalize that we remove random nodes currently only via the unreachable code removal that also does node->remove_body () so there may be some bug. > > > + /* Get symtab node by order. */ > > > + static symtab_node *find_by_order (int order); > > > > This is quadratic and moreover seems unused. Why do you add it? > > I added this for debugging, since I used this a lot inside GDB. > Sure, I can remove this without any problems, or print a warning > for the developer to avoid calling this in production code. We could keep this as separate patch, since it is independent of rest of changes. Having more debug_* functions is fine with me, but I would like to having nondebug members since sooner or later someone will use them in non-debug code. > > > + /* FIXME: Find out why body-removed nodes are marked for output. */ > > > + if (body_removed) > > > + return; > > > > Indeed, we should know :) > > Looks like this was due an early problem. I removed this and bootstrap > is working OK. Cool, random changes like this looks suspicious :) > > > > This looks odd, we have other places where we parse number from command > > line :) > > isdigit () is poisoned in GCC. But I guess I should look how -flto= > does this. Seems we simply do atoi and compare with 0 that is invalid. > > > > Messing with WPA/ltrans flags is not good idea. You already have > > split_output for parallel build. I sort of see why you set ltrans, but > > why WPA? > > Some assertions expected flag_wpa to be present. Sure, I can add > split_outputs in these assertions. Yep, that would be cleaner. > > > > There is boundary computation in lto-cgraph.c so we should merge the > > logic... > > Agree. > > > If you keep the lto-partition datastructures it will compute boundary > > for you and you can just remove the rest (I got that working at one > > point). > > This is interesting, because I could not get that working out of the > box. The lto_promote_cross_statics did not provide a fully working > boundary that I could simple remove everything else. If you take a > closer look, you will see that I am using the already computed boundary > as a base, and incrementing that with the extra stuff required > (mainly inline clones body). What problems exactly you run into when you remove the extra code? > > > > Aha, that is what it does :) > > I wonder if creating the file conditionally (and late) can not lead to > > race condition where we will use same tmp file name for other build > > executed in parallel by make. > > Hummm. True. Added to my TODO list :) > Well, I never had any sort of issues with race condition here, even > after stressing it, but this certainly is not proof that it is free of > race conditition :) I really do not know how this works, since we create other files with delay too. I suppose we get a tmp filename that is tested to be unique and then we simply assume that we can derive names from it. Honza
On Thu, Aug 27, 2020 at 8:27 PM Giuliano Belinassi <giuliano.belinassi@usp.br> wrote: > > Hi, Honza. > > Thank you for your detailed review! > > On 08/27, Jan Hubicka wrote: > > > diff --git a/gcc/cgraph.c b/gcc/cgraph.c > > > index c0b45795059..22405098dc5 100644 > > > --- a/gcc/cgraph.c > > > +++ b/gcc/cgraph.c > > > @@ -226,6 +226,22 @@ cgraph_node::delete_function_version_by_decl (tree decl) > > > decl_node->remove (); > > > } > > > > > > +/* Release function dominator info if present. */ > > > + > > > +void > > > +cgraph_node::maybe_release_dominators (void) > > > +{ > > > + struct function *fun = DECL_STRUCT_FUNCTION (decl); > > > + > > > + if (fun && fun->cfg) > > > + { > > > + if (dom_info_available_p (fun, CDI_DOMINATORS)) > > > + free_dominance_info (fun, CDI_DOMINATORS); > > > + if (dom_info_available_p (fun, CDI_POST_DOMINATORS)) > > > + free_dominance_info (fun, CDI_POST_DOMINATORS); > > > + } > > > +} > > > > I am not sure if that needs to be member function, but if so we want to > > merge it with other places in cgraph.c and cgraphunit.c where dominators > > are freed. I do not think you need to check avalability. > > This is necessary to remove some nodes from the callgraph. For some > reason, if I node->remove () and it still have the dominance info > available, it will fail some assertions on the compiler. > > However, with regard to code layout, this can be moved to lto-cgraph.c, > as it is only used there. > > > > + > > > /* Record that DECL1 and DECL2 are semantically identical function > > > versions. */ > > > void > > > diff --git a/gcc/cgraph.h b/gcc/cgraph.h > > > index b4a7871bd3d..72ac19f9672 100644 > > > --- a/gcc/cgraph.h > > > +++ b/gcc/cgraph.h > > > @@ -463,6 +463,15 @@ public: > > > Return NULL if there's no such node. */ > > > static symtab_node *get_for_asmname (const_tree asmname); > > > > > > + /* Get symtab node by order. */ > > > + static symtab_node *find_by_order (int order); > > > > This is quadratic and moreover seems unused. Why do you add it? > > I added this for debugging, since I used this a lot inside GDB. > Sure, I can remove this without any problems, or print a warning > for the developer to avoid calling this in production code. > > > > + > > > + /* Get symtab_node by its name. */ > > > + static symtab_node *find_by_name (const char *); > > > > Similarly here, note that names are not really very meaningful as lookup > > things, since they get duplicated. > > > + > > > + /* Get symtab_node by its ASM name. */ > > > + static symtab_node *find_by_asm_name (const char *); > > > > For this we have get_for_asmname (which also populates asm name hash as > > needed and is not quadratic) > > Cool. I will surely remove this then :) > > > > diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c > > > index d10d635e942..73e4bed3b61 100644 > > > --- a/gcc/cgraphunit.c > > > +++ b/gcc/cgraphunit.c > > > @@ -2258,6 +2258,11 @@ cgraph_node::expand (void) > > > { > > > location_t saved_loc; > > > > > > + /* FIXME: Find out why body-removed nodes are marked for output. */ > > > + if (body_removed) > > > + return; > > > > Indeed, we should know :) > > Looks like this was due an early problem. I removed this and bootstrap > is working OK. > > > > + > > > + > > > /* We ought to not compile any inline clones. */ > > > gcc_assert (!inlined_to); > > > > > > @@ -2658,6 +2663,7 @@ ipa_passes (void) > > > > > > execute_ipa_summary_passes > > > ((ipa_opt_pass_d *) passes->all_regular_ipa_passes); > > > + > > This seems accidental. > > Yes. > > > > } > > > > > > /* Some targets need to handle LTO assembler output specially. */ > > > @@ -2687,10 +2693,17 @@ ipa_passes (void) > > > if (flag_generate_lto || flag_generate_offload) > > > targetm.asm_out.lto_end (); > > > > > > - if (!flag_ltrans > > > + if (split_outputs) > > > + flag_ltrans = true; > > > + > > > + if ((!flag_ltrans || split_outputs) > > > && ((in_lto_p && flag_incremental_link != INCREMENTAL_LINK_LTO) > > > || !flag_lto || flag_fat_lto_objects)) > > > execute_ipa_pass_list (passes->all_regular_ipa_passes); > > > + > > > + if (split_outputs) > > > + flag_ltrans = false; > > > + > > > invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_END, NULL); > > > > > > bitmap_obstack_release (NULL); > > > @@ -2742,6 +2755,185 @@ symbol_table::output_weakrefs (void) > > > } > > > } > > > > > > +static bool is_number (const char *str) > > > +{ > > > + while (*str != '\0') > > > + switch (*str++) > > > + { > > > + case '0': > > > + case '1': > > > + case '2': > > > + case '3': > > > + case '4': > > > + case '5': > > > + case '6': > > > + case '7': > > > + case '8': > > > + case '9': > > > + continue; > > > + default: > > > + return false; > > > + } > > > + > > > + return true; > > > +} > > > > This looks odd, we have other places where we parse number from command > > line :) > > isdigit () is poisoned in GCC. But I guess I should look how -flto= > does this. You need to use the all-caps variants, ISDIGIT in this case. > > > + > > > +/* If forked, which child am I? */ > > > + > > > +static int childno = -1; > > > + > > > +static bool > > > +maybe_compile_in_parallel (void) > > > +{ > > > + struct symtab_node *node; > > > + int partitions, i, j; > > > + int *pids; > > > + > > > + bool promote_statics = param_promote_statics; > > > + bool balance = param_balance_partitions; > > > + bool jobserver = false; > > > + bool job_auto = false; > > > + int num_jobs = -1; > > > + > > > + if (!flag_parallel_jobs || !split_outputs) > > > + return false; > > > + > > > + if (!strcmp (flag_parallel_jobs, "auto")) > > > + { > > > + jobserver = jobserver_initialize (); > > > + job_auto = true; > > > + } > > > + else if (!strcmp (flag_parallel_jobs, "jobserver")) > > > + jobserver = jobserver_initialize (); > > > + else if (is_number (flag_parallel_jobs)) > > > + num_jobs = atoi (flag_parallel_jobs); > > > + else > > > + gcc_unreachable (); > > > + > > > + if (job_auto && !jobserver) > > > + { > > > + num_jobs = sysconf (_SC_NPROCESSORS_CONF); > > > + if (num_jobs > 2) > > > + num_jobs = 2; > > > + } > > > + > > > + if (num_jobs < 0 && !jobserver) > > > + { > > > + inform (UNKNOWN_LOCATION, > > > + "-fparallel-jobs=jobserver, but no GNU Jobserver found"); > > > + return false; > > > + } > > > + > > > + if (jobserver) > > > + num_jobs = 2; > > > + > > > + if (num_jobs == 0) > > > + { > > > + inform (UNKNOWN_LOCATION, "-fparallel-jobs=0 makes no sense"); > > > + return false; > > > + } > > > + > > > + /* Trick the compiler to think that we are in WPA. */ > > > + flag_wpa = ""; > > > + symtab_node::checking_verify_symtab_nodes (); > > > > Messing with WPA/ltrans flags is not good idea. You already have > > split_output for parallel build. I sort of see why you set ltrans, but > > why WPA? > > Some assertions expected flag_wpa to be present. Sure, I can add > split_outputs in these assertions. > > > > + > > > + /* Partition the program so that COMDATs get mapped to the same > > > + partition. If promote_statics is true, it also maps statics > > > + to the same partition. If balance is true, try to balance the > > > + partitions for compilation performance. */ > > > + lto_merge_comdat_map (balance, promote_statics, num_jobs); > > > + > > > + /* AUX pointers are used by partitioning code to bookkeep number of > > > + partitions symbol is in. This is no longer needed. */ > > > + FOR_EACH_SYMBOL (node) > > > + node->aux = NULL; > > > + > > > + /* We decided that partitioning is a bad idea. In this case, just > > > + proceed with the default compilation method. */ > > > + if (ltrans_partitions.length () <= 1) > > > + { > > > + flag_wpa = NULL; > > > + jobserver_finalize (); > > > + return false; > > > + } > > > + > > > + /* Find out statics that need to be promoted > > > + to globals with hidden visibility because they are accessed from > > > + multiple partitions. */ > > > + lto_promote_cross_file_statics (promote_statics); > > > + > > > + /* Check if we have variables being referenced across partitions. */ > > > + lto_check_usage_from_other_partitions (); > > > + > > > + /* Trick the compiler to think we are not in WPA anymore. */ > > > + flag_wpa = NULL; > > > + > > > + partitions = ltrans_partitions.length (); > > > + pids = XALLOCAVEC (pid_t, partitions); > > > + > > > + /* There is no point in launching more jobs than we have partitions. */ > > > + if (num_jobs > partitions) > > > + num_jobs = partitions; > > > + > > > + /* Trick the compiler to think we are in LTRANS mode. */ > > > + flag_ltrans = true; > > > + > > > + init_additional_asm_names_file (); > > > + > > > + /* Flush asm file, so we don't get repeated output as we fork. */ > > > + fflush (asm_out_file); > > > + > > > + /* Insert a token for child to consume. */ > > > + if (jobserver) > > > + { > > > + num_jobs = partitions; > > > + jobserver_return_token ('p'); > > > + } > > > + > > > + /* Spawn processes. Spawn as soon as there is a free slot. */ > > > + for (j = 0, i = -num_jobs; i < partitions; i++, j++) > > > + { > > > + if (i >= 0) > > > + { > > > + int wstatus, ret; > > > + ret = waitpid (pids[i], &wstatus, 0); > > > + > > > + if (ret < 0) > > > + internal_error ("Unable to wait child %d to finish", i); > > > + else if (WIFEXITED (wstatus)) > > > + { > > > + if (WEXITSTATUS (wstatus) != 0) > > > + error ("Child %d exited with error", i); > > > + } > > > + else if (WIFSIGNALED (wstatus)) > > > + error ("Child %d aborted with error", i); > > > + } > > > + > > > + if (j < partitions) > > > + { > > > + gcc_assert (ltrans_partitions[j]->symbols > 0); > > > + > > > + if (jobserver) > > > + jobserver_get_token (); > > > + > > > + pids[j] = fork (); > > > + if (pids[j] == 0) > > > + { > > > + childno = j; > > > + lto_apply_partition_mask (ltrans_partitions[j]); > > > + return true; > > > + } > > > + } > > > + } > > > + > > > + /* Get the token which parent inserted for the childs, which they returned by > > > + now. */ > > > + if (jobserver) > > > + jobserver_get_token (); > > > + exit (0); > > > +} > > > + > > > + > > > /* Perform simple optimizations based on callgraph. */ > > > > > > void > > > @@ -2768,6 +2960,7 @@ symbol_table::compile (void) > > > { > > > timevar_start (TV_CGRAPH_IPA_PASSES); > > > ipa_passes (); > > > + maybe_compile_in_parallel (); > > > timevar_stop (TV_CGRAPH_IPA_PASSES); > > > } > > > /* Do nothing else if any IPA pass found errors or if we are just streaming LTO. */ > > > @@ -2790,6 +2983,9 @@ symbol_table::compile (void) > > > timevar_pop (TV_CGRAPHOPT); > > > > > > /* Output everything. */ > > > + if (split_outputs) > > > + handle_additional_asm (childno); > > What this is doin? > > Create an auxiliary file in which we will write the name of every > assembler output. This will change, as it is a better idea to write > them using the main process rather than the child, as Richi already > pointed out. > > > > + > > > switch_to_section (text_section); > > > (*debug_hooks->assembly_start) (); > > > if (!quiet_flag) > > > diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c > > > index 2cfab40156e..bc500df4853 100644 > > > --- a/gcc/ipa-fnsummary.c > > > +++ b/gcc/ipa-fnsummary.c > > > @@ -4610,7 +4610,7 @@ public: > > > gcc_assert (n == 0); > > > small_p = param; > > > } > > > - virtual bool gate (function *) { return true; } > > > + virtual bool gate (function *) { return !(flag_ltrans && split_outputs); } > > > virtual unsigned int execute (function *) > > > { > > > ipa_free_fn_summary (); > > > diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c > > > index 069de9d82fb..6a5657c7507 100644 > > > --- a/gcc/ipa-icf.c > > > +++ b/gcc/ipa-icf.c > > > @@ -2345,7 +2345,8 @@ sem_item_optimizer::filter_removed_items (void) > > > { > > > cgraph_node *cnode = static_cast <sem_function *>(item)->get_node (); > > > > > > - if (in_lto_p && (cnode->alias || cnode->body_removed)) > > > + if ((in_lto_p || split_outputs) > > > + && (cnode->alias || cnode->body_removed)) > > > > > > And I wonder why you need these. IPA passes are run before we split, > > right? > > This was due to an early problem. I removed this and bootstrap is > still working. > > > > remove_item (item); > > > else > > > filtered.safe_push (item); > > > diff --git a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c > > > index 7c854f471e8..4d9e11482d3 100644 > > > --- a/gcc/ipa-visibility.c > > > +++ b/gcc/ipa-visibility.c > > > @@ -540,7 +540,8 @@ optimize_weakref (symtab_node *node) > > > static void > > > localize_node (bool whole_program, symtab_node *node) > > > { > > > - gcc_assert (whole_program || in_lto_p || !TREE_PUBLIC (node->decl)); > > > + gcc_assert (split_outputs || whole_program || in_lto_p > > > + || !TREE_PUBLIC (node->decl)); > > > > > > /* It is possible that one comdat group contains both hidden and non-hidden > > > symbols. In this case we can privatize all hidden symbol but we need > > > diff --git a/gcc/ipa.c b/gcc/ipa.c > > > index 288b58cf73d..b397ea2fed8 100644 > > > --- a/gcc/ipa.c > > > +++ b/gcc/ipa.c > > > @@ -350,7 +350,7 @@ symbol_table::remove_unreachable_nodes (FILE *file) > > > > > > /* Mark variables that are obviously needed. */ > > > FOR_EACH_DEFINED_VARIABLE (vnode) > > > - if (!vnode->can_remove_if_no_refs_p() > > > + if (!vnode->can_remove_if_no_refs_p () > > > && !vnode->in_other_partition) > > > { > > > reachable.add (vnode); > > > @@ -564,7 +564,7 @@ symbol_table::remove_unreachable_nodes (FILE *file) > > > } > > > else > > > gcc_assert (node->clone_of || !node->has_gimple_body_p () > > > - || in_lto_p || DECL_RESULT (node->decl)); > > > + || in_lto_p || split_outputs || DECL_RESULT (node->decl)); > > > } > > > > > > /* Inline clones might be kept around so their materializing allows further > > > diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c > > > index 93a99f3465b..12be8546d9c 100644 > > > --- a/gcc/lto-cgraph.c > > > +++ b/gcc/lto-cgraph.c > > > @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see > > > #include "omp-offload.h" > > > #include "stringpool.h" > > > #include "attribs.h" > > > +#include "lto-partition.h" > > > > > > /* True when asm nodes has been output. */ > > > bool asm_nodes_output = false; > > > @@ -2065,3 +2066,174 @@ input_cgraph_opt_summary (vec<symtab_node *> nodes) > > > input_cgraph_opt_section (file_data, data, len, nodes); > > > } > > > } > > > + > > > +/* When analysing function for removal, we have mainly three states, as > > > + defined below. */ > > > + > > > +enum node_partition_state > > > +{ > > > + CAN_REMOVE, /* This node can be removed, or is still to be > > > + analysed. */ > > > + IN_CURRENT_PARTITION, /* This node is in current partition and should not be > > > + touched. */ > > > + IN_BOUNDARY, /* This node is in boundary, therefore being in other > > > + partition or is a external symbol, and its body can > > > + be released. */ > > > + IN_BOUNDARY_KEEP_BODY /* This symbol is in other partition but we may need its > > > + body for inlining, for instance. */ > > > +}; > > > + > > > +/* Handle node that are in the LTRANS boundary, releasing its body and > > > + other informations if necessary. */ > > > + > > > +static void > > > +handle_node_in_boundary (symtab_node *node, bool keep_body) > > > +{ > > > + if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node)) > > > + { > > > + if (cnode->inlined_to && cnode->inlined_to->aux2 != IN_CURRENT_PARTITION) > > > + { > > > + /* If marked to be inlined into a node not in current partition, > > > + then undo the inline. */ > > > + > > > + if (cnode->callers) /* This edge could be removed. */ > > > + cnode->callers->inline_failed = CIF_UNSPECIFIED; > > > + cnode->inlined_to = NULL; > > > + } > > > + > > > + if (cnode->has_gimple_body_p ()) > > > + { > > > + if (!keep_body) > > > + { > > > + cnode->maybe_release_dominators (); > > > + cnode->remove_callees (); > > > + cnode->remove_all_references (); > > > + > > > + /* FIXME: Releasing body of clones can release bodies of functions > > > + in current partition. */ > > > + > > > + /* cnode->release_body (); */ > > > + cnode->body_removed = true; > > > + cnode->definition = false; > > > + cnode->analyzed = false; > > > + } > > > + cnode->cpp_implicit_alias = false; > > > + cnode->alias = false; > > > + cnode->transparent_alias = false; > > > + cnode->thunk.thunk_p = false; > > > + cnode->weakref = false; > > > + /* After early inlining we drop always_inline attributes on > > > + bodies of functions that are still referenced (have their > > > + address taken). */ > > > + DECL_ATTRIBUTES (cnode->decl) > > > + = remove_attribute ("always_inline", > > > + DECL_ATTRIBUTES (node->decl)); > > > + > > > + cnode->in_other_partition = true; > > > + } > > > + } > > > + else if (is_a <varpool_node *> (node) && !DECL_EXTERNAL (node->decl)) > > > + { > > > + DECL_EXTERNAL (node->decl) = true; > > > + node->in_other_partition = true; > > > + } > > > +} > > > + > > > +/* Check the boundary and expands it if necessary, including more nodes or > > > + promoting then to a state where their body is required. */ > > > + > > > +static void > > > +compute_boundary (ltrans_partition partition) > > > +{ > > > + vec<lto_encoder_entry> &nodes = partition->encoder->nodes; > > > + symtab_node *node; > > > + cgraph_node *cnode; > > > + auto_vec<symtab_node *, 16> mark_to_remove; > > > + unsigned int i; > > > + > > > + FOR_EACH_SYMBOL (node) > > > + node->aux2 = CAN_REMOVE; > > > > There is boundary computation in lto-cgraph.c so we should merge the > > logic... > > Agree. > > > If you keep the lto-partition datastructures it will compute boundary > > for you and you can just remove the rest (I got that working at one > > point). > > This is interesting, because I could not get that working out of the > box. The lto_promote_cross_statics did not provide a fully working > boundary that I could simple remove everything else. If you take a > closer look, you will see that I am using the already computed boundary > as a base, and incrementing that with the extra stuff required > (mainly inline clones body). > > > > > I am also not sure about copy-on-write effect of this. It may be better > > to keep things around and just teach late passes to not compile things > > in other partitions, but that is definitly for incremental change. > > Well, this looks like a lot of work :) > > > > +void > > > +init_additional_asm_names_file (void) > > > +{ > > > + gcc_assert (split_outputs); > > > + > > > + additional_asm_filenames = fopen (split_outputs, "w"); > > > + if (!additional_asm_filenames) > > > + error ("Unable to create a temporary write-only file."); > > > + > > > + fclose (additional_asm_filenames); > > > +} > > > > Aha, that is what it does :) > > I wonder if creating the file conditionally (and late) can not lead to > > race condition where we will use same tmp file name for other build > > executed in parallel by make. > > Hummm. True. Added to my TODO list :) > Well, I never had any sort of issues with race condition here, even > after stressing it, but this certainly is not proof that it is free of > race conditition :) > > > > + > > > +/* Reinitialize the assembler file and store it in the additional asm file. */ > > > + > > > +void > > > +handle_additional_asm (int childno) > > > +{ > > > + gcc_assert (split_outputs); > > > + > > > + if (childno < 0) > > > + return; > > > + > > > + const char *temp_asm_name = make_temp_file (".s"); > > > + asm_file_name = temp_asm_name; > > > + > > > + if (asm_out_file == stdout) > > > + fatal_error (UNKNOWN_LOCATION, "Unexpected asm output to stdout"); > > > + > > > + fclose (asm_out_file); > > > + > > > + asm_out_file = fopen (temp_asm_name, "w"); > > > + if (!asm_out_file) > > > + fatal_error (UNKNOWN_LOCATION, "Unable to create asm output file"); > > > + > > > + /* Reopen file as append mode. Here we assume that write to append file is > > > + atomic, as it is in Linux. */ > > > + additional_asm_filenames = fopen (split_outputs, "a"); > > > + if (!additional_asm_filenames) > > > + fatal_error (UNKNOWN_LOCATION, > > > + "Unable to open the temporary asm files container"); > > > + > > > + fprintf (additional_asm_filenames, "%d %s\n", childno, asm_file_name); > > > + fclose (additional_asm_filenames); > > > +} > > > + > > > /* A helper function; used as the reallocator function for cpp's line > > > table. */ > > > static void * > > > @@ -2311,7 +2359,7 @@ do_compile () > > > > > > timevar_stop (TV_PHASE_SETUP); > > > > > > - compile_file (); > > > + compile_file (); > > > } > > > else > > > { > > > @@ -2477,6 +2525,12 @@ toplev::main (int argc, char **argv) > > > > > > finalize_plugins (); > > > > > > + if (jobserver_initialized) > > > + { > > > + jobserver_return_token (JOBSERVER_NULL_TOKEN); > > > + jobserver_finalize (); > > > + } > > > + > > > after_memory_report = true; > > > > > > if (seen_error () || werrorcount) > > > diff --git a/gcc/toplev.h b/gcc/toplev.h > > > index d6c316962b0..3abbf74cd02 100644 > > > --- a/gcc/toplev.h > > > +++ b/gcc/toplev.h > > > @@ -103,4 +103,7 @@ extern void parse_alignment_opts (void); > > > > > > extern void initialize_rtl (void); > > > > > > +extern void init_additional_asm_names_file (void); > > > +extern void handle_additional_asm (int); > > > + > > > #endif /* ! GCC_TOPLEV_H */ > > > diff --git a/gcc/varasm.c b/gcc/varasm.c > > > index 4070f9c17e8..84df52013d7 100644 > > > --- a/gcc/varasm.c > > > +++ b/gcc/varasm.c > > > @@ -110,7 +110,7 @@ static void decode_addr_const (tree, class addr_const *); > > > static hashval_t const_hash_1 (const tree); > > > static int compare_constant (const tree, const tree); > > > static void output_constant_def_contents (rtx); > > > -static void output_addressed_constants (tree); > > > +static void output_addressed_constants (tree, int); > > > static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT, > > > unsigned int, bool, bool); > > > static void globalize_decl (tree); > > > @@ -2272,7 +2272,7 @@ assemble_variable (tree decl, int top_level ATTRIBUTE_UNUSED, > > > > > > /* Output any data that we will need to use the address of. */ > > > if (DECL_INITIAL (decl) && DECL_INITIAL (decl) != error_mark_node) > > > - output_addressed_constants (DECL_INITIAL (decl)); > > > + output_addressed_constants (DECL_INITIAL (decl), 0); > > > > > > /* dbxout.c needs to know this. */ > > > if (sect && (sect->common.flags & SECTION_CODE) != 0) > > > @@ -3426,11 +3426,11 @@ build_constant_desc (tree exp) > > > already have labels. */ > > > > > > static constant_descriptor_tree * > > > -add_constant_to_table (tree exp) > > > +add_constant_to_table (tree exp, int defer) > > > { > > > /* The hash table methods may call output_constant_def for addressed > > > constants, so handle them first. */ > > > - output_addressed_constants (exp); > > > + output_addressed_constants (exp, defer); > > > > > > /* Sanity check to catch recursive insertion. */ > > > static bool inserting; > > > @@ -3474,7 +3474,7 @@ add_constant_to_table (tree exp) > > > rtx > > > output_constant_def (tree exp, int defer) > > > { > > > - struct constant_descriptor_tree *desc = add_constant_to_table (exp); > > > + struct constant_descriptor_tree *desc = add_constant_to_table (exp, defer); > > > maybe_output_constant_def_contents (desc, defer); > > > return desc->rtl; > > > } > > > @@ -3544,7 +3544,7 @@ output_constant_def_contents (rtx symbol) > > > > > > /* Make sure any other constants whose addresses appear in EXP > > > are assigned label numbers. */ > > > - output_addressed_constants (exp); > > > + output_addressed_constants (exp, 0); > > > > > > /* We are no longer deferring this constant. */ > > > TREE_ASM_WRITTEN (decl) = TREE_ASM_WRITTEN (exp) = 1; > > > @@ -3608,7 +3608,7 @@ lookup_constant_def (tree exp) > > > tree > > > tree_output_constant_def (tree exp) > > > { > > > - struct constant_descriptor_tree *desc = add_constant_to_table (exp); > > > + struct constant_descriptor_tree *desc = add_constant_to_table (exp, 1); > > > tree decl = SYMBOL_REF_DECL (XEXP (desc->rtl, 0)); > > > varpool_node::finalize_decl (decl); > > > return decl; > > > @@ -4327,7 +4327,7 @@ compute_reloc_for_constant (tree exp) > > > Indicate whether an ADDR_EXPR has been encountered. */ > > > > > > static void > > > -output_addressed_constants (tree exp) > > > +output_addressed_constants (tree exp, int defer) > > > { > > > tree tem; > > > > > > @@ -4347,21 +4347,21 @@ output_addressed_constants (tree exp) > > > tem = DECL_INITIAL (tem); > > > > > > if (CONSTANT_CLASS_P (tem) || TREE_CODE (tem) == CONSTRUCTOR) > > > - output_constant_def (tem, 0); > > > + output_constant_def (tem, defer); > > > > > > if (TREE_CODE (tem) == MEM_REF) > > > - output_addressed_constants (TREE_OPERAND (tem, 0)); > > > + output_addressed_constants (TREE_OPERAND (tem, 0), defer); > > > break; > > > > > > case PLUS_EXPR: > > > case POINTER_PLUS_EXPR: > > > case MINUS_EXPR: > > > - output_addressed_constants (TREE_OPERAND (exp, 1)); > > > + output_addressed_constants (TREE_OPERAND (exp, 1), defer); > > > gcc_fallthrough (); > > > > > > CASE_CONVERT: > > > case VIEW_CONVERT_EXPR: > > > - output_addressed_constants (TREE_OPERAND (exp, 0)); > > > + output_addressed_constants (TREE_OPERAND (exp, 0), defer); > > > break; > > > > > > case CONSTRUCTOR: > > > @@ -4369,7 +4369,7 @@ output_addressed_constants (tree exp) > > > unsigned HOST_WIDE_INT idx; > > > FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), idx, tem) > > > if (tem != 0) > > > - output_addressed_constants (tem); > > > + output_addressed_constants (tem, defer); > > > } > > > break; > > > > Nice job :) > > Honza > > Thank you, > Giuliano.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index be42b15f4ff..c00617cfc1a 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1437,6 +1437,7 @@ OBJS = \ ira-color.o \ ira-emit.o \ ira-lives.o \ + jobserver.o \ jump.o \ langhooks.o \ lcm.o \ diff --git a/gcc/cgraph.c b/gcc/cgraph.c index c0b45795059..22405098dc5 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -226,6 +226,22 @@ cgraph_node::delete_function_version_by_decl (tree decl) decl_node->remove (); } +/* Release function dominator info if present. */ + +void +cgraph_node::maybe_release_dominators (void) +{ + struct function *fun = DECL_STRUCT_FUNCTION (decl); + + if (fun && fun->cfg) + { + if (dom_info_available_p (fun, CDI_DOMINATORS)) + free_dominance_info (fun, CDI_DOMINATORS); + if (dom_info_available_p (fun, CDI_POST_DOMINATORS)) + free_dominance_info (fun, CDI_POST_DOMINATORS); + } +} + /* Record that DECL1 and DECL2 are semantically identical function versions. */ void diff --git a/gcc/cgraph.h b/gcc/cgraph.h index b4a7871bd3d..72ac19f9672 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -463,6 +463,15 @@ public: Return NULL if there's no such node. */ static symtab_node *get_for_asmname (const_tree asmname); + /* Get symtab node by order. */ + static symtab_node *find_by_order (int order); + + /* Get symtab_node by its name. */ + static symtab_node *find_by_name (const char *); + + /* Get symtab_node by its ASM name. */ + static symtab_node *find_by_asm_name (const char *); + /* Verify symbol table for internal consistency. */ static DEBUG_FUNCTION void verify_symtab_nodes (void); @@ -1338,6 +1347,9 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node /* Dump the callgraph to file F. */ static void dump_cgraph (FILE *f); + /* Release function dominator info if available. */ + void maybe_release_dominators (); + /* Dump the call graph to stderr. */ static inline void debug_cgraph (void) diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index d10d635e942..73e4bed3b61 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -2258,6 +2258,11 @@ cgraph_node::expand (void) { location_t saved_loc; + /* FIXME: Find out why body-removed nodes are marked for output. */ + if (body_removed) + return; + + /* We ought to not compile any inline clones. */ gcc_assert (!inlined_to); @@ -2658,6 +2663,7 @@ ipa_passes (void) execute_ipa_summary_passes ((ipa_opt_pass_d *) passes->all_regular_ipa_passes); + } /* Some targets need to handle LTO assembler output specially. */ @@ -2687,10 +2693,17 @@ ipa_passes (void) if (flag_generate_lto || flag_generate_offload) targetm.asm_out.lto_end (); - if (!flag_ltrans + if (split_outputs) + flag_ltrans = true; + + if ((!flag_ltrans || split_outputs) && ((in_lto_p && flag_incremental_link != INCREMENTAL_LINK_LTO) || !flag_lto || flag_fat_lto_objects)) execute_ipa_pass_list (passes->all_regular_ipa_passes); + + if (split_outputs) + flag_ltrans = false; + invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_END, NULL); bitmap_obstack_release (NULL); @@ -2742,6 +2755,185 @@ symbol_table::output_weakrefs (void) } } +static bool is_number (const char *str) +{ + while (*str != '\0') + switch (*str++) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + continue; + default: + return false; + } + + return true; +} + +/* If forked, which child am I? */ + +static int childno = -1; + +static bool +maybe_compile_in_parallel (void) +{ + struct symtab_node *node; + int partitions, i, j; + int *pids; + + bool promote_statics = param_promote_statics; + bool balance = param_balance_partitions; + bool jobserver = false; + bool job_auto = false; + int num_jobs = -1; + + if (!flag_parallel_jobs || !split_outputs) + return false; + + if (!strcmp (flag_parallel_jobs, "auto")) + { + jobserver = jobserver_initialize (); + job_auto = true; + } + else if (!strcmp (flag_parallel_jobs, "jobserver")) + jobserver = jobserver_initialize (); + else if (is_number (flag_parallel_jobs)) + num_jobs = atoi (flag_parallel_jobs); + else + gcc_unreachable (); + + if (job_auto && !jobserver) + { + num_jobs = sysconf (_SC_NPROCESSORS_CONF); + if (num_jobs > 2) + num_jobs = 2; + } + + if (num_jobs < 0 && !jobserver) + { + inform (UNKNOWN_LOCATION, + "-fparallel-jobs=jobserver, but no GNU Jobserver found"); + return false; + } + + if (jobserver) + num_jobs = 2; + + if (num_jobs == 0) + { + inform (UNKNOWN_LOCATION, "-fparallel-jobs=0 makes no sense"); + return false; + } + + /* Trick the compiler to think that we are in WPA. */ + flag_wpa = ""; + symtab_node::checking_verify_symtab_nodes (); + + /* Partition the program so that COMDATs get mapped to the same + partition. If promote_statics is true, it also maps statics + to the same partition. If balance is true, try to balance the + partitions for compilation performance. */ + lto_merge_comdat_map (balance, promote_statics, num_jobs); + + /* AUX pointers are used by partitioning code to bookkeep number of + partitions symbol is in. This is no longer needed. */ + FOR_EACH_SYMBOL (node) + node->aux = NULL; + + /* We decided that partitioning is a bad idea. In this case, just + proceed with the default compilation method. */ + if (ltrans_partitions.length () <= 1) + { + flag_wpa = NULL; + jobserver_finalize (); + return false; + } + + /* Find out statics that need to be promoted + to globals with hidden visibility because they are accessed from + multiple partitions. */ + lto_promote_cross_file_statics (promote_statics); + + /* Check if we have variables being referenced across partitions. */ + lto_check_usage_from_other_partitions (); + + /* Trick the compiler to think we are not in WPA anymore. */ + flag_wpa = NULL; + + partitions = ltrans_partitions.length (); + pids = XALLOCAVEC (pid_t, partitions); + + /* There is no point in launching more jobs than we have partitions. */ + if (num_jobs > partitions) + num_jobs = partitions; + + /* Trick the compiler to think we are in LTRANS mode. */ + flag_ltrans = true; + + init_additional_asm_names_file (); + + /* Flush asm file, so we don't get repeated output as we fork. */ + fflush (asm_out_file); + + /* Insert a token for child to consume. */ + if (jobserver) + { + num_jobs = partitions; + jobserver_return_token ('p'); + } + + /* Spawn processes. Spawn as soon as there is a free slot. */ + for (j = 0, i = -num_jobs; i < partitions; i++, j++) + { + if (i >= 0) + { + int wstatus, ret; + ret = waitpid (pids[i], &wstatus, 0); + + if (ret < 0) + internal_error ("Unable to wait child %d to finish", i); + else if (WIFEXITED (wstatus)) + { + if (WEXITSTATUS (wstatus) != 0) + error ("Child %d exited with error", i); + } + else if (WIFSIGNALED (wstatus)) + error ("Child %d aborted with error", i); + } + + if (j < partitions) + { + gcc_assert (ltrans_partitions[j]->symbols > 0); + + if (jobserver) + jobserver_get_token (); + + pids[j] = fork (); + if (pids[j] == 0) + { + childno = j; + lto_apply_partition_mask (ltrans_partitions[j]); + return true; + } + } + } + + /* Get the token which parent inserted for the childs, which they returned by + now. */ + if (jobserver) + jobserver_get_token (); + exit (0); +} + + /* Perform simple optimizations based on callgraph. */ void @@ -2768,6 +2960,7 @@ symbol_table::compile (void) { timevar_start (TV_CGRAPH_IPA_PASSES); ipa_passes (); + maybe_compile_in_parallel (); timevar_stop (TV_CGRAPH_IPA_PASSES); } /* Do nothing else if any IPA pass found errors or if we are just streaming LTO. */ @@ -2790,6 +2983,9 @@ symbol_table::compile (void) timevar_pop (TV_CGRAPHOPT); /* Output everything. */ + if (split_outputs) + handle_additional_asm (childno); + switch_to_section (text_section); (*debug_hooks->assembly_start) (); if (!quiet_flag) diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index 2cfab40156e..bc500df4853 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -4610,7 +4610,7 @@ public: gcc_assert (n == 0); small_p = param; } - virtual bool gate (function *) { return true; } + virtual bool gate (function *) { return !(flag_ltrans && split_outputs); } virtual unsigned int execute (function *) { ipa_free_fn_summary (); diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index 069de9d82fb..6a5657c7507 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -2345,7 +2345,8 @@ sem_item_optimizer::filter_removed_items (void) { cgraph_node *cnode = static_cast <sem_function *>(item)->get_node (); - if (in_lto_p && (cnode->alias || cnode->body_removed)) + if ((in_lto_p || split_outputs) + && (cnode->alias || cnode->body_removed)) remove_item (item); else filtered.safe_push (item); diff --git a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c index 7c854f471e8..4d9e11482d3 100644 --- a/gcc/ipa-visibility.c +++ b/gcc/ipa-visibility.c @@ -540,7 +540,8 @@ optimize_weakref (symtab_node *node) static void localize_node (bool whole_program, symtab_node *node) { - gcc_assert (whole_program || in_lto_p || !TREE_PUBLIC (node->decl)); + gcc_assert (split_outputs || whole_program || in_lto_p + || !TREE_PUBLIC (node->decl)); /* It is possible that one comdat group contains both hidden and non-hidden symbols. In this case we can privatize all hidden symbol but we need diff --git a/gcc/ipa.c b/gcc/ipa.c index 288b58cf73d..b397ea2fed8 100644 --- a/gcc/ipa.c +++ b/gcc/ipa.c @@ -350,7 +350,7 @@ symbol_table::remove_unreachable_nodes (FILE *file) /* Mark variables that are obviously needed. */ FOR_EACH_DEFINED_VARIABLE (vnode) - if (!vnode->can_remove_if_no_refs_p() + if (!vnode->can_remove_if_no_refs_p () && !vnode->in_other_partition) { reachable.add (vnode); @@ -564,7 +564,7 @@ symbol_table::remove_unreachable_nodes (FILE *file) } else gcc_assert (node->clone_of || !node->has_gimple_body_p () - || in_lto_p || DECL_RESULT (node->decl)); + || in_lto_p || split_outputs || DECL_RESULT (node->decl)); } /* Inline clones might be kept around so their materializing allows further diff --git a/gcc/jobserver.cc b/gcc/jobserver.cc new file mode 100644 index 00000000000..8cb374de86e --- /dev/null +++ b/gcc/jobserver.cc @@ -0,0 +1,168 @@ +/* GNU Jobserver Integration Interface. + Copyright (C) 2005-2020 Free Software Foundation, Inc. + + Contributed by Giuliano Belinassi <giuliano.belinassi@usp.br> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "jobserver.h" +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "diagnostic.h" +#include "errno.h" + +/* Token which make sent when invoking GCC. */ +#define JOBSERVER_MAKE_TOKEN '+' + +bool jobserver_initialized = false; +bool nonblock_mode = false; +static int wfd = -1; +static int rfd = -1; + +jobserver_token_t jobserver_curr_token = JOBSERVER_NULL_TOKEN; + +/* When using GNU Jobserver but the user did not prepend the recursive make + token `+' to the GCC invocation, Make can close the file descriptors used + to comunicate with us, and there is no reliable way to detect this. + Therefore the best we can do is crash and alert the user to do hte right + thing. */ +static void jobserver_crash () +{ + fatal_error (UNKNOWN_LOCATION, + "-fparallel-jobs=jobserver, but Make jobserver pipe is closed"); +} + +/* Initialize this interface. We try to find whether the Jobserver is active + and working. */ +bool jobserver_initialize () +{ + bool success; + const char *makeflags = getenv ("MAKEFLAGS"); + if (makeflags == NULL) + return false; + + const char *needle = "--jobserver-auth="; + const char *n = strstr (makeflags, needle); + if (n == NULL) + return false; + + success = (sscanf (n + strlen (needle), "%d,%d", &rfd, &wfd) == 2 + && rfd > 0 + && wfd > 0 + && is_valid_fd (rfd) + && is_valid_fd (wfd)); + + if (!success) + return false; + + struct stat statbuf; + if (fstat (rfd, &statbuf) < 0 + || (statbuf.st_mode & S_IFMT) != S_IFIFO + || fstat (wfd, &statbuf) < 0 + || (statbuf.st_mode & S_IFMT) != S_IFIFO) + return false; + + int flags = fcntl (rfd, F_GETFL); + if (flags < 0) + return false; + + /* Mark that rfd is O_NONBLOCK, as we will have to use select. */ + if (flags & O_NONBLOCK) + nonblock_mode = true; + + return (jobserver_initialized = true); +} + +/* Finalize the jobserver, so this interface could be used again later. */ +bool jobserver_finalize () +{ + if (!jobserver_initialized) + return false; + + close (rfd); + close (wfd); + + rfd = wfd = -1; + nonblock_mode = false; + + jobserver_initialized = false; + return true; +} + +/* Return token to the jobserver. If c is the NULL token, then return + the last token we got. */ +void jobserver_return_token (jobserver_token_t c) +{ + ssize_t w; + + if (c == JOBSERVER_NULL_TOKEN) + c = jobserver_curr_token; + + w = write (wfd, &c, sizeof (jobserver_token_t)); + + if (w <= 0) + jobserver_crash (); +} + +/* TODO: Check if select if available in our system. */ +#define HAVE_SELECT + +/* Retrieve a token from the Jobserver. We have two cases, in which we must be + careful. First is when the function pselect is available in our system, as + Make will set the read fd as nonblocking and will expect that we use select. + (see posixos.c in GNU Make sourcecode). + The other is when select is not available in our system, and Make will set + it as blocking. */ +char jobserver_get_token () +{ + jobserver_token_t ret = JOBSERVER_NULL_TOKEN; + ssize_t r = -1; + + while (r < 0) + { + if (nonblock_mode) + { +#ifdef HAVE_SELECT + fd_set readfd_set; + + FD_ZERO (&readfd_set); + FD_SET (rfd, &readfd_set); + + r = select (rfd+1, &readfd_set, NULL, NULL, NULL); + + if (r < 0 && errno == EAGAIN) + continue; + + gcc_assert (r > 0); +#else + internal_error ("Make set Jobserver pipe to nonblock mode, but " + " select is not supported in your system"); +#endif + } + + r = read (rfd, &ret, sizeof (jobserver_token_t)); + + if (!(r > 0 || (r < 0 && errno == EAGAIN))) + { + jobserver_crash (); + break; + } + } + + return (jobserver_curr_token = ret); +} diff --git a/gcc/jobserver.h b/gcc/jobserver.h new file mode 100644 index 00000000000..2047cf4cdbf --- /dev/null +++ b/gcc/jobserver.h @@ -0,0 +1,33 @@ +/* GNU Jobserver Integration Interface. + Copyright (C) 2005-2020 Free Software Foundation, Inc. + + Contributed by Giuliano Belinassi <giuliano.belinassi@usp.br> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#define JOBSERVER_NULL_TOKEN ('\0') + +typedef char jobserver_token_t; + +extern bool jobserver_initialized; +extern jobserver_token_t jobserver_curr_token; + +bool jobserver_initialize (); +bool jobserver_finalize (); +jobserver_token_t jobserver_get_token (); +void jobserver_return_token (jobserver_token_t); + diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index 93a99f3465b..12be8546d9c 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see #include "omp-offload.h" #include "stringpool.h" #include "attribs.h" +#include "lto-partition.h" /* True when asm nodes has been output. */ bool asm_nodes_output = false; @@ -2065,3 +2066,174 @@ input_cgraph_opt_summary (vec<symtab_node *> nodes) input_cgraph_opt_section (file_data, data, len, nodes); } } + +/* When analysing function for removal, we have mainly three states, as + defined below. */ + +enum node_partition_state +{ + CAN_REMOVE, /* This node can be removed, or is still to be + analysed. */ + IN_CURRENT_PARTITION, /* This node is in current partition and should not be + touched. */ + IN_BOUNDARY, /* This node is in boundary, therefore being in other + partition or is a external symbol, and its body can + be released. */ + IN_BOUNDARY_KEEP_BODY /* This symbol is in other partition but we may need its + body for inlining, for instance. */ +}; + +/* Handle node that are in the LTRANS boundary, releasing its body and + other informations if necessary. */ + +static void +handle_node_in_boundary (symtab_node *node, bool keep_body) +{ + if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node)) + { + if (cnode->inlined_to && cnode->inlined_to->aux2 != IN_CURRENT_PARTITION) + { + /* If marked to be inlined into a node not in current partition, + then undo the inline. */ + + if (cnode->callers) /* This edge could be removed. */ + cnode->callers->inline_failed = CIF_UNSPECIFIED; + cnode->inlined_to = NULL; + } + + if (cnode->has_gimple_body_p ()) + { + if (!keep_body) + { + cnode->maybe_release_dominators (); + cnode->remove_callees (); + cnode->remove_all_references (); + + /* FIXME: Releasing body of clones can release bodies of functions + in current partition. */ + + /* cnode->release_body (); */ + cnode->body_removed = true; + cnode->definition = false; + cnode->analyzed = false; + } + cnode->cpp_implicit_alias = false; + cnode->alias = false; + cnode->transparent_alias = false; + cnode->thunk.thunk_p = false; + cnode->weakref = false; + /* After early inlining we drop always_inline attributes on + bodies of functions that are still referenced (have their + address taken). */ + DECL_ATTRIBUTES (cnode->decl) + = remove_attribute ("always_inline", + DECL_ATTRIBUTES (node->decl)); + + cnode->in_other_partition = true; + } + } + else if (is_a <varpool_node *> (node) && !DECL_EXTERNAL (node->decl)) + { + DECL_EXTERNAL (node->decl) = true; + node->in_other_partition = true; + } +} + +/* Check the boundary and expands it if necessary, including more nodes or + promoting then to a state where their body is required. */ + +static void +compute_boundary (ltrans_partition partition) +{ + vec<lto_encoder_entry> &nodes = partition->encoder->nodes; + symtab_node *node; + cgraph_node *cnode; + auto_vec<symtab_node *, 16> mark_to_remove; + unsigned int i; + + FOR_EACH_SYMBOL (node) + node->aux2 = CAN_REMOVE; + + /* Lets assign the information that the encoder gave to us. */ + for (i = 0; i < nodes.length (); i++) + { + node = nodes[i].node; + if (nodes[i].in_partition) + { + node->aux2 = IN_CURRENT_PARTITION; + node->in_other_partition = false; + } + else if (nodes[i].body) + node->aux2 = IN_BOUNDARY_KEEP_BODY; + else + node->aux2 = IN_BOUNDARY; + } + + /* Then look for nodes that was marked to be inlined to. If it is marked to + be inlined into a node that is in current partition, then mark its body to + not be removed. Also expand the boundary for nodes that requires that + its body not to be removed. */ + for (i = 0; i < nodes.length (); i++) + { + cnode = dyn_cast <cgraph_node *> (nodes[i].node); + if (!cnode) + continue; + + /* Promote nodes that will be inlined into a node in current partiton. */ + if (cnode->inlined_to && cnode->inlined_to->aux2 == IN_CURRENT_PARTITION + && cnode->aux2 == IN_BOUNDARY) + cnode->aux2 = IN_BOUNDARY_KEEP_BODY; + + /* Expand the boundary based on nodes in boundary that requires their + body to be present. */ + if (cnode->aux2 == IN_BOUNDARY_KEEP_BODY) + for (cgraph_edge *e = cnode->callees; e; e = e->next_callee) + if (e->callee->aux2 == CAN_REMOVE) + e->callee->aux2 = IN_BOUNDARY; + } +} + +/* Replace the partition in the symbol table, removing every node which is not + in partition. */ + +void +lto_apply_partition_mask (ltrans_partition partition) +{ + symtab_node *node; + auto_vec<symtab_node *, 16> mark_to_remove; + unsigned int i; + + compute_boundary (partition); + + FOR_EACH_SYMBOL (node) + switch (node->aux2) + { + case IN_CURRENT_PARTITION: + continue; + + case CAN_REMOVE: + mark_to_remove.safe_push (node); + break; + + case IN_BOUNDARY: + handle_node_in_boundary (node, false); + break; + + case IN_BOUNDARY_KEEP_BODY: + handle_node_in_boundary (node, true); + break; + + default: + gcc_unreachable (); + } + + /* Finally remove queued nodes. */ + for (i = 0; i < mark_to_remove.length (); i++) + { + symtab_node *node = mark_to_remove[i]; + if (is_a <cgraph_node *> (node)) + dyn_cast <cgraph_node *> (node)->maybe_release_dominators (); + + node->remove (); + } +} diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h index 0129f00cc1a..2ff23d16e52 100644 --- a/gcc/lto-streamer.h +++ b/gcc/lto-streamer.h @@ -916,6 +916,10 @@ bool reachable_from_this_partition_p (struct cgraph_node *, lto_symtab_encoder_t compute_ltrans_boundary (lto_symtab_encoder_t encoder); void select_what_to_stream (void); +struct ltrans_partition_def; +void lto_apply_partition_mask (struct ltrans_partition_def *partition); + + /* In options-save.c. */ void cl_target_option_stream_out (struct output_block *, struct bitpack_d *, struct cl_target_option *); diff --git a/gcc/symtab.c b/gcc/symtab.c index d7dfbb676df..669095af820 100644 --- a/gcc/symtab.c +++ b/gcc/symtab.c @@ -297,9 +297,13 @@ symbol_table::change_decl_assembler_name (tree decl, tree name) unlink_from_assembler_name_hash (node, true); const char *old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); - if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) - && DECL_RTL_SET_P (decl)) - warning (0, "%qD renamed after being referenced in assembly", decl); + if (DECL_RTL_SET_P (decl)) + { + if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) + && DECL_RTL_SET_P (decl)) + warning (0, "%qD renamed after being referenced in assembly", decl); + SET_DECL_RTL (decl, NULL); + } SET_DECL_ASSEMBLER_NAME (decl, name); if (alias) @@ -965,6 +969,8 @@ symtab_node::dump_base (FILE *f) if (lto_file_data) fprintf (f, " Read from file: %s\n", lto_file_data->file_name); + + fprintf (f, " AUX2: %d\n", aux2); } /* Dump symtab node to F. */ @@ -2504,3 +2510,37 @@ symtab_node::output_to_lto_symbol_table_p (void) } return true; } + +DEBUG_FUNCTION symtab_node * +symtab_node::find_by_order (int order) +{ + symtab_node *node; + FOR_EACH_SYMBOL (node) + if (node->order == order) + return node; + + return NULL; +} + +DEBUG_FUNCTION symtab_node * +symtab_node::find_by_name (const char * name) +{ + symtab_node *node; + FOR_EACH_SYMBOL (node) + if (!strcmp (node->name (), name)) + return node; + + return NULL; +} + +DEBUG_FUNCTION symtab_node * +symtab_node::find_by_asm_name (const char *asm_name) +{ + symtab_node *node; + FOR_EACH_SYMBOL (node) + if (!strcmp (node->asm_name (), asm_name)) + return node; + + return NULL; + +} diff --git a/gcc/toplev.c b/gcc/toplev.c index 07457d08c3a..95450880aab 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. If not see #include "dump-context.h" #include "print-tree.h" #include "optinfo-emit-json.h" +#include "jobserver.h" #if defined(DBX_DEBUGGING_INFO) || defined(XCOFF_DEBUGGING_INFO) #include "dbxout.h" @@ -104,7 +105,6 @@ static void do_compile (); static void process_options (void); static void backend_init (void); static int lang_dependent_init (const char *); -static void init_asm_output (const char *); static void finalize (bool); static void crash_signal (int) ATTRIBUTE_NORETURN; @@ -177,6 +177,9 @@ FILE *callgraph_info_file = NULL; static bitmap callgraph_info_external_printed; FILE *stack_usage_file = NULL; +/* Output file to write additional asm filenames. */ +FILE *additional_asm_filenames = NULL; + /* The current working directory of a translation. It's generally the directory from which compilation was initiated, but a preprocessed file may specify the original directory in which it was @@ -895,6 +898,51 @@ init_asm_output (const char *name) } } +void +init_additional_asm_names_file (void) +{ + gcc_assert (split_outputs); + + additional_asm_filenames = fopen (split_outputs, "w"); + if (!additional_asm_filenames) + error ("Unable to create a temporary write-only file."); + + fclose (additional_asm_filenames); +} + +/* Reinitialize the assembler file and store it in the additional asm file. */ + +void +handle_additional_asm (int childno) +{ + gcc_assert (split_outputs); + + if (childno < 0) + return; + + const char *temp_asm_name = make_temp_file (".s"); + asm_file_name = temp_asm_name; + + if (asm_out_file == stdout) + fatal_error (UNKNOWN_LOCATION, "Unexpected asm output to stdout"); + + fclose (asm_out_file); + + asm_out_file = fopen (temp_asm_name, "w"); + if (!asm_out_file) + fatal_error (UNKNOWN_LOCATION, "Unable to create asm output file"); + + /* Reopen file as append mode. Here we assume that write to append file is + atomic, as it is in Linux. */ + additional_asm_filenames = fopen (split_outputs, "a"); + if (!additional_asm_filenames) + fatal_error (UNKNOWN_LOCATION, + "Unable to open the temporary asm files container"); + + fprintf (additional_asm_filenames, "%d %s\n", childno, asm_file_name); + fclose (additional_asm_filenames); +} + /* A helper function; used as the reallocator function for cpp's line table. */ static void * @@ -2311,7 +2359,7 @@ do_compile () timevar_stop (TV_PHASE_SETUP); - compile_file (); + compile_file (); } else { @@ -2477,6 +2525,12 @@ toplev::main (int argc, char **argv) finalize_plugins (); + if (jobserver_initialized) + { + jobserver_return_token (JOBSERVER_NULL_TOKEN); + jobserver_finalize (); + } + after_memory_report = true; if (seen_error () || werrorcount) diff --git a/gcc/toplev.h b/gcc/toplev.h index d6c316962b0..3abbf74cd02 100644 --- a/gcc/toplev.h +++ b/gcc/toplev.h @@ -103,4 +103,7 @@ extern void parse_alignment_opts (void); extern void initialize_rtl (void); +extern void init_additional_asm_names_file (void); +extern void handle_additional_asm (int); + #endif /* ! GCC_TOPLEV_H */ diff --git a/gcc/varasm.c b/gcc/varasm.c index 4070f9c17e8..84df52013d7 100644 --- a/gcc/varasm.c +++ b/gcc/varasm.c @@ -110,7 +110,7 @@ static void decode_addr_const (tree, class addr_const *); static hashval_t const_hash_1 (const tree); static int compare_constant (const tree, const tree); static void output_constant_def_contents (rtx); -static void output_addressed_constants (tree); +static void output_addressed_constants (tree, int); static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT, unsigned int, bool, bool); static void globalize_decl (tree); @@ -2272,7 +2272,7 @@ assemble_variable (tree decl, int top_level ATTRIBUTE_UNUSED, /* Output any data that we will need to use the address of. */ if (DECL_INITIAL (decl) && DECL_INITIAL (decl) != error_mark_node) - output_addressed_constants (DECL_INITIAL (decl)); + output_addressed_constants (DECL_INITIAL (decl), 0); /* dbxout.c needs to know this. */ if (sect && (sect->common.flags & SECTION_CODE) != 0) @@ -3426,11 +3426,11 @@ build_constant_desc (tree exp) already have labels. */ static constant_descriptor_tree * -add_constant_to_table (tree exp) +add_constant_to_table (tree exp, int defer) { /* The hash table methods may call output_constant_def for addressed constants, so handle them first. */ - output_addressed_constants (exp); + output_addressed_constants (exp, defer); /* Sanity check to catch recursive insertion. */ static bool inserting; @@ -3474,7 +3474,7 @@ add_constant_to_table (tree exp) rtx output_constant_def (tree exp, int defer) { - struct constant_descriptor_tree *desc = add_constant_to_table (exp); + struct constant_descriptor_tree *desc = add_constant_to_table (exp, defer); maybe_output_constant_def_contents (desc, defer); return desc->rtl; } @@ -3544,7 +3544,7 @@ output_constant_def_contents (rtx symbol) /* Make sure any other constants whose addresses appear in EXP are assigned label numbers. */ - output_addressed_constants (exp); + output_addressed_constants (exp, 0); /* We are no longer deferring this constant. */ TREE_ASM_WRITTEN (decl) = TREE_ASM_WRITTEN (exp) = 1; @@ -3608,7 +3608,7 @@ lookup_constant_def (tree exp) tree tree_output_constant_def (tree exp) { - struct constant_descriptor_tree *desc = add_constant_to_table (exp); + struct constant_descriptor_tree *desc = add_constant_to_table (exp, 1); tree decl = SYMBOL_REF_DECL (XEXP (desc->rtl, 0)); varpool_node::finalize_decl (decl); return decl; @@ -4327,7 +4327,7 @@ compute_reloc_for_constant (tree exp) Indicate whether an ADDR_EXPR has been encountered. */ static void -output_addressed_constants (tree exp) +output_addressed_constants (tree exp, int defer) { tree tem; @@ -4347,21 +4347,21 @@ output_addressed_constants (tree exp) tem = DECL_INITIAL (tem); if (CONSTANT_CLASS_P (tem) || TREE_CODE (tem) == CONSTRUCTOR) - output_constant_def (tem, 0); + output_constant_def (tem, defer); if (TREE_CODE (tem) == MEM_REF) - output_addressed_constants (TREE_OPERAND (tem, 0)); + output_addressed_constants (TREE_OPERAND (tem, 0), defer); break; case PLUS_EXPR: case POINTER_PLUS_EXPR: case MINUS_EXPR: - output_addressed_constants (TREE_OPERAND (exp, 1)); + output_addressed_constants (TREE_OPERAND (exp, 1), defer); gcc_fallthrough (); CASE_CONVERT: case VIEW_CONVERT_EXPR: - output_addressed_constants (TREE_OPERAND (exp, 0)); + output_addressed_constants (TREE_OPERAND (exp, 0), defer); break; case CONSTRUCTOR: @@ -4369,7 +4369,7 @@ output_addressed_constants (tree exp) unsigned HOST_WIDE_INT idx; FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), idx, tem) if (tem != 0) - output_addressed_constants (tem); + output_addressed_constants (tem, defer); } break;