From patchwork Mon Nov 7 15:52:20 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: 19/n: trans-mem: middle end/misc patches (LAST PATCH) Date: Mon, 07 Nov 2011 05:52:20 -0000 From: Aldy Hernandez X-Patchwork-Id: 124118 Message-Id: <4EB7FEB4.1040408@redhat.com> To: Richard Guenther Cc: Richard Henderson , gcc-patches > This won't work - DECL_UIDs are not stable -g vs. -g0 - only their > _order_ is stable - thus you won't get comparison fails with code generated > dependent on DECL_UID order, but you will if you depend on the DECL_UID > value (which you do by using it as a hash). > > And we will still generate different object files based on garbage collector > settings this way - GC can shrink hashtables, causing re-hashing, > which changes the order of the elements. It also causes re-ordering > with slight unrelated code changes (but if you say at runtime we always > sort the thing that might not be an issue). Ah, I get it. > Thus, the patch isn't a fix to get a stable order (you can't get > that for hashtable walks). A quick fix is to collect the elements > into a VEC and qsort that after some stable key (like the DECL_UID). I've done so in the attached patch. Notice I am hijacking the alias_pair object, because it has everything we need, though the DECL_UID goes in a field called emitted_diags. I am avoiding having to create an exact same object with the exact same fields. I can change duplicate this, if it's a big eye sore. Preliminary tests show the TM tests all work, but a full test run is still underway. OK pending tests? Index: ChangeLog.tm-merge =================================================================== --- ChangeLog.tm-merge (revision 181067) +++ ChangeLog.tm-merge (working copy) @@ -120,7 +120,7 @@ (GTFILES): Add trans-mem.c. * omp-low.c (WALK_SUBSTMTS): Add GIMPLE_TRANSACTION. * output.h (record_tm_clone_pair, finish_tm_clone_pairs, - finish_tm_clone_pairs_1, get_tm_clone_pair): Declare. + get_tm_clone_pair): Declare. * params.def (PARAM_TM_MAX_AGGREGATE_SIZE): New. * passes.c (init_optimization_passes): Place transactional memory passes. @@ -190,5 +190,5 @@ is_tm_may_cancel_outer, is_tm_ending_fndecl, record_tm_replacement, tm_malloc_replacement): Declare. * varasm.c (tm_clone_pairs): New. - (record_tm_clone_pair, finish_tm_clone_pairs, finish_tm_clone_pairs_1, - get_tm_clone_pair): New. + (record_tm_clone_pair, finish_tm_clone_pairs, get_tm_clone_pair, + dump_tm_clone_to_vec, dump_tm_clone_pairs, alias_pair_cmp): New. Index: varasm.c =================================================================== --- varasm.c (revision 181067) +++ varasm.c (working copy) @@ -5901,58 +5901,104 @@ get_tm_clone_pair (tree o) return NULL_TREE; } -/* Helper function for finish_tm_clone_pairs. Dump the clone table. */ +/* Helper function for finish_tm_clone_pairs. Dump a hash table entry + into a VEC in INFO. */ -int -finish_tm_clone_pairs_1 (void **slot, void *info ATTRIBUTE_UNUSED) +static int +dump_tm_clone_to_vec (void **slot, void *info) { struct tree_map *map = (struct tree_map *) *slot; - bool *switched = (bool *) info; - tree src = map->base.from; - tree dst = map->to; - struct cgraph_node *src_n = cgraph_get_node (src); - struct cgraph_node *dst_n = cgraph_get_node (dst); - - /* The function ipa_tm_create_version() marks the clone as needed if - the original function was needed. But we also mark the clone as - needed if we ever called the clone indirectly through - TM_GETTMCLONE. If neither of these are true, we didn't generate - a clone, and we didn't call it indirectly... no sense keeping it - in the clone table. */ - if (!dst_n || !dst_n->needed) - return 1; + VEC(alias_pair,gc) **alias_pairs = (VEC(alias_pair, gc) **) info; + alias_pair *p = VEC_safe_push (alias_pair, gc, *alias_pairs, NULL); + p->decl = map->base.from; + p->target = map->to; + p->emitted_diags = DECL_UID (p->decl); + return 1; +} - /* This covers the case where we have optimized the original - function away, and only access the transactional clone. */ - if (!src_n || !src_n->needed) - return 1; +/* Dump the actual pairs to the .tm_clone_table section. */ - if (!*switched) +static void +dump_tm_clone_pairs (VEC(alias_pair,gc) *alias_pairs) +{ + unsigned i; + alias_pair *p; + bool switched = false; + + FOR_EACH_VEC_ELT (alias_pair, alias_pairs, i, p) { - switch_to_section (get_named_section (NULL, ".tm_clone_table", 3)); - assemble_align (POINTER_SIZE); - *switched = true; + tree src = p->decl; + tree dst = p->target; + struct cgraph_node *src_n = cgraph_get_node (src); + struct cgraph_node *dst_n = cgraph_get_node (dst); + + /* The function ipa_tm_create_version() marks the clone as needed if + the original function was needed. But we also mark the clone as + needed if we ever called the clone indirectly through + TM_GETTMCLONE. If neither of these are true, we didn't generate + a clone, and we didn't call it indirectly... no sense keeping it + in the clone table. */ + if (!dst_n || !dst_n->needed) + continue; + + /* This covers the case where we have optimized the original + function away, and only access the transactional clone. */ + if (!src_n || !src_n->needed) + continue; + + if (!switched) + { + switch_to_section (get_named_section (NULL, ".tm_clone_table", 3)); + assemble_align (POINTER_SIZE); + switched = true; + } + + assemble_integer (XEXP (DECL_RTL (src), 0), + POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); + assemble_integer (XEXP (DECL_RTL (dst), 0), + POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); } +} - assemble_integer (XEXP (DECL_RTL (src), 0), - POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); - assemble_integer (XEXP (DECL_RTL (dst), 0), - POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); - return 1; +/* Helper comparison function for qsorting by the DECL_UID stored in + alias_pair->emitted_diags. */ + +static int +alias_pair_cmp (const void *x, const void *y) +{ + const alias_pair *p1 = (const alias_pair *) x; + const alias_pair *p2 = (const alias_pair *) y; + if (p1->emitted_diags < p2->emitted_diags) + return -1; + if (p1->emitted_diags > p2->emitted_diags) + return 1; + return 0; } void finish_tm_clone_pairs (void) { - bool switched = false; + VEC(alias_pair,gc) *alias_pairs = NULL; if (tm_clone_pairs == NULL) return; - htab_traverse_noresize (tm_clone_pairs, finish_tm_clone_pairs_1, - (void *) &switched); + /* We need a determenistic order for the .tm_clone_table, otherwise + we will get bootstrap comparison failures, so dump the hash table + to a vector, sort it, and dump the vector. */ + + /* Dump the hashtable to a vector. */ + htab_traverse_noresize (tm_clone_pairs, dump_tm_clone_to_vec, + (void *) &alias_pairs); + /* Sort it. */ + VEC_qsort (alias_pair, alias_pairs, alias_pair_cmp); + + /* Dump it. */ + dump_tm_clone_pairs (alias_pairs); + htab_delete (tm_clone_pairs); tm_clone_pairs = NULL; + VEC_free (alias_pair, gc, alias_pairs); } Index: output.h =================================================================== --- output.h (revision 181067) +++ output.h (working copy) @@ -608,7 +608,6 @@ extern void output_section_asm_op (const extern void record_tm_clone_pair (tree, tree); extern void finish_tm_clone_pairs (void); -extern int finish_tm_clone_pairs_1 (void **, void *); extern tree get_tm_clone_pair (tree); extern void default_asm_output_source_filename (FILE *, const char *);