Message ID | 20201113175026.GC74972@kam.mff.cuni.cz |
---|---|
State | New |
Headers | show |
Series | Improve handling of memory operands in ipa-icf 3/4 | expand |
On 11/13/20 6:50 PM, Jan Hubicka wrote: > Bootstrapped/regtested x86_64-linux. I plan to commit it on monday if there are > no complains. Hello Honza. Thank you very much for the patch set. It's a nice improvement and it will eventually fix the WPA slowness caused by IPA ICF. I made some measurements for master before a first patch and this patch (3/4) on godot game engine: BEFORE: Equal symbols: 15690 Totally needed symbols: 17913, fraction of loaded symbols: 39.05% 2156989 false returned: '' in equals_private at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:879 1099887 false returned: 'operand_equal_p failed' in compare_operand at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:307 1048605 false returned: 'types are not compatible' in compatible_types_p at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:210 1047679 false returned: 'GIMPLE assignment operands are different' in compare_gimple_assign at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:632 1047517 false returned: 'GIMPLE NOP LHS type mismatch' in compare_gimple_assign at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:628 57659 false returned: 'call function types are not compatible' in compare_gimple_call at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:573 52088 false returned: 'PHI node comparison returns false' in equals_private at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:914 52088 false returned: '' in compare_phi_node at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:1552 13565 false returned: 'decl_or_type flags are different' in equals_wpa at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:567 9919 false returned: 'result types are different' in equals_wpa at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:616 Time variable usr sys wall GGC ipa icf : 4.31 ( 7%) 0.06 ( 2%) 4.38 ( 7%) 6008k ( 0%) TOTAL : 57.57 3.49 61.11 4830M AFTER: Equal symbols: 17019 Totally needed symbols: 19875, fraction of loaded symbols: 70.88% 377327 false returned: '' in equals_private at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:886 213086 false returned: 'operand_equal_p failed' in compare_operand at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:356 212179 false returned: 'compare_ao_refs failed (access path difference)' in compare_operand at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:345 159947 false returned: '' in compare_gimple_call at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:607 147098 false returned: 'GIMPLE assignment operands are different' in compare_gimple_assign at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:699 66123 false returned: 'GIMPLE call operands are different' in compare_gimple_call at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:656 52088 false returned: 'PHI node comparison returns false' in equals_private at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:921 52088 false returned: '' in compare_phi_node at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:1580 12643 false returned: 'decl_or_type flags are different' in equals_wpa at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:572 6318 false returned: 'different tree types' in compatible_types_p at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:206 Time variable usr sys wall GGC ipa icf : 3.40 ( 6%) 0.09 ( 3%) 3.49 ( 6%) 27M ( 1%) TOTAL : 56.60 2.94 59.58 4478M and I'm also sending usage-wrapper graphs. Martin
> On 11/13/20 6:50 PM, Jan Hubicka wrote: > > Bootstrapped/regtested x86_64-linux. I plan to commit it on monday if there are > > no complains. > > Hello Honza. > > Thank you very much for the patch set. > It's a nice improvement and it will eventually fix the WPA slowness caused by IPA ICF. > > I made some measurements for master before a first patch and this patch (3/4) on godot > game engine: > > BEFORE: > > Equal symbols: 15690 > Totally needed symbols: 17913, fraction of loaded symbols: 39.05% > > 2156989 false returned: '' in equals_private at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:879 > 1099887 false returned: 'operand_equal_p failed' in compare_operand at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:307 > 1048605 false returned: 'types are not compatible' in compatible_types_p at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:210 > 1047679 false returned: 'GIMPLE assignment operands are different' in compare_gimple_assign at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:632 > 1047517 false returned: 'GIMPLE NOP LHS type mismatch' in compare_gimple_assign at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:628 > 57659 false returned: 'call function types are not compatible' in compare_gimple_call at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:573 > 52088 false returned: 'PHI node comparison returns false' in equals_private at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:914 > 52088 false returned: '' in compare_phi_node at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:1552 > 13565 false returned: 'decl_or_type flags are different' in equals_wpa at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:567 > 9919 false returned: 'result types are different' in equals_wpa at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:616 > > Time variable usr sys wall GGC > ipa icf : 4.31 ( 7%) 0.06 ( 2%) 4.38 ( 7%) 6008k ( 0%) > TOTAL : 57.57 3.49 61.11 4830M > > AFTER: > > Equal symbols: 17019 > Totally needed symbols: 19875, fraction of loaded symbols: 70.88% > > 377327 false returned: '' in equals_private at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:886 > 213086 false returned: 'operand_equal_p failed' in compare_operand at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:356 > 212179 false returned: 'compare_ao_refs failed (access path difference)' in compare_operand at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:345 > 159947 false returned: '' in compare_gimple_call at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:607 > 147098 false returned: 'GIMPLE assignment operands are different' in compare_gimple_assign at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:699 > 66123 false returned: 'GIMPLE call operands are different' in compare_gimple_call at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:656 > 52088 false returned: 'PHI node comparison returns false' in equals_private at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:921 > 52088 false returned: '' in compare_phi_node at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:1580 > 12643 false returned: 'decl_or_type flags are different' in equals_wpa at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:572 > 6318 false returned: 'different tree types' in compatible_types_p at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:206 > > Time variable usr sys wall GGC > ipa icf : 3.40 ( 6%) 0.09 ( 3%) 3.49 ( 6%) 27M ( 1%) > TOTAL : 56.60 2.94 59.58 4478M > > and I'm also sending usage-wrapper graphs. Thanks for checking! I also uploaded some data to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92535 Note that you want to also note gimple in timevar since that is also mostly ICF related. It seems that ICF performance is highly sensitive to application: it now behaves very well on cc1plus, seems to do quite well on godot and still does very bad on Firefox (we still have regression there compared to gcc 9 that itself did relatively bad). I noticed one stupid bug in operand_equal_p on coponent_refs (I am just testing a fix) and there are quite few important things that we compare but do not hash. Those should be easy to fix. I plan to iterate through this on firefox. It would be great to get chromium data. Did you suceeded building it recently? I now got last year firefox building and working and I am looking into updating it to current firefox tree that will probaby keep me occupied for some time. Honza > > Martin
On 11/18/20 3:50 PM, Jan Hubicka wrote: >> On 11/13/20 6:50 PM, Jan Hubicka wrote: >>> Bootstrapped/regtested x86_64-linux. I plan to commit it on monday if there are >>> no complains. >> >> Hello Honza. >> >> Thank you very much for the patch set. >> It's a nice improvement and it will eventually fix the WPA slowness caused by IPA ICF. >> >> I made some measurements for master before a first patch and this patch (3/4) on godot >> game engine: >> >> BEFORE: >> >> Equal symbols: 15690 >> Totally needed symbols: 17913, fraction of loaded symbols: 39.05% >> >> 2156989 false returned: '' in equals_private at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:879 >> 1099887 false returned: 'operand_equal_p failed' in compare_operand at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:307 >> 1048605 false returned: 'types are not compatible' in compatible_types_p at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:210 >> 1047679 false returned: 'GIMPLE assignment operands are different' in compare_gimple_assign at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:632 >> 1047517 false returned: 'GIMPLE NOP LHS type mismatch' in compare_gimple_assign at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:628 >> 57659 false returned: 'call function types are not compatible' in compare_gimple_call at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:573 >> 52088 false returned: 'PHI node comparison returns false' in equals_private at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:914 >> 52088 false returned: '' in compare_phi_node at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:1552 >> 13565 false returned: 'decl_or_type flags are different' in equals_wpa at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:567 >> 9919 false returned: 'result types are different' in equals_wpa at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:616 >> >> Time variable usr sys wall GGC >> ipa icf : 4.31 ( 7%) 0.06 ( 2%) 4.38 ( 7%) 6008k ( 0%) >> TOTAL : 57.57 3.49 61.11 4830M >> >> AFTER: >> >> Equal symbols: 17019 >> Totally needed symbols: 19875, fraction of loaded symbols: 70.88% >> >> 377327 false returned: '' in equals_private at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:886 >> 213086 false returned: 'operand_equal_p failed' in compare_operand at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:356 >> 212179 false returned: 'compare_ao_refs failed (access path difference)' in compare_operand at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:345 >> 159947 false returned: '' in compare_gimple_call at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:607 >> 147098 false returned: 'GIMPLE assignment operands are different' in compare_gimple_assign at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:699 >> 66123 false returned: 'GIMPLE call operands are different' in compare_gimple_call at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:656 >> 52088 false returned: 'PHI node comparison returns false' in equals_private at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:921 >> 52088 false returned: '' in compare_phi_node at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:1580 >> 12643 false returned: 'decl_or_type flags are different' in equals_wpa at /home/marxin/Programming/gcc2/gcc/ipa-icf.c:572 >> 6318 false returned: 'different tree types' in compatible_types_p at /home/marxin/Programming/gcc2/gcc/ipa-icf-gimple.c:206 >> >> Time variable usr sys wall GGC >> ipa icf : 3.40 ( 6%) 0.09 ( 3%) 3.49 ( 6%) 27M ( 1%) >> TOTAL : 56.60 2.94 59.58 4478M >> >> and I'm also sending usage-wrapper graphs. > > Thanks for checking! I also uploaded some data to > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92535 Nice! > > Note that you want to also note gimple in timevar since that is also > mostly ICF related. I know about that, good reminder. > > It seems that ICF performance is highly sensitive to application: it now > behaves very well on cc1plus, seems to do quite well on godot and still > does very bad on Firefox (we still have regression there compared to gcc > 9 that itself did relatively bad). > > I noticed one stupid bug in operand_equal_p on coponent_refs (I am just > testing a fix) and there are quite few important things that we compare > but do not hash. Those should be easy to fix. I plan to iterate through > this on firefox. > > It would be great to get chromium data. Did you suceeded building it > recently? It has LTO enabled, you can build it: https://build.opensuse.org/package/show/openSUSE:Factory/chromium > I now got last year firefox building and working and I am > looking into updating it to current firefox tree that will probaby keep > me occupied for some time. You can probably also test our package: https://build.opensuse.org/package/show/openSUSE:Factory/MozillaFirefox Martin > > Honza >> >> Martin > >
diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index a283195a487..a0f181b5763 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -66,6 +66,7 @@ along with GCC; see the file COPYING3. If not see #include "coverage.h" #include "gimple-pretty-print.h" #include "data-streamer.h" +#include "tree-streamer.h" #include "fold-const.h" #include "calls.h" #include "varasm.h" @@ -86,6 +87,7 @@ along with GCC; see the file COPYING3. If not see #include "dbgcnt.h" #include "tree-vector-builder.h" #include "symtab-thunks.h" +#include "alias.h" using namespace ipa_icf_gimple; @@ -227,14 +229,16 @@ hash_map<const_tree, hashval_t> sem_item::m_type_hash_cache; /* Semantic function constructor that uses STACK as bitmap memory stack. */ sem_function::sem_function (bitmap_obstack *stack) -: sem_item (FUNC, stack), m_checker (NULL), m_compared_func (NULL) + : sem_item (FUNC, stack), memory_access_types (), m_alias_sets_hash (0), + m_checker (NULL), m_compared_func (NULL) { bb_sizes.create (0); bb_sorted.create (0); } sem_function::sem_function (cgraph_node *node, bitmap_obstack *stack) -: sem_item (FUNC, node, stack), m_checker (NULL), m_compared_func (NULL) + : sem_item (FUNC, node, stack), memory_access_types (), + m_alias_sets_hash (0), m_checker (NULL), m_compared_func (NULL) { bb_sizes.create (0); bb_sorted.create (0); @@ -1438,9 +1442,27 @@ sem_function::hash_stmt (gimple *stmt, inchash::hash &hstate) /* All these statements are equivalent if their operands are. */ for (unsigned i = 0; i < gimple_num_ops (stmt); ++i) - m_checker->hash_operand (gimple_op (stmt, i), hstate, 0, - func_checker::get_operand_access_type - (&map, gimple_op (stmt, i))); + { + func_checker::operand_access_type + access_type = func_checker::get_operand_access_type + (&map, gimple_op (stmt, i)); + m_checker->hash_operand (gimple_op (stmt, i), hstate, 0, + access_type); + /* For memory accesses when hasing for LTO stremaing record + base and ref alias ptr types so we can compare them at WPA + time without having to read actual function body. */ + if (access_type == func_checker::OP_MEMORY + && lto_streaming_expected_p () + && flag_strict_aliasing) + { + ao_ref ref; + + ao_ref_init (&ref, gimple_op (stmt, i)); + memory_access_types.safe_push (ao_ref_alias_ptr_type (&ref)); + memory_access_types.safe_push + (ao_ref_base_alias_ptr_type (&ref)); + } + } /* Consider nocf_check attribute in hash as it affects code generation. */ if (code == GIMPLE_CALL @@ -2129,6 +2151,14 @@ sem_item_optimizer::write_summary (void) streamer_write_uhwi_stream (ob->main_stream, node_ref); streamer_write_uhwi (ob, (*item)->get_hash ()); + + if ((*item)->type == FUNC) + { + sem_function *fn = static_cast<sem_function *> (*item); + streamer_write_uhwi (ob, fn->memory_access_types.length ()); + for (unsigned i = 0; i < fn->memory_access_types.length (); i++) + stream_write_tree (ob, fn->memory_access_types[i], true); + } } } @@ -2180,6 +2210,18 @@ sem_item_optimizer::read_section (lto_file_decl_data *file_data, cgraph_node *cnode = dyn_cast <cgraph_node *> (node); sem_function *fn = new sem_function (cnode, &m_bmstack); + unsigned count = streamer_read_uhwi (&ib_main); + inchash::hash hstate (0); + if (flag_incremental_link == INCREMENTAL_LINK_LTO) + fn->memory_access_types.reserve_exact (count); + for (unsigned i = 0; i < count; i++) + { + tree type = stream_read_tree (&ib_main, data_in); + hstate.add_int (get_deref_alias_set (type)); + if (flag_incremental_link == INCREMENTAL_LINK_LTO) + fn->memory_access_types.quick_push (type); + } + fn->m_alias_sets_hash = hstate.end (); fn->set_hash (hash); m_items.safe_push (fn); } @@ -2376,6 +2418,7 @@ sem_item_optimizer::execute (void) build_graph (); update_hash_by_addr_refs (); + update_hash_by_memory_access_type (); build_hash_based_classes (); if (dump_file) @@ -2513,6 +2556,21 @@ sem_item_optimizer::update_hash_by_addr_refs () m_items[i]->set_hash (m_items[i]->global_hash); } +void +sem_item_optimizer::update_hash_by_memory_access_type () +{ + for (unsigned i = 0; i < m_items.length (); i++) + { + if (m_items[i]->type == FUNC) + { + sem_function *fn = static_cast<sem_function *> (m_items[i]); + inchash::hash hstate (fn->get_hash ()); + hstate.add_int (fn->m_alias_sets_hash); + fn->set_hash (hstate.end ()); + } + } +} + /* Congruence classes are built by hash value. */ void diff --git a/gcc/ipa-icf.h b/gcc/ipa-icf.h index 6dc1a135444..3e0e72428f7 100644 --- a/gcc/ipa-icf.h +++ b/gcc/ipa-icf.h @@ -371,12 +371,19 @@ public: /* GIMPLE codes hash value. */ hashval_t gcode_hash; + /* Vector of subpart of memory access types. */ + vec<tree> memory_access_types; + /* Total number of SSA names used in the function. */ unsigned ssa_names_size; /* Array of structures for all basic blocks. */ vec <ipa_icf_gimple::sem_bb *> bb_sorted; + /* Hash of canonical types used for memory references in the + function. */ + hashval_t m_alias_sets_hash; + /* Return true if parameter I may be used. */ bool param_used_p (unsigned int i); @@ -541,6 +548,9 @@ private: /* For each semantic item, append hash values of references. */ void update_hash_by_addr_refs (); + /* Update hash by canonical types of memory accesses. */ + void update_hash_by_memory_access_type (); + /* Congruence classes are built by hash value. */ void build_hash_based_classes (void);