Message ID | 20191018090645.22404-1-iii@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | Move jump threading before reload | expand |
On Fri, Oct 18, 2019 at 11:06:45AM +0200, Ilya Leoshkevich wrote: > Bootstrapped and regtested on x86_64-redhat-linux, s390x-redhat-linux > and ppc64le-redhat-linux. The offending patch is in gcc-9_1_0-release > and gcc-9_2_0-release - do I need to backport this fix to gcc-9-branch? It is a regression on 9 (or so I assume), so yes please. > PR rtl-optimization/92007 > * cfgcleanup.c (thread_jump): Add an assertion that we don't > call it after reload if hot/cold partitioning has been done. > (class pass_postreload_jump): Rename to > pass_jump_after_combine. This fits on one line just fine. > (make_pass_postreload_jump): Rename to > make_pass_jump_after_combine. > * passes.def(pass_postreload_jump): Move before reload, rename Space before (. > --- a/gcc/cfgcleanup.c > +++ b/gcc/cfgcleanup.c > @@ -259,6 +259,10 @@ thread_jump (edge e, basic_block b) > bool failed = false; > reg_set_iterator rsi; > > + /* Jump threading may cause fixup_partitions to introduce new crossing edges, > + which is not allowed after reload. */ > + gcc_checking_assert (!reload_completed || !crtl->has_bb_partition); Thanks for the assert, that will help prevent people from running into this again. The patch looks fine to me, but I'm not a global reviewer :-) Segher
On 10/18/19 3:06 AM, Ilya Leoshkevich wrote: > Bootstrapped and regtested on x86_64-redhat-linux, s390x-redhat-linux > and ppc64le-redhat-linux. The offending patch is in gcc-9_1_0-release > and gcc-9_2_0-release - do I need to backport this fix to gcc-9-branch? > > > r266734 has introduced a new instance of jump threading pass in order to > take advantage of opportunities that combine opens up. It was perceived > back then that it was beneficial to delay it after reload, since that > might produce even more such opportunities. > > Unfortunately jump threading interferes with hot/cold partitioning. In > the code from PR92007, it converts the following > > +-------------------------- 2/HOT ------------------------+ > | | > v v > 3/HOT --> 5/HOT --> 8/HOT --> 11/COLD --> 6/HOT --EH--> 16/HOT > | ^ > | | > +-------------------------------+ > > into the following: > > +---------------------- 2/HOT ------------------+ > | | > v v > 3/HOT --> 8/HOT --> 11/COLD --> 6/COLD --EH--> 16/HOT > > This makes hot bb 6 dominated by cold bb 11, and because of this > fixup_partitions makes bb 6 cold as well, which in turn makes EH edge > 6->16 a crossing one. Not only can't we have crossing EH edges, we are > also not allowed to introduce new crossing edges after reload in > general, since it might require extra registers on some targets. > > Therefore, move the jump threading pass between combine and hot/cold > partitioning. Building SPEC 2006 and SPEC 2017 with the old and the new > code indicates that: > > * When doing jump threading right after reload, 3889 edges are threaded. > * When doing jump threading right after combine, 3918 edges are > threaded. > > This means this change will not introduce performance regressions. > > gcc/ChangeLog: > > 2019-10-17 Ilya Leoshkevich <iii@linux.ibm.com> > > PR rtl-optimization/92007 > * cfgcleanup.c (thread_jump): Add an assertion that we don't > call it after reload if hot/cold partitioning has been done. > (class pass_postreload_jump): Rename to > pass_jump_after_combine. > (make_pass_postreload_jump): Rename to > make_pass_jump_after_combine. > * passes.def(pass_postreload_jump): Move before reload, rename > to pass_jump_after_combine. > * tree-pass.h (make_pass_postreload_jump): Rename to > make_pass_jump_after_combine. > > gcc/testsuite/ChangeLog: > > 2019-10-17 Ilya Leoshkevich <iii@linux.ibm.com> > > PR rtl-optimization/92007 > * g++.dg/opt/pr92007.C: New test (from Arseny Solokha). OK for trunk and gcc-9 branch. jeff
diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c index ced7e0a4283..835f7d79ea4 100644 --- a/gcc/cfgcleanup.c +++ b/gcc/cfgcleanup.c @@ -259,6 +259,10 @@ thread_jump (edge e, basic_block b) bool failed = false; reg_set_iterator rsi; + /* Jump threading may cause fixup_partitions to introduce new crossing edges, + which is not allowed after reload. */ + gcc_checking_assert (!reload_completed || !crtl->has_bb_partition); + if (b->flags & BB_NONTHREADABLE_BLOCK) return NULL; @@ -3280,10 +3284,10 @@ make_pass_jump (gcc::context *ctxt) namespace { -const pass_data pass_data_postreload_jump = +const pass_data pass_data_jump_after_combine = { RTL_PASS, /* type */ - "postreload_jump", /* name */ + "jump_after_combine", /* name */ OPTGROUP_NONE, /* optinfo_flags */ TV_JUMP, /* tv_id */ 0, /* properties_required */ @@ -3293,20 +3297,20 @@ const pass_data pass_data_postreload_jump = 0, /* todo_flags_finish */ }; -class pass_postreload_jump : public rtl_opt_pass +class pass_jump_after_combine : public rtl_opt_pass { public: - pass_postreload_jump (gcc::context *ctxt) - : rtl_opt_pass (pass_data_postreload_jump, ctxt) + pass_jump_after_combine (gcc::context *ctxt) + : rtl_opt_pass (pass_data_jump_after_combine, ctxt) {} /* opt_pass methods: */ virtual unsigned int execute (function *); -}; // class pass_postreload_jump +}; // class pass_jump_after_combine unsigned int -pass_postreload_jump::execute (function *) +pass_jump_after_combine::execute (function *) { cleanup_cfg (flag_thread_jumps ? CLEANUP_THREADING : 0); return 0; @@ -3315,9 +3319,9 @@ pass_postreload_jump::execute (function *) } // anon namespace rtl_opt_pass * -make_pass_postreload_jump (gcc::context *ctxt) +make_pass_jump_after_combine (gcc::context *ctxt) { - return new pass_postreload_jump (ctxt); + return new pass_jump_after_combine (ctxt); } namespace { diff --git a/gcc/passes.def b/gcc/passes.def index 8999ceec636..798a391bd35 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -439,6 +439,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_ud_rtl_dce); NEXT_PASS (pass_combine); NEXT_PASS (pass_if_after_combine); + NEXT_PASS (pass_jump_after_combine); NEXT_PASS (pass_partition_blocks); NEXT_PASS (pass_outof_cfg_layout_mode); NEXT_PASS (pass_split_all_insns); @@ -455,7 +456,6 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_reload); NEXT_PASS (pass_postreload); PUSH_INSERT_PASSES_WITHIN (pass_postreload) - NEXT_PASS (pass_postreload_jump); NEXT_PASS (pass_postreload_cse); NEXT_PASS (pass_gcse2); NEXT_PASS (pass_split_after_reload); diff --git a/gcc/testsuite/g++.dg/opt/pr92007.C b/gcc/testsuite/g++.dg/opt/pr92007.C new file mode 100644 index 00000000000..9434cc929dd --- /dev/null +++ b/gcc/testsuite/g++.dg/opt/pr92007.C @@ -0,0 +1,32 @@ +// PR rtl-optimization/92007 +// { dg-do compile } +// { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-forwprop --param max-cse-insns=0 -Wno-return-type -std=gnu++98 -freorder-blocks-and-partition" } + +void +sb (int *); + +class d4 { +public: + ~d4(); + void gb (); + int op () { return no; } + int wl () { return tf; } + bool ee () try { gb (); } catch (...) { return false; } + bool b1 () { return (tf == no) ? false : ee (); } + +private: + int no, tf; +}; + +void +hs (int *v9) +{ + d4 p6; + + p6.gb (); + if (p6.op () > p6.wl ()) + { + p6.b1 (); + sb (v9); + } +} diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 85b1c828f3a..a987661530e 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -564,6 +564,7 @@ extern rtl_opt_pass *make_pass_stack_ptr_mod (gcc::context *ctxt); extern rtl_opt_pass *make_pass_initialize_regs (gcc::context *ctxt); extern rtl_opt_pass *make_pass_combine (gcc::context *ctxt); extern rtl_opt_pass *make_pass_if_after_combine (gcc::context *ctxt); +extern rtl_opt_pass *make_pass_jump_after_combine (gcc::context *ctxt); extern rtl_opt_pass *make_pass_ree (gcc::context *ctxt); extern rtl_opt_pass *make_pass_partition_blocks (gcc::context *ctxt); extern rtl_opt_pass *make_pass_match_asm_constraints (gcc::context *ctxt); @@ -581,7 +582,6 @@ extern rtl_opt_pass *make_pass_clean_state (gcc::context *ctxt); extern rtl_opt_pass *make_pass_branch_prob (gcc::context *ctxt); extern rtl_opt_pass *make_pass_value_profile_transformations (gcc::context *ctxt); -extern rtl_opt_pass *make_pass_postreload_jump (gcc::context *ctxt); extern rtl_opt_pass *make_pass_postreload_cse (gcc::context *ctxt); extern rtl_opt_pass *make_pass_gcse2 (gcc::context *ctxt); extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt);