Message ID | 0c1cd0e5706087d51c0d981a313786990ddcad89.1562518763.git.segher@kernel.crashing.org |
---|---|
State | New |
Headers | show |
Series | subreg: Add -fsplit-wide-types-early (PR88233) | expand |
On Sun, Jul 7, 2019 at 7:12 PM Segher Boessenkool <segher@kernel.crashing.org> wrote: > > Currently the second lower-subreg pass is run right before RA. This > is much too late to be very useful. At least for targets that do not > have RTL patterns for operations on multi-register modes it is a lot > better to split patterns earlier, before combine and all related > passes. > > This adds an option -fsplit-wide-types-early that does that, and > enables it by default for rs6000. Do you by chance have a (target specific) testcase you can add? The docs should mention that the new option doesn't have any effect unless -fsplit-wide-types is enabled. Otherwise OK. Richard. > > 2019-07-07 Segher Boessenkool <segher@kernel.crashing.org> > > PR rtl-optimization/88233 > * common.opt (fsplit-wide-types-early): New option. > * common/config/rs6000/rs6000-common.c > (rs6000_option_optimization_table): Add OPT_fsplit_wide_types_early for > OPT_LEVELS_ALL. > * doc/invoke.texi (Optimization Options): Add -fsplit-wide-types-early. > * lower-subreg.c (pass_lower_subreg2::gate): Add test for > flag_split_wide_types_early. > (pass_data_lower_subreg3): New. > (pass_lower_subreg3): New. > (make_pass_lower_subreg3): New. > * passes.def (pass_lower_subreg2): Move after the loop passes. > (pass_lower_subreg3): New, inserted where pass_lower_subreg2 was. > * tree-pass.h (make_pass_lower_subreg2): Move up, to its new place in > the pass pipeline; its previous place is taken by ... > (make_pass_lower_subreg3): ... this. 
> > --- > gcc/common.opt | 4 +++ > gcc/common/config/rs6000/rs6000-common.c | 2 ++ > gcc/doc/invoke.texi | 8 +++++- > gcc/lower-subreg.c | 46 +++++++++++++++++++++++++++++++- > gcc/passes.def | 3 ++- > gcc/tree-pass.h | 3 ++- > 6 files changed, 62 insertions(+), 4 deletions(-) > > diff --git a/gcc/common.opt b/gcc/common.opt > index 41514df..b998b25 100644 > --- a/gcc/common.opt > +++ b/gcc/common.opt > @@ -2430,6 +2430,10 @@ fsplit-wide-types > Common Report Var(flag_split_wide_types) Optimization > Split wide types into independent registers. > > +fsplit-wide-types-early > +Common Report Var(flag_split_wide_types_early) Optimization > +Split wide types into independent registers earlier. > + > fssa-backprop > Common Report Var(flag_ssa_backprop) Init(1) Optimization > Enable backward propagation of use properties at the SSA level. > diff --git a/gcc/common/config/rs6000/rs6000-common.c b/gcc/common/config/rs6000/rs6000-common.c > index 9857b54..4b0c205 100644 > --- a/gcc/common/config/rs6000/rs6000-common.c > +++ b/gcc/common/config/rs6000/rs6000-common.c > @@ -31,6 +31,8 @@ > /* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ > static const struct default_options rs6000_option_optimization_table[] = > { > + /* Split multi-word types early. */ > + { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 }, > /* Enable -fsched-pressure for first pass instruction scheduling. */ > { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, > { OPT_LEVELS_NONE, 0, NULL, 0 } > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 73d16b5..6349d4c 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -467,7 +467,7 @@ Objective-C and Objective-C++ Dialects}. 
> -fsignaling-nans @gol > -fsingle-precision-constant -fsplit-ivs-in-unroller -fsplit-loops@gol > -fsplit-paths @gol > --fsplit-wide-types -fssa-backprop -fssa-phiopt @gol > +-fsplit-wide-types -fsplit-wide-types-early -fssa-backprop -fssa-phiopt @gol > -fstdarg-opt -fstore-merging -fstrict-aliasing @gol > -fthread-jumps -ftracer -ftree-bit-ccp @gol > -ftree-builtin-call-dce -ftree-ccp -ftree-ch @gol > @@ -8731,6 +8731,12 @@ but may make debugging more difficult. > Enabled at levels @option{-O}, @option{-O2}, @option{-O3}, > @option{-Os}. > > +@item -fsplit-wide-types-early > +@opindex fsplit-wide-types-early > +Fully split wide types early, instead of very late. > + > +This is the default on some targets. > + > @item -fcse-follow-jumps > @opindex fcse-follow-jumps > In common subexpression elimination (CSE), scan through jump instructions > diff --git a/gcc/lower-subreg.c b/gcc/lower-subreg.c > index 4f68a73..e1418e5 100644 > --- a/gcc/lower-subreg.c > +++ b/gcc/lower-subreg.c > @@ -1801,7 +1801,8 @@ public: > {} > > /* opt_pass methods: */ > - virtual bool gate (function *) { return flag_split_wide_types != 0; } > + virtual bool gate (function *) { return flag_split_wide_types > + && flag_split_wide_types_early; } > virtual unsigned int execute (function *) > { > decompose_multiword_subregs (true); > @@ -1817,3 +1818,46 @@ make_pass_lower_subreg2 (gcc::context *ctxt) > { > return new pass_lower_subreg2 (ctxt); > } > + > +/* Implement third lower subreg pass. 
*/ > + > +namespace { > + > +const pass_data pass_data_lower_subreg3 = > +{ > + RTL_PASS, /* type */ > + "subreg3", /* name */ > + OPTGROUP_NONE, /* optinfo_flags */ > + TV_LOWER_SUBREG, /* tv_id */ > + 0, /* properties_required */ > + 0, /* properties_provided */ > + 0, /* properties_destroyed */ > + 0, /* todo_flags_start */ > + TODO_df_finish, /* todo_flags_finish */ > +}; > + > +class pass_lower_subreg3 : public rtl_opt_pass > +{ > +public: > + pass_lower_subreg3 (gcc::context *ctxt) > + : rtl_opt_pass (pass_data_lower_subreg3, ctxt) > + {} > + > + /* opt_pass methods: */ > + virtual bool gate (function *) { return flag_split_wide_types > + && !flag_split_wide_types_early; } > + virtual unsigned int execute (function *) > + { > + decompose_multiword_subregs (true); > + return 0; > + } > + > +}; // class pass_lower_subreg3 > + > +} // anon namespace > + > +rtl_opt_pass * > +make_pass_lower_subreg3 (gcc::context *ctxt) > +{ > + return new pass_lower_subreg3 (ctxt); > +} > diff --git a/gcc/passes.def b/gcc/passes.def > index 9a5b0cd..1a7fd14 100644 > --- a/gcc/passes.def > +++ b/gcc/passes.def > @@ -427,6 +427,7 @@ along with GCC; see the file COPYING3. If not see > NEXT_PASS (pass_rtl_doloop); > NEXT_PASS (pass_rtl_loop_done); > POP_INSERT_PASSES () > + NEXT_PASS (pass_lower_subreg2); > NEXT_PASS (pass_web); > NEXT_PASS (pass_rtl_cprop); > NEXT_PASS (pass_cse2); > @@ -440,7 +441,7 @@ along with GCC; see the file COPYING3. 
If not see > NEXT_PASS (pass_partition_blocks); > NEXT_PASS (pass_outof_cfg_layout_mode); > NEXT_PASS (pass_split_all_insns); > - NEXT_PASS (pass_lower_subreg2); > + NEXT_PASS (pass_lower_subreg3); > NEXT_PASS (pass_df_initialize_no_opt); > NEXT_PASS (pass_stack_ptr_mod); > NEXT_PASS (pass_mode_switching); > diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h > index b27dbdd..7b83293 100644 > --- a/gcc/tree-pass.h > +++ b/gcc/tree-pass.h > @@ -552,6 +552,7 @@ extern rtl_opt_pass *make_pass_rtl_unroll_loops (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_rtl_doloop (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_rtl_loop_done (gcc::context *ctxt); > > +extern rtl_opt_pass *make_pass_lower_subreg2 (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_web (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_cse2 (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_df_initialize_opt (gcc::context *ctxt); > @@ -567,7 +568,7 @@ extern rtl_opt_pass *make_pass_partition_blocks (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_match_asm_constraints (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_split_all_insns (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_fast_rtl_byte_dce (gcc::context *ctxt); > -extern rtl_opt_pass *make_pass_lower_subreg2 (gcc::context *ctxt); > +extern rtl_opt_pass *make_pass_lower_subreg3 (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_mode_switching (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_sms (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_sched (gcc::context *ctxt); > -- > 1.8.3.1 >
On Mon, Jul 08, 2019 at 01:27:25PM +0200, Richard Biener wrote: > On Sun, Jul 7, 2019 at 7:12 PM Segher Boessenkool > <segher@kernel.crashing.org> wrote: > > > > Currently the second lower-subreg pass is run right before RA. This > > is much too late to be very useful. At least for targets that do not > > have RTL patterns for operations on multi-register modes it is a lot > > better to split patterns earlier, before combine and all related > > passes. > > > > This adds an option -fsplit-wide-types-early that does that, and > > enables it by default for rs6000. > > Do you by chance have a (target specific) testcase you can add? I haven't yet figured out how to do a not terribly fragile test for this. Hrm, maybe the thing in the PR with -mcpu=power8 will do. I'll work on that. > The docs should mention that the new option doesn't have any effect > unless -fsplit-wide-types is enabled. Yeah I'll make that more explicit. Thanks, Segher > > 2019-07-07 Segher Boessenkool <segher@kernel.crashing.org> > > > > PR rtl-optimization/88233 > > * common.opt (fsplit-wide-types-early): New option. > > * common/config/rs6000/rs6000-common.c > > (rs6000_option_optimization_table): Add OPT_fsplit_wide_types_early for > > OPT_LEVELS_ALL. > > * doc/invoke.texi (Optimization Options): Add -fsplit-wide-types-early. > > * lower-subreg.c (pass_lower_subreg2::gate): Add test for > > flag_split_wide_types_early. > > (pass_data_lower_subreg3): New. > > (pass_lower_subreg3): New. > > (make_pass_lower_subreg3): New. > > * passes.def (pass_lower_subreg2): Move after the loop passes. > > (pass_lower_subreg3): New, inserted where pass_lower_subreg2 was. > > * tree-pass.h (make_pass_lower_subreg2): Move up, to its new place in > > the pass pipeline; its previous place is taken by ... > > (make_pass_lower_subreg3): ... this.
On Mon, Jul 08, 2019 at 12:20:55PM -0500, Segher Boessenkool wrote: > On Mon, Jul 08, 2019 at 01:27:25PM +0200, Richard Biener wrote: > > The docs should mention that the new option doesn't have any effect > > unless -fsplit-wide-types is enabled. > > Yeah I'll make that more explicit. I added: "This option has no effect unless @option{-fsplit-wide-types} is turned on." Segher
diff --git a/gcc/common.opt b/gcc/common.opt index 41514df..b998b25 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2430,6 +2430,10 @@ fsplit-wide-types Common Report Var(flag_split_wide_types) Optimization Split wide types into independent registers. +fsplit-wide-types-early +Common Report Var(flag_split_wide_types_early) Optimization +Split wide types into independent registers earlier. + fssa-backprop Common Report Var(flag_ssa_backprop) Init(1) Optimization Enable backward propagation of use properties at the SSA level. diff --git a/gcc/common/config/rs6000/rs6000-common.c b/gcc/common/config/rs6000/rs6000-common.c index 9857b54..4b0c205 100644 --- a/gcc/common/config/rs6000/rs6000-common.c +++ b/gcc/common/config/rs6000/rs6000-common.c @@ -31,6 +31,8 @@ /* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ static const struct default_options rs6000_option_optimization_table[] = { + /* Split multi-word types early. */ + { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 }, /* Enable -fsched-pressure for first pass instruction scheduling. */ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, { OPT_LEVELS_NONE, 0, NULL, 0 } diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 73d16b5..6349d4c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -467,7 +467,7 @@ Objective-C and Objective-C++ Dialects}. -fsignaling-nans @gol -fsingle-precision-constant -fsplit-ivs-in-unroller -fsplit-loops@gol -fsplit-paths @gol --fsplit-wide-types -fssa-backprop -fssa-phiopt @gol +-fsplit-wide-types -fsplit-wide-types-early -fssa-backprop -fssa-phiopt @gol -fstdarg-opt -fstore-merging -fstrict-aliasing @gol -fthread-jumps -ftracer -ftree-bit-ccp @gol -ftree-builtin-call-dce -ftree-ccp -ftree-ch @gol @@ -8731,6 +8731,12 @@ but may make debugging more difficult. Enabled at levels @option{-O}, @option{-O2}, @option{-O3}, @option{-Os}. +@item -fsplit-wide-types-early +@opindex fsplit-wide-types-early +Fully split wide types early, instead of very late. 
+ +This is the default on some targets. + @item -fcse-follow-jumps @opindex fcse-follow-jumps In common subexpression elimination (CSE), scan through jump instructions diff --git a/gcc/lower-subreg.c b/gcc/lower-subreg.c index 4f68a73..e1418e5 100644 --- a/gcc/lower-subreg.c +++ b/gcc/lower-subreg.c @@ -1801,7 +1801,8 @@ public: {} /* opt_pass methods: */ - virtual bool gate (function *) { return flag_split_wide_types != 0; } + virtual bool gate (function *) { return flag_split_wide_types + && flag_split_wide_types_early; } virtual unsigned int execute (function *) { decompose_multiword_subregs (true); @@ -1817,3 +1818,46 @@ make_pass_lower_subreg2 (gcc::context *ctxt) { return new pass_lower_subreg2 (ctxt); } + +/* Implement third lower subreg pass. */ + +namespace { + +const pass_data pass_data_lower_subreg3 = +{ + RTL_PASS, /* type */ + "subreg3", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_LOWER_SUBREG, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_lower_subreg3 : public rtl_opt_pass +{ +public: + pass_lower_subreg3 (gcc::context *ctxt) + : rtl_opt_pass (pass_data_lower_subreg3, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) { return flag_split_wide_types + && !flag_split_wide_types_early; } + virtual unsigned int execute (function *) + { + decompose_multiword_subregs (true); + return 0; + } + +}; // class pass_lower_subreg3 + +} // anon namespace + +rtl_opt_pass * +make_pass_lower_subreg3 (gcc::context *ctxt) +{ + return new pass_lower_subreg3 (ctxt); +} diff --git a/gcc/passes.def b/gcc/passes.def index 9a5b0cd..1a7fd14 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -427,6 +427,7 @@ along with GCC; see the file COPYING3. 
If not see NEXT_PASS (pass_rtl_doloop); NEXT_PASS (pass_rtl_loop_done); POP_INSERT_PASSES () + NEXT_PASS (pass_lower_subreg2); NEXT_PASS (pass_web); NEXT_PASS (pass_rtl_cprop); NEXT_PASS (pass_cse2); @@ -440,7 +441,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_partition_blocks); NEXT_PASS (pass_outof_cfg_layout_mode); NEXT_PASS (pass_split_all_insns); - NEXT_PASS (pass_lower_subreg2); + NEXT_PASS (pass_lower_subreg3); NEXT_PASS (pass_df_initialize_no_opt); NEXT_PASS (pass_stack_ptr_mod); NEXT_PASS (pass_mode_switching); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index b27dbdd..7b83293 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -552,6 +552,7 @@ extern rtl_opt_pass *make_pass_rtl_unroll_loops (gcc::context *ctxt); extern rtl_opt_pass *make_pass_rtl_doloop (gcc::context *ctxt); extern rtl_opt_pass *make_pass_rtl_loop_done (gcc::context *ctxt); +extern rtl_opt_pass *make_pass_lower_subreg2 (gcc::context *ctxt); extern rtl_opt_pass *make_pass_web (gcc::context *ctxt); extern rtl_opt_pass *make_pass_cse2 (gcc::context *ctxt); extern rtl_opt_pass *make_pass_df_initialize_opt (gcc::context *ctxt); @@ -567,7 +568,7 @@ extern rtl_opt_pass *make_pass_partition_blocks (gcc::context *ctxt); extern rtl_opt_pass *make_pass_match_asm_constraints (gcc::context *ctxt); extern rtl_opt_pass *make_pass_split_all_insns (gcc::context *ctxt); extern rtl_opt_pass *make_pass_fast_rtl_byte_dce (gcc::context *ctxt); -extern rtl_opt_pass *make_pass_lower_subreg2 (gcc::context *ctxt); +extern rtl_opt_pass *make_pass_lower_subreg3 (gcc::context *ctxt); extern rtl_opt_pass *make_pass_mode_switching (gcc::context *ctxt); extern rtl_opt_pass *make_pass_sms (gcc::context *ctxt); extern rtl_opt_pass *make_pass_sched (gcc::context *ctxt);