Message ID | 20220720022040.25852-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Move pass_cse_sincos after vectorizer. | expand |
On Wed, Jul 20, 2022 at 4:20 AM liuhongt <hongtao.liu@intel.com> wrote: > > __builtin_cexpi can't be vectorized since there's a gap between it and > the vectorized sincos version (in libmvec, it passes a double and two > double pointers and returns nothing). And it will lose some > vectorization opportunities if sin & cos are optimized to cexpi before > the vectorizer. > > I'm trying to add vect_recog_cexpi_pattern to split cexpi to sin and > cos, but it failed vectorizable_simd_clone_call since NULL is returned > by cgraph_node::get (fndecl). So alternatively, the patch tries to move > pass_cse_sincos after the vectorizer, just before pass_cse_reciprocals. > > Also, the original pass_cse_sincos additionally expands pow & cabs; this patch > splits that part into a separate pass named pass_expand_powcabs, which > remains at the old pass position. > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Observe more libmvec sin/cos vectorization in specfp, but no big performance change. > > Ok for trunk? OK. I wonder if we can merge the workers of the three passes we have into a single function, handing it an argument saying what to handle, to be a bit more flexible in the future. That would also avoid doing > + NEXT_PASS (pass_cse_sincos); > NEXT_PASS (pass_cse_reciprocals); thus two function walks after each other. But I guess that can be done as a followup (or not if we decide so). Thanks, Richard. > > gcc/ChangeLog: > > * passes.def: (Split pass_cse_sincos to pass_expand_powcabs > and pass_cse_sincos, and move pass_cse_sincos after vectorizer). > * timevar.def (TV_TREE_POWCABS): New timevar. > * tree-pass.h (make_pass_expand_powcabs): Split from pass_cse_sincos. > * tree-ssa-math-opts.cc (gimple_expand_builtin_cabs): Ditto. > (class pass_expand_powcabs): Ditto. > (pass_expand_powcabs::execute): Ditto. > (make_pass_expand_powcabs): Ditto. > (pass_cse_sincos::execute): Remove pow/cabs expand part. > (make_pass_cse_sincos): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.dg/pow-sqrt-synth-1.c: Adjust testcase. 
> --- > gcc/passes.def | 3 +- > gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c | 4 +- > gcc/timevar.def | 1 + > gcc/tree-pass.h | 1 + > gcc/tree-ssa-math-opts.cc | 112 +++++++++++++++++++----- > 5 files changed, 97 insertions(+), 24 deletions(-) > > diff --git a/gcc/passes.def b/gcc/passes.def > index 375d3d62d51..6bb92efacd4 100644 > --- a/gcc/passes.def > +++ b/gcc/passes.def > @@ -253,7 +253,7 @@ along with GCC; see the file COPYING3. If not see > NEXT_PASS (pass_ccp, true /* nonzero_p */); > /* After CCP we rewrite no longer addressed locals into SSA > form if possible. */ > - NEXT_PASS (pass_cse_sincos); > + NEXT_PASS (pass_expand_powcabs); > NEXT_PASS (pass_optimize_bswap); > NEXT_PASS (pass_laddress); > NEXT_PASS (pass_lim); > @@ -328,6 +328,7 @@ along with GCC; see the file COPYING3. If not see > NEXT_PASS (pass_simduid_cleanup); > NEXT_PASS (pass_lower_vector_ssa); > NEXT_PASS (pass_lower_switch); > + NEXT_PASS (pass_cse_sincos); > NEXT_PASS (pass_cse_reciprocals); > NEXT_PASS (pass_reassoc, false /* early_p */); > NEXT_PASS (pass_strength_reduction); > diff --git a/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c b/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c > index 4a94325cdb3..484b29a8fc8 100644 > --- a/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c > +++ b/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target sqrt_insn } } */ > -/* { dg-options "-fdump-tree-sincos -Ofast --param max-pow-sqrt-depth=8" } */ > +/* { dg-options "-fdump-tree-powcabs -Ofast --param max-pow-sqrt-depth=8" } */ > /* { dg-additional-options "-mfloat-abi=softfp -mfpu=neon-vfpv4" { target arm*-*-* } } */ > > double > @@ -34,4 +34,4 @@ vecfoo (double *a) > a[i] = __builtin_pow (a[i], 1.25); > } > > -/* { dg-final { scan-tree-dump-times "synthesizing" 7 "sincos" } } */ > +/* { dg-final { scan-tree-dump-times "synthesizing" 7 "powcabs" } } */ > diff --git a/gcc/timevar.def b/gcc/timevar.def > index 2dae5e1c760..651af19876f 100644 > --- a/gcc/timevar.def > +++ b/gcc/timevar.def 
> @@ -220,6 +220,7 @@ DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion") > DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering") > DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals") > DEFTIMEVAR (TV_TREE_SINCOS , "gimple CSE sin/cos") > +DEFTIMEVAR (TV_TREE_POWCABS , "gimple expand pow/cabs") > DEFTIMEVAR (TV_TREE_WIDEN_MUL , "gimple widening/fma detection") > DEFTIMEVAR (TV_TRANS_MEM , "transactional memory") > DEFTIMEVAR (TV_TREE_STRLEN , "tree strlen optimization") > diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h > index 606d1d60b85..4dfe05ed8e0 100644 > --- a/gcc/tree-pass.h > +++ b/gcc/tree-pass.h > @@ -444,6 +444,7 @@ extern gimple_opt_pass *make_pass_early_warn_uninitialized (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt); > +extern gimple_opt_pass *make_pass_expand_powcabs (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_store_merging (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt); > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc > index a4492c96419..58152b5a01c 100644 > --- a/gcc/tree-ssa-math-opts.cc > +++ b/gcc/tree-ssa-math-opts.cc > @@ -2226,8 +2226,7 @@ gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg) > } > > /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 > - on the SSA_NAME argument of each of them. Also expand powi(x,n) into > - an optimal number of multiplies, when n is a constant. */ > + on the SSA_NAME argument of each of them. 
*/ > > namespace { > > @@ -2254,8 +2253,6 @@ public: > /* opt_pass methods: */ > bool gate (function *) final override > { > - /* We no longer require either sincos or cexp, since powi expansion > - piggybacks on this pass. */ > return optimize; > } > > @@ -2275,24 +2272,15 @@ pass_cse_sincos::execute (function *fun) > FOR_EACH_BB_FN (bb, fun) > { > gimple_stmt_iterator gsi; > - bool cleanup_eh = false; > > for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > { > gimple *stmt = gsi_stmt (gsi); > > - /* Only the last stmt in a bb could throw, no need to call > - gimple_purge_dead_eh_edges if we change something in the middle > - of a basic block. */ > - cleanup_eh = false; > - > if (is_gimple_call (stmt) > && gimple_call_lhs (stmt)) > { > - tree arg, arg0, arg1, result; > - HOST_WIDE_INT n; > - location_t loc; > - > + tree arg; > switch (gimple_call_combined_fn (stmt)) > { > CASE_CFN_COS: > @@ -2309,7 +2297,94 @@ pass_cse_sincos::execute (function *fun) > if (TREE_CODE (arg) == SSA_NAME) > cfg_changed |= execute_cse_sincos_1 (arg); > break; > + default: > + break; > + } > + } > + } > + } > + > + statistics_counter_event (fun, "sincos statements inserted", > + sincos_stats.inserted); > + statistics_counter_event (fun, "conv statements removed", > + sincos_stats.conv_removed); > + > + return cfg_changed ? TODO_cleanup_cfg : 0; > +} > + > +} // anon namespace > + > +gimple_opt_pass * > +make_pass_cse_sincos (gcc::context *ctxt) > +{ > + return new pass_cse_sincos (ctxt); > +} > + > +/* Expand powi(x,n) into an optimal number of multiplies, when n is a constant. > + Also expand CABS. 
*/ > +namespace { > + > +const pass_data pass_data_expand_powcabs = > +{ > + GIMPLE_PASS, /* type */ > + "powcabs", /* name */ > + OPTGROUP_NONE, /* optinfo_flags */ > + TV_TREE_POWCABS, /* tv_id */ > + PROP_ssa, /* properties_required */ > + 0, /* properties_provided */ > + 0, /* properties_destroyed */ > + 0, /* todo_flags_start */ > + TODO_update_ssa, /* todo_flags_finish */ > +}; > + > +class pass_expand_powcabs : public gimple_opt_pass > +{ > +public: > + pass_expand_powcabs (gcc::context *ctxt) > + : gimple_opt_pass (pass_data_expand_powcabs, ctxt) > + {} > > + /* opt_pass methods: */ > + bool gate (function *) final override > + { > + return optimize; > + } > + > + unsigned int execute (function *) final override; > + > +}; // class pass_expand_powcabs > + > +unsigned int > +pass_expand_powcabs::execute (function *fun) > +{ > + basic_block bb; > + bool cfg_changed = false; > + > + calculate_dominance_info (CDI_DOMINATORS); > + > + FOR_EACH_BB_FN (bb, fun) > + { > + gimple_stmt_iterator gsi; > + bool cleanup_eh = false; > + > + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > + { > + gimple *stmt = gsi_stmt (gsi); > + > + /* Only the last stmt in a bb could throw, no need to call > + gimple_purge_dead_eh_edges if we change something in the middle > + of a basic block. */ > + cleanup_eh = false; > + > + if (is_gimple_call (stmt) > + && gimple_call_lhs (stmt)) > + { > + tree arg0, arg1, result; > + HOST_WIDE_INT n; > + location_t loc; > + > + switch (gimple_call_combined_fn (stmt)) > + { > CASE_CFN_POW: > arg0 = gimple_call_arg (stmt, 0); > arg1 = gimple_call_arg (stmt, 1); > @@ -2405,20 +2480,15 @@ pass_cse_sincos::execute (function *fun) > cfg_changed |= gimple_purge_dead_eh_edges (bb); > } > > - statistics_counter_event (fun, "sincos statements inserted", > - sincos_stats.inserted); > - statistics_counter_event (fun, "conv statements removed", > - sincos_stats.conv_removed); > - > return cfg_changed ? 
TODO_cleanup_cfg : 0; > } > > } // anon namespace > > gimple_opt_pass * > -make_pass_cse_sincos (gcc::context *ctxt) > +make_pass_expand_powcabs (gcc::context *ctxt) > { > - return new pass_cse_sincos (ctxt); > + return new pass_expand_powcabs (ctxt); > } > > /* Return true if stmt is a type conversion operation that can be stripped > -- > 2.18.1 >
On Wed, Jul 20, 2022 at 3:59 PM Richard Biener via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > On Wed, Jul 20, 2022 at 4:20 AM liuhongt <hongtao.liu@intel.com> wrote: > > > > __builtin_cexpi can't be vectorized since there's a gap between it and > > the vectorized sincos version (in libmvec, it passes a double and two > > double pointers and returns nothing). And it will lose some > > vectorization opportunities if sin & cos are optimized to cexpi before > > the vectorizer. > > > > I'm trying to add vect_recog_cexpi_pattern to split cexpi to sin and > > cos, but it failed vectorizable_simd_clone_call since NULL is returned > > by cgraph_node::get (fndecl). So alternatively, the patch tries to move > > pass_cse_sincos after the vectorizer, just before pass_cse_reciprocals. > > > > Also, the original pass_cse_sincos additionally expands pow & cabs; this patch > > splits that part into a separate pass named pass_expand_powcabs, which > > remains at the old pass position. > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > > Observe more libmvec sin/cos vectorization in specfp, but no big performance change. > > > > Ok for trunk? > > OK. > > I wonder if we can merge the workers of the three passes we have into > a single function, handing it an argument saying what to handle, to be a bit more > flexible in the future. That would also avoid doing > > > + NEXT_PASS (pass_cse_sincos); > > NEXT_PASS (pass_cse_reciprocals); > > thus two function walks after each other. But I guess that can be done > as a followup (or not if we decide so). Let me try this as a followup. > > Thanks, > Richard. > > > > > gcc/ChangeLog: > > > > * passes.def: (Split pass_cse_sincos to pass_expand_powcabs > > and pass_cse_sincos, and move pass_cse_sincos after vectorizer). > > * timevar.def (TV_TREE_POWCABS): New timevar. > > * tree-pass.h (make_pass_expand_powcabs): Split from pass_cse_sincos. > > * tree-ssa-math-opts.cc (gimple_expand_builtin_cabs): Ditto. > > (class pass_expand_powcabs): Ditto. 
> > (pass_expand_powcabs::execute): Ditto. > > (make_pass_expand_powcabs): Ditto. > > (pass_cse_sincos::execute): Remove pow/cabs expand part. > > (make_pass_cse_sincos): Ditto. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/pow-sqrt-synth-1.c: Adjust testcase. > > --- > > gcc/passes.def | 3 +- > > gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c | 4 +- > > gcc/timevar.def | 1 + > > gcc/tree-pass.h | 1 + > > gcc/tree-ssa-math-opts.cc | 112 +++++++++++++++++++----- > > 5 files changed, 97 insertions(+), 24 deletions(-) > > > > diff --git a/gcc/passes.def b/gcc/passes.def > > index 375d3d62d51..6bb92efacd4 100644 > > --- a/gcc/passes.def > > +++ b/gcc/passes.def > > @@ -253,7 +253,7 @@ along with GCC; see the file COPYING3. If not see > > NEXT_PASS (pass_ccp, true /* nonzero_p */); > > /* After CCP we rewrite no longer addressed locals into SSA > > form if possible. */ > > - NEXT_PASS (pass_cse_sincos); > > + NEXT_PASS (pass_expand_powcabs); > > NEXT_PASS (pass_optimize_bswap); > > NEXT_PASS (pass_laddress); > > NEXT_PASS (pass_lim); > > @@ -328,6 +328,7 @@ along with GCC; see the file COPYING3. 
If not see > > NEXT_PASS (pass_simduid_cleanup); > > NEXT_PASS (pass_lower_vector_ssa); > > NEXT_PASS (pass_lower_switch); > > + NEXT_PASS (pass_cse_sincos); > > NEXT_PASS (pass_cse_reciprocals); > > NEXT_PASS (pass_reassoc, false /* early_p */); > > NEXT_PASS (pass_strength_reduction); > > diff --git a/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c b/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c > > index 4a94325cdb3..484b29a8fc8 100644 > > --- a/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c > > +++ b/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile { target sqrt_insn } } */ > > -/* { dg-options "-fdump-tree-sincos -Ofast --param max-pow-sqrt-depth=8" } */ > > +/* { dg-options "-fdump-tree-powcabs -Ofast --param max-pow-sqrt-depth=8" } */ > > /* { dg-additional-options "-mfloat-abi=softfp -mfpu=neon-vfpv4" { target arm*-*-* } } */ > > > > double > > @@ -34,4 +34,4 @@ vecfoo (double *a) > > a[i] = __builtin_pow (a[i], 1.25); > > } > > > > -/* { dg-final { scan-tree-dump-times "synthesizing" 7 "sincos" } } */ > > +/* { dg-final { scan-tree-dump-times "synthesizing" 7 "powcabs" } } */ > > diff --git a/gcc/timevar.def b/gcc/timevar.def > > index 2dae5e1c760..651af19876f 100644 > > --- a/gcc/timevar.def > > +++ b/gcc/timevar.def > > @@ -220,6 +220,7 @@ DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion") > > DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering") > > DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals") > > DEFTIMEVAR (TV_TREE_SINCOS , "gimple CSE sin/cos") > > +DEFTIMEVAR (TV_TREE_POWCABS , "gimple expand pow/cabs") > > DEFTIMEVAR (TV_TREE_WIDEN_MUL , "gimple widening/fma detection") > > DEFTIMEVAR (TV_TRANS_MEM , "transactional memory") > > DEFTIMEVAR (TV_TREE_STRLEN , "tree strlen optimization") > > diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h > > index 606d1d60b85..4dfe05ed8e0 100644 > > --- a/gcc/tree-pass.h > > +++ b/gcc/tree-pass.h > > @@ -444,6 +444,7 @@ extern gimple_opt_pass 
*make_pass_early_warn_uninitialized (gcc::context *ctxt); > > extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt); > > extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt); > > extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt); > > +extern gimple_opt_pass *make_pass_expand_powcabs (gcc::context *ctxt); > > extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt); > > extern gimple_opt_pass *make_pass_store_merging (gcc::context *ctxt); > > extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt); > > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc > > index a4492c96419..58152b5a01c 100644 > > --- a/gcc/tree-ssa-math-opts.cc > > +++ b/gcc/tree-ssa-math-opts.cc > > @@ -2226,8 +2226,7 @@ gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg) > > } > > > > /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 > > - on the SSA_NAME argument of each of them. Also expand powi(x,n) into > > - an optimal number of multiplies, when n is a constant. */ > > + on the SSA_NAME argument of each of them. */ > > > > namespace { > > > > @@ -2254,8 +2253,6 @@ public: > > /* opt_pass methods: */ > > bool gate (function *) final override > > { > > - /* We no longer require either sincos or cexp, since powi expansion > > - piggybacks on this pass. */ > > return optimize; > > } > > > > @@ -2275,24 +2272,15 @@ pass_cse_sincos::execute (function *fun) > > FOR_EACH_BB_FN (bb, fun) > > { > > gimple_stmt_iterator gsi; > > - bool cleanup_eh = false; > > > > for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > > { > > gimple *stmt = gsi_stmt (gsi); > > > > - /* Only the last stmt in a bb could throw, no need to call > > - gimple_purge_dead_eh_edges if we change something in the middle > > - of a basic block. 
*/ > > - cleanup_eh = false; > > - > > if (is_gimple_call (stmt) > > && gimple_call_lhs (stmt)) > > { > > - tree arg, arg0, arg1, result; > > - HOST_WIDE_INT n; > > - location_t loc; > > - > > + tree arg; > > switch (gimple_call_combined_fn (stmt)) > > { > > CASE_CFN_COS: > > @@ -2309,7 +2297,94 @@ pass_cse_sincos::execute (function *fun) > > if (TREE_CODE (arg) == SSA_NAME) > > cfg_changed |= execute_cse_sincos_1 (arg); > > break; > > + default: > > + break; > > + } > > + } > > + } > > + } > > + > > + statistics_counter_event (fun, "sincos statements inserted", > > + sincos_stats.inserted); > > + statistics_counter_event (fun, "conv statements removed", > > + sincos_stats.conv_removed); > > + > > + return cfg_changed ? TODO_cleanup_cfg : 0; > > +} > > + > > +} // anon namespace > > + > > +gimple_opt_pass * > > +make_pass_cse_sincos (gcc::context *ctxt) > > +{ > > + return new pass_cse_sincos (ctxt); > > +} > > + > > +/* Expand powi(x,n) into an optimal number of multiplies, when n is a constant. > > + Also expand CABS. 
*/ > > +namespace { > > + > > +const pass_data pass_data_expand_powcabs = > > +{ > > + GIMPLE_PASS, /* type */ > > + "powcabs", /* name */ > > + OPTGROUP_NONE, /* optinfo_flags */ > > + TV_TREE_POWCABS, /* tv_id */ > > + PROP_ssa, /* properties_required */ > > + 0, /* properties_provided */ > > + 0, /* properties_destroyed */ > > + 0, /* todo_flags_start */ > > + TODO_update_ssa, /* todo_flags_finish */ > > +}; > > + > > +class pass_expand_powcabs : public gimple_opt_pass > > +{ > > +public: > > + pass_expand_powcabs (gcc::context *ctxt) > > + : gimple_opt_pass (pass_data_expand_powcabs, ctxt) > > + {} > > > > + /* opt_pass methods: */ > > + bool gate (function *) final override > > + { > > + return optimize; > > + } > > + > > + unsigned int execute (function *) final override; > > + > > +}; // class pass_expand_powcabs > > + > > +unsigned int > > +pass_expand_powcabs::execute (function *fun) > > +{ > > + basic_block bb; > > + bool cfg_changed = false; > > + > > + calculate_dominance_info (CDI_DOMINATORS); > > + > > + FOR_EACH_BB_FN (bb, fun) > > + { > > + gimple_stmt_iterator gsi; > > + bool cleanup_eh = false; > > + > > + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > > + { > > + gimple *stmt = gsi_stmt (gsi); > > + > > + /* Only the last stmt in a bb could throw, no need to call > > + gimple_purge_dead_eh_edges if we change something in the middle > > + of a basic block. 
*/ > > + cleanup_eh = false; > > + > > + if (is_gimple_call (stmt) > > + && gimple_call_lhs (stmt)) > > + { > > + tree arg0, arg1, result; > > + HOST_WIDE_INT n; > > + location_t loc; > > + > > + switch (gimple_call_combined_fn (stmt)) > > + { > > CASE_CFN_POW: > > arg0 = gimple_call_arg (stmt, 0); > > arg1 = gimple_call_arg (stmt, 1); > > @@ -2405,20 +2480,15 @@ pass_cse_sincos::execute (function *fun) > > cfg_changed |= gimple_purge_dead_eh_edges (bb); > > } > > > > - statistics_counter_event (fun, "sincos statements inserted", > > - sincos_stats.inserted); > > - statistics_counter_event (fun, "conv statements removed", > > - sincos_stats.conv_removed); > > - > > return cfg_changed ? TODO_cleanup_cfg : 0; > > } > > > > } // anon namespace > > > > gimple_opt_pass * > > -make_pass_cse_sincos (gcc::context *ctxt) > > +make_pass_expand_powcabs (gcc::context *ctxt) > > { > > - return new pass_cse_sincos (ctxt); > > + return new pass_expand_powcabs (ctxt); > > } > > > > /* Return true if stmt is a type conversion operation that can be stripped > > -- > > 2.18.1 > >
diff --git a/gcc/passes.def b/gcc/passes.def index 375d3d62d51..6bb92efacd4 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -253,7 +253,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_ccp, true /* nonzero_p */); /* After CCP we rewrite no longer addressed locals into SSA form if possible. */ - NEXT_PASS (pass_cse_sincos); + NEXT_PASS (pass_expand_powcabs); NEXT_PASS (pass_optimize_bswap); NEXT_PASS (pass_laddress); NEXT_PASS (pass_lim); @@ -328,6 +328,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_simduid_cleanup); NEXT_PASS (pass_lower_vector_ssa); NEXT_PASS (pass_lower_switch); + NEXT_PASS (pass_cse_sincos); NEXT_PASS (pass_cse_reciprocals); NEXT_PASS (pass_reassoc, false /* early_p */); NEXT_PASS (pass_strength_reduction); diff --git a/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c b/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c index 4a94325cdb3..484b29a8fc8 100644 --- a/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c +++ b/gcc/testsuite/gcc.dg/pow-sqrt-synth-1.c @@ -1,5 +1,5 @@ /* { dg-do compile { target sqrt_insn } } */ -/* { dg-options "-fdump-tree-sincos -Ofast --param max-pow-sqrt-depth=8" } */ +/* { dg-options "-fdump-tree-powcabs -Ofast --param max-pow-sqrt-depth=8" } */ /* { dg-additional-options "-mfloat-abi=softfp -mfpu=neon-vfpv4" { target arm*-*-* } } */ double @@ -34,4 +34,4 @@ vecfoo (double *a) a[i] = __builtin_pow (a[i], 1.25); } -/* { dg-final { scan-tree-dump-times "synthesizing" 7 "sincos" } } */ +/* { dg-final { scan-tree-dump-times "synthesizing" 7 "powcabs" } } */ diff --git a/gcc/timevar.def b/gcc/timevar.def index 2dae5e1c760..651af19876f 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -220,6 +220,7 @@ DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion") DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering") DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals") DEFTIMEVAR (TV_TREE_SINCOS , "gimple CSE sin/cos") +DEFTIMEVAR (TV_TREE_POWCABS , "gimple expand pow/cabs") DEFTIMEVAR 
(TV_TREE_WIDEN_MUL , "gimple widening/fma detection") DEFTIMEVAR (TV_TRANS_MEM , "transactional memory") DEFTIMEVAR (TV_TREE_STRLEN , "tree strlen optimization") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 606d1d60b85..4dfe05ed8e0 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -444,6 +444,7 @@ extern gimple_opt_pass *make_pass_early_warn_uninitialized (gcc::context *ctxt); extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt); extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt); extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_expand_powcabs (gcc::context *ctxt); extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt); extern gimple_opt_pass *make_pass_store_merging (gcc::context *ctxt); extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt); diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index a4492c96419..58152b5a01c 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -2226,8 +2226,7 @@ gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg) } /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 - on the SSA_NAME argument of each of them. Also expand powi(x,n) into - an optimal number of multiplies, when n is a constant. */ + on the SSA_NAME argument of each of them. */ namespace { @@ -2254,8 +2253,6 @@ public: /* opt_pass methods: */ bool gate (function *) final override { - /* We no longer require either sincos or cexp, since powi expansion - piggybacks on this pass. 
*/ return optimize; } @@ -2275,24 +2272,15 @@ pass_cse_sincos::execute (function *fun) FOR_EACH_BB_FN (bb, fun) { gimple_stmt_iterator gsi; - bool cleanup_eh = false; for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { gimple *stmt = gsi_stmt (gsi); - /* Only the last stmt in a bb could throw, no need to call - gimple_purge_dead_eh_edges if we change something in the middle - of a basic block. */ - cleanup_eh = false; - if (is_gimple_call (stmt) && gimple_call_lhs (stmt)) { - tree arg, arg0, arg1, result; - HOST_WIDE_INT n; - location_t loc; - + tree arg; switch (gimple_call_combined_fn (stmt)) { CASE_CFN_COS: @@ -2309,7 +2297,94 @@ pass_cse_sincos::execute (function *fun) if (TREE_CODE (arg) == SSA_NAME) cfg_changed |= execute_cse_sincos_1 (arg); break; + default: + break; + } + } + } + } + + statistics_counter_event (fun, "sincos statements inserted", + sincos_stats.inserted); + statistics_counter_event (fun, "conv statements removed", + sincos_stats.conv_removed); + + return cfg_changed ? TODO_cleanup_cfg : 0; +} + +} // anon namespace + +gimple_opt_pass * +make_pass_cse_sincos (gcc::context *ctxt) +{ + return new pass_cse_sincos (ctxt); +} + +/* Expand powi(x,n) into an optimal number of multiplies, when n is a constant. + Also expand CABS. 
*/ +namespace { + +const pass_data pass_data_expand_powcabs = +{ + GIMPLE_PASS, /* type */ + "powcabs", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_TREE_POWCABS, /* tv_id */ + PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa, /* todo_flags_finish */ +}; + +class pass_expand_powcabs : public gimple_opt_pass +{ +public: + pass_expand_powcabs (gcc::context *ctxt) + : gimple_opt_pass (pass_data_expand_powcabs, ctxt) + {} + /* opt_pass methods: */ + bool gate (function *) final override + { + return optimize; + } + + unsigned int execute (function *) final override; + +}; // class pass_expand_powcabs + +unsigned int +pass_expand_powcabs::execute (function *fun) +{ + basic_block bb; + bool cfg_changed = false; + + calculate_dominance_info (CDI_DOMINATORS); + + FOR_EACH_BB_FN (bb, fun) + { + gimple_stmt_iterator gsi; + bool cleanup_eh = false; + + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + /* Only the last stmt in a bb could throw, no need to call + gimple_purge_dead_eh_edges if we change something in the middle + of a basic block. */ + cleanup_eh = false; + + if (is_gimple_call (stmt) + && gimple_call_lhs (stmt)) + { + tree arg0, arg1, result; + HOST_WIDE_INT n; + location_t loc; + + switch (gimple_call_combined_fn (stmt)) + { CASE_CFN_POW: arg0 = gimple_call_arg (stmt, 0); arg1 = gimple_call_arg (stmt, 1); @@ -2405,20 +2480,15 @@ pass_cse_sincos::execute (function *fun) cfg_changed |= gimple_purge_dead_eh_edges (bb); } - statistics_counter_event (fun, "sincos statements inserted", - sincos_stats.inserted); - statistics_counter_event (fun, "conv statements removed", - sincos_stats.conv_removed); - return cfg_changed ? 
TODO_cleanup_cfg : 0; } } // anon namespace gimple_opt_pass * -make_pass_cse_sincos (gcc::context *ctxt) +make_pass_expand_powcabs (gcc::context *ctxt) { - return new pass_cse_sincos (ctxt); + return new pass_expand_powcabs (ctxt); } /* Return true if stmt is a type conversion operation that can be stripped