Message ID | alpine.LSU.2.11.1404151123370.31108@zhemvz.fhfr.qr |
---|---|
State | New |
Headers | show |
On Tue, 15 Apr 2014, Richard Biener wrote: > > This removes RTL loop unswitching (see last years discussion about > compile-time issues of that pass). RTL loop unswitching is > enabled together with GIMPLE loop unswitching at -O3 and by > -floop-unswitch. It's clearly the wrong place to do high-level > loop transforms these days, and the cost of maintainance doesn't > outweight the questionable benefit. > > Thus the following patch removes it. > > Bootstrap / regtest pending on x86_64-unknown-linux-gnu (I hope > for testsuite fallout). No testsuite fallout, thus no testcases that test for a working RTL unswitching (on x86_64/i586 at least). Richard. > Any objections? > > Thanks, > Richard. > > 2014-04-15 Richard Biener <rguenther@suse.de> > > * Makefile.in (OBJS): Remove loop-unswitch.o. > * loop-unswitch.c: Delete. > * tree-pass.h (make_pass_rtl_unswitch): Remove. > * passes.def (pass_rtl_unswitch): Likewise. > * loop-init.c (gate_rtl_unswitch): Likewise. > (rtl_unswitch): Likewise. > (pass_data_rtl_unswitch): Likewise. > (pass_rtl_unswitch): Likewise. > (make_pass_rtl_unswitch): Likewise. > * rtl.h (reversed_condition): Likewise. > (compare_and_jump_seq): Likewise. > * loop-iv.c (reversed_condition): Move here from loop-unswitch.c > and make static. > * loop-unroll.c (compare_and_jump_seq): Likewise. > > Index: gcc/Makefile.in > =================================================================== > --- gcc/Makefile.in (revision 209410) > +++ gcc/Makefile.in (working copy) > @@ -1294,7 +1294,6 @@ OBJS = \ > loop-invariant.o \ > loop-iv.o \ > loop-unroll.o \ > - loop-unswitch.o \ > lower-subreg.o \ > lra.o \ > lra-assigns.o \ > Index: gcc/tree-pass.h > =================================================================== > --- gcc/tree-pass.h (revision 209410) > +++ gcc/tree-pass.h (working copy) > @@ -512,7 +512,6 @@ extern rtl_opt_pass *make_pass_outof_cfg > extern rtl_opt_pass *make_pass_loop2 (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_rtl_loop_init (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_rtl_move_loop_invariants (gcc::context *ctxt); > -extern rtl_opt_pass *make_pass_rtl_unswitch (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_rtl_unroll_and_peel_loops (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_rtl_doloop (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_rtl_loop_done (gcc::context *ctxt); > Index: gcc/passes.def > =================================================================== > --- gcc/passes.def (revision 209410) > +++ gcc/passes.def (working copy) > @@ -341,7 +341,6 @@ along with GCC; see the file COPYING3. > PUSH_INSERT_PASSES_WITHIN (pass_loop2) > NEXT_PASS (pass_rtl_loop_init); > NEXT_PASS (pass_rtl_move_loop_invariants); > - NEXT_PASS (pass_rtl_unswitch); > NEXT_PASS (pass_rtl_unroll_and_peel_loops); > NEXT_PASS (pass_rtl_doloop); > NEXT_PASS (pass_rtl_loop_done); > Index: gcc/loop-init.c > =================================================================== > --- gcc/loop-init.c (revision 209410) > +++ gcc/loop-init.c (working copy) > @@ -518,61 +518,7 @@ make_pass_rtl_move_loop_invariants (gcc: > } > > > -/* Loop unswitching for RTL. */ > -static bool > -gate_rtl_unswitch (void) > -{ > - return flag_unswitch_loops; > -} > - > -static unsigned int > -rtl_unswitch (void) > -{ > - if (number_of_loops (cfun) > 1) > - unswitch_loops (); > - return 0; > -} > - > -namespace { > - > -const pass_data pass_data_rtl_unswitch = > -{ > - RTL_PASS, /* type */ > - "loop2_unswitch", /* name */ > - OPTGROUP_LOOP, /* optinfo_flags */ > - true, /* has_gate */ > - true, /* has_execute */ > - TV_LOOP_UNSWITCH, /* tv_id */ > - 0, /* properties_required */ > - 0, /* properties_provided */ > - 0, /* properties_destroyed */ > - 0, /* todo_flags_start */ > - TODO_verify_rtl_sharing, /* todo_flags_finish */ > -}; > - > -class pass_rtl_unswitch : public rtl_opt_pass > -{ > -public: > - pass_rtl_unswitch (gcc::context *ctxt) > - : rtl_opt_pass (pass_data_rtl_unswitch, ctxt) > - {} > - > - /* opt_pass methods: */ > - bool gate () { return gate_rtl_unswitch (); } > - unsigned int execute () { return rtl_unswitch (); } > - > -}; // class pass_rtl_unswitch > - > -} // anon namespace > - > -rtl_opt_pass * > -make_pass_rtl_unswitch (gcc::context *ctxt) > -{ > - return new pass_rtl_unswitch (ctxt); > -} > - > - > -/* Loop unswitching for RTL. */ > +/* Loop unrolling and peeling for RTL. */ > static bool > gate_rtl_unroll_and_peel_loops (void) > { > Index: gcc/loop-iv.c > =================================================================== > --- gcc/loop-iv.c (revision 209410) > +++ gcc/loop-iv.c (working copy) > @@ -1732,6 +1732,21 @@ canon_condition (rtx cond) > return cond; > } > > +/* Reverses CONDition; returns NULL if we cannot. */ > + > +static rtx > +reversed_condition (rtx cond) > +{ > + enum rtx_code reversed; > + reversed = reversed_comparison_code (cond, NULL); > + if (reversed == UNKNOWN) > + return NULL_RTX; > + else > + return gen_rtx_fmt_ee (reversed, > + GET_MODE (cond), XEXP (cond, 0), > + XEXP (cond, 1)); > +} > + > /* Tries to use the fact that COND holds to simplify EXPR. ALTERED is the > set of altered regs. */ > > Index: gcc/loop-unroll.c > =================================================================== > --- gcc/loop-unroll.c (revision 209410) > +++ gcc/loop-unroll.c (working copy) > @@ -1060,6 +1060,59 @@ split_edge_and_insert (edge e, rtx insns > return bb; > } > > +/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if > + true, with probability PROB. If CINSN is not NULL, it is the insn to copy > + in order to create a jump. */ > + > +static rtx > +compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob, > + rtx cinsn) > +{ > + rtx seq, jump, cond; > + enum machine_mode mode; > + > + mode = GET_MODE (op0); > + if (mode == VOIDmode) > + mode = GET_MODE (op1); > + > + start_sequence (); > + if (GET_MODE_CLASS (mode) == MODE_CC) > + { > + /* A hack -- there seems to be no easy generic way how to make a > + conditional jump from a ccmode comparison. */ > + gcc_assert (cinsn); > + cond = XEXP (SET_SRC (pc_set (cinsn)), 0); > + gcc_assert (GET_CODE (cond) == comp); > + gcc_assert (rtx_equal_p (op0, XEXP (cond, 0))); > + gcc_assert (rtx_equal_p (op1, XEXP (cond, 1))); > + emit_jump_insn (copy_insn (PATTERN (cinsn))); > + jump = get_last_insn (); > + gcc_assert (JUMP_P (jump)); > + JUMP_LABEL (jump) = JUMP_LABEL (cinsn); > + LABEL_NUSES (JUMP_LABEL (jump))++; > + redirect_jump (jump, label, 0); > + } > + else > + { > + gcc_assert (!cinsn); > + > + op0 = force_operand (op0, NULL_RTX); > + op1 = force_operand (op1, NULL_RTX); > + do_compare_rtx_and_jump (op0, op1, comp, 0, > + mode, NULL_RTX, NULL_RTX, label, -1); > + jump = get_last_insn (); > + gcc_assert (JUMP_P (jump)); > + JUMP_LABEL (jump) = label; > + LABEL_NUSES (label)++; > + } > + add_int_reg_note (jump, REG_BR_PROB, prob); > + > + seq = get_insns (); > + end_sequence (); > + > + return seq; > +} > + > /* Unroll LOOP for which we are able to count number of iterations in runtime > LOOP->LPT_DECISION.TIMES times. The transformation does this (with some > extra care for case n < 0): > Index: gcc/rtl.h > =================================================================== > --- gcc/rtl.h (revision 209410) > +++ gcc/rtl.h (working copy) > @@ -2743,10 +2743,6 @@ extern unsigned int variable_tracking_ma > extern void get_mode_bounds (enum machine_mode, int, enum machine_mode, > rtx *, rtx *); > > -/* In loop-unswitch.c */ > -extern rtx reversed_condition (rtx); > -extern rtx compare_and_jump_seq (rtx, rtx, enum rtx_code, rtx, int, rtx); > - > /* In loop-iv.c */ > extern rtx canon_condition (rtx); > extern void simplify_using_condition (rtx, rtx *, bitmap); >
> > This removes RTL loop unswitching (see last years discussion about > compile-time issues of that pass). RTL loop unswitching is > enabled together with GIMPLE loop unswitching at -O3 and by > -floop-unswitch. It's clearly the wrong place to do high-level > loop transforms these days, and the cost of maintainance doesn't > outweight the questionable benefit. > > Thus the following patch removes it. > > Bootstrap / regtest pending on x86_64-unknown-linux-gnu (I hope > for testsuite fallout). > > Any objections? Not really, I am all for moving more of loop stuff to trees. Did you performed some benchmarks? (I remember I did in 2012 but completely forgot the outcome). On related note, shall I try to update the following? http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02270.html Honza > > Thanks, > Richard. > > 2014-04-15 Richard Biener <rguenther@suse.de> > > * Makefile.in (OBJS): Remove loop-unswitch.o. > * loop-unswitch.c: Delete. > * tree-pass.h (make_pass_rtl_unswitch): Remove. > * passes.def (pass_rtl_unswitch): Likewise. > * loop-init.c (gate_rtl_unswitch): Likewise. > (rtl_unswitch): Likewise. > (pass_data_rtl_unswitch): Likewise. > (pass_rtl_unswitch): Likewise. > (make_pass_rtl_unswitch): Likewise. > * rtl.h (reversed_condition): Likewise. > (compare_and_jump_seq): Likewise. > * loop-iv.c (reversed_condition): Move here from loop-unswitch.c > and make static. > * loop-unroll.c (compare_and_jump_seq): Likewise. > > Index: gcc/Makefile.in > =================================================================== > --- gcc/Makefile.in (revision 209410) > +++ gcc/Makefile.in (working copy) > @@ -1294,7 +1294,6 @@ OBJS = \ > loop-invariant.o \ > loop-iv.o \ > loop-unroll.o \ > - loop-unswitch.o \ > lower-subreg.o \ > lra.o \ > lra-assigns.o \ > Index: gcc/tree-pass.h > =================================================================== > --- gcc/tree-pass.h (revision 209410) > +++ gcc/tree-pass.h (working copy) > @@ -512,7 +512,6 @@ extern rtl_opt_pass *make_pass_outof_cfg > extern rtl_opt_pass *make_pass_loop2 (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_rtl_loop_init (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_rtl_move_loop_invariants (gcc::context *ctxt); > -extern rtl_opt_pass *make_pass_rtl_unswitch (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_rtl_unroll_and_peel_loops (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_rtl_doloop (gcc::context *ctxt); > extern rtl_opt_pass *make_pass_rtl_loop_done (gcc::context *ctxt); > Index: gcc/passes.def > =================================================================== > --- gcc/passes.def (revision 209410) > +++ gcc/passes.def (working copy) > @@ -341,7 +341,6 @@ along with GCC; see the file COPYING3. > PUSH_INSERT_PASSES_WITHIN (pass_loop2) > NEXT_PASS (pass_rtl_loop_init); > NEXT_PASS (pass_rtl_move_loop_invariants); > - NEXT_PASS (pass_rtl_unswitch); > NEXT_PASS (pass_rtl_unroll_and_peel_loops); > NEXT_PASS (pass_rtl_doloop); > NEXT_PASS (pass_rtl_loop_done); > Index: gcc/loop-init.c > =================================================================== > --- gcc/loop-init.c (revision 209410) > +++ gcc/loop-init.c (working copy) > @@ -518,61 +518,7 @@ make_pass_rtl_move_loop_invariants (gcc: > } > > > -/* Loop unswitching for RTL. */ > -static bool > -gate_rtl_unswitch (void) > -{ > - return flag_unswitch_loops; > -} > - > -static unsigned int > -rtl_unswitch (void) > -{ > - if (number_of_loops (cfun) > 1) > - unswitch_loops (); > - return 0; > -} > - > -namespace { > - > -const pass_data pass_data_rtl_unswitch = > -{ > - RTL_PASS, /* type */ > - "loop2_unswitch", /* name */ > - OPTGROUP_LOOP, /* optinfo_flags */ > - true, /* has_gate */ > - true, /* has_execute */ > - TV_LOOP_UNSWITCH, /* tv_id */ > - 0, /* properties_required */ > - 0, /* properties_provided */ > - 0, /* properties_destroyed */ > - 0, /* todo_flags_start */ > - TODO_verify_rtl_sharing, /* todo_flags_finish */ > -}; > - > -class pass_rtl_unswitch : public rtl_opt_pass > -{ > -public: > - pass_rtl_unswitch (gcc::context *ctxt) > - : rtl_opt_pass (pass_data_rtl_unswitch, ctxt) > - {} > - > - /* opt_pass methods: */ > - bool gate () { return gate_rtl_unswitch (); } > - unsigned int execute () { return rtl_unswitch (); } > - > -}; // class pass_rtl_unswitch > - > -} // anon namespace > - > -rtl_opt_pass * > -make_pass_rtl_unswitch (gcc::context *ctxt) > -{ > - return new pass_rtl_unswitch (ctxt); > -} > - > - > -/* Loop unswitching for RTL. */ > +/* Loop unrolling and peeling for RTL. */ > static bool > gate_rtl_unroll_and_peel_loops (void) > { > Index: gcc/loop-iv.c > =================================================================== > --- gcc/loop-iv.c (revision 209410) > +++ gcc/loop-iv.c (working copy) > @@ -1732,6 +1732,21 @@ canon_condition (rtx cond) > return cond; > } > > +/* Reverses CONDition; returns NULL if we cannot. */ > + > +static rtx > +reversed_condition (rtx cond) > +{ > + enum rtx_code reversed; > + reversed = reversed_comparison_code (cond, NULL); > + if (reversed == UNKNOWN) > + return NULL_RTX; > + else > + return gen_rtx_fmt_ee (reversed, > + GET_MODE (cond), XEXP (cond, 0), > + XEXP (cond, 1)); > +} > + > /* Tries to use the fact that COND holds to simplify EXPR. ALTERED is the > set of altered regs. */ > > Index: gcc/loop-unroll.c > =================================================================== > --- gcc/loop-unroll.c (revision 209410) > +++ gcc/loop-unroll.c (working copy) > @@ -1060,6 +1060,59 @@ split_edge_and_insert (edge e, rtx insns > return bb; > } > > +/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if > + true, with probability PROB. If CINSN is not NULL, it is the insn to copy > + in order to create a jump. */ > + > +static rtx > +compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob, > + rtx cinsn) > +{ > + rtx seq, jump, cond; > + enum machine_mode mode; > + > + mode = GET_MODE (op0); > + if (mode == VOIDmode) > + mode = GET_MODE (op1); > + > + start_sequence (); > + if (GET_MODE_CLASS (mode) == MODE_CC) > + { > + /* A hack -- there seems to be no easy generic way how to make a > + conditional jump from a ccmode comparison. */ > + gcc_assert (cinsn); > + cond = XEXP (SET_SRC (pc_set (cinsn)), 0); > + gcc_assert (GET_CODE (cond) == comp); > + gcc_assert (rtx_equal_p (op0, XEXP (cond, 0))); > + gcc_assert (rtx_equal_p (op1, XEXP (cond, 1))); > + emit_jump_insn (copy_insn (PATTERN (cinsn))); > + jump = get_last_insn (); > + gcc_assert (JUMP_P (jump)); > + JUMP_LABEL (jump) = JUMP_LABEL (cinsn); > + LABEL_NUSES (JUMP_LABEL (jump))++; > + redirect_jump (jump, label, 0); > + } > + else > + { > + gcc_assert (!cinsn); > + > + op0 = force_operand (op0, NULL_RTX); > + op1 = force_operand (op1, NULL_RTX); > + do_compare_rtx_and_jump (op0, op1, comp, 0, > + mode, NULL_RTX, NULL_RTX, label, -1); > + jump = get_last_insn (); > + gcc_assert (JUMP_P (jump)); > + JUMP_LABEL (jump) = label; > + LABEL_NUSES (label)++; > + } > + add_int_reg_note (jump, REG_BR_PROB, prob); > + > + seq = get_insns (); > + end_sequence (); > + > + return seq; > +} > + > /* Unroll LOOP for which we are able to count number of iterations in runtime > LOOP->LPT_DECISION.TIMES times. The transformation does this (with some > extra care for case n < 0): > Index: gcc/rtl.h > =================================================================== > --- gcc/rtl.h (revision 209410) > +++ gcc/rtl.h (working copy) > @@ -2743,10 +2743,6 @@ extern unsigned int variable_tracking_ma > extern void get_mode_bounds (enum machine_mode, int, enum machine_mode, > rtx *, rtx *); > > -/* In loop-unswitch.c */ > -extern rtx reversed_condition (rtx); > -extern rtx compare_and_jump_seq (rtx, rtx, enum rtx_code, rtx, int, rtx); > - > /* In loop-iv.c */ > extern rtx canon_condition (rtx); > extern void simplify_using_condition (rtx, rtx *, bitmap);
On Sun, 20 Apr 2014, Jan Hubicka wrote: > > > > This removes RTL loop unswitching (see last years discussion about > > compile-time issues of that pass). RTL loop unswitching is > > enabled together with GIMPLE loop unswitching at -O3 and by > > -floop-unswitch. It's clearly the wrong place to do high-level > > loop transforms these days, and the cost of maintainance doesn't > > outweight the questionable benefit. > > > > Thus the following patch removes it. > > > > Bootstrap / regtest pending on x86_64-unknown-linux-gnu (I hope > > for testsuite fallout). > > > > Any objections? > > Not really, I am all for moving more of loop stuff to trees. > Did you performed some benchmarks? (I remember I did in 2012 > but completely forgot the outcome). I did that last year and it showed no difference in SPEC 2k6. When bootstrapping with -O3 and a gcc_unreachable () in the RTL unswitching path you get some ICEs there but they are due to different "effective" --param max-unswitch-insns that is on GIMPLE applied to tree_num_loop_insns () and on RTL to num_loop_insns (). I'll go forward with the patch today. > On related note, shall I try to update the following? > http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02270.html Yeah. Thanks, Richard. > Honza > > > > Thanks, > > Richard. > > > > 2014-04-15 Richard Biener <rguenther@suse.de> > > > > * Makefile.in (OBJS): Remove loop-unswitch.o. > > * loop-unswitch.c: Delete. > > * tree-pass.h (make_pass_rtl_unswitch): Remove. > > * passes.def (pass_rtl_unswitch): Likewise. > > * loop-init.c (gate_rtl_unswitch): Likewise. > > (rtl_unswitch): Likewise. > > (pass_data_rtl_unswitch): Likewise. > > (pass_rtl_unswitch): Likewise. > > (make_pass_rtl_unswitch): Likewise. > > * rtl.h (reversed_condition): Likewise. > > (compare_and_jump_seq): Likewise. > > * loop-iv.c (reversed_condition): Move here from loop-unswitch.c > > and make static. > > * loop-unroll.c (compare_and_jump_seq): Likewise. > > > > Index: gcc/Makefile.in > > =================================================================== > > --- gcc/Makefile.in (revision 209410) > > +++ gcc/Makefile.in (working copy) > > @@ -1294,7 +1294,6 @@ OBJS = \ > > loop-invariant.o \ > > loop-iv.o \ > > loop-unroll.o \ > > - loop-unswitch.o \ > > lower-subreg.o \ > > lra.o \ > > lra-assigns.o \ > > Index: gcc/tree-pass.h > > =================================================================== > > --- gcc/tree-pass.h (revision 209410) > > +++ gcc/tree-pass.h (working copy) > > @@ -512,7 +512,6 @@ extern rtl_opt_pass *make_pass_outof_cfg > > extern rtl_opt_pass *make_pass_loop2 (gcc::context *ctxt); > > extern rtl_opt_pass *make_pass_rtl_loop_init (gcc::context *ctxt); > > extern rtl_opt_pass *make_pass_rtl_move_loop_invariants (gcc::context *ctxt); > > -extern rtl_opt_pass *make_pass_rtl_unswitch (gcc::context *ctxt); > > extern rtl_opt_pass *make_pass_rtl_unroll_and_peel_loops (gcc::context *ctxt); > > extern rtl_opt_pass *make_pass_rtl_doloop (gcc::context *ctxt); > > extern rtl_opt_pass *make_pass_rtl_loop_done (gcc::context *ctxt); > > Index: gcc/passes.def > > =================================================================== > > --- gcc/passes.def (revision 209410) > > +++ gcc/passes.def (working copy) > > @@ -341,7 +341,6 @@ along with GCC; see the file COPYING3. > > PUSH_INSERT_PASSES_WITHIN (pass_loop2) > > NEXT_PASS (pass_rtl_loop_init); > > NEXT_PASS (pass_rtl_move_loop_invariants); > > - NEXT_PASS (pass_rtl_unswitch); > > NEXT_PASS (pass_rtl_unroll_and_peel_loops); > > NEXT_PASS (pass_rtl_doloop); > > NEXT_PASS (pass_rtl_loop_done); > > Index: gcc/loop-init.c > > =================================================================== > > --- gcc/loop-init.c (revision 209410) > > +++ gcc/loop-init.c (working copy) > > @@ -518,61 +518,7 @@ make_pass_rtl_move_loop_invariants (gcc: > > } > > > > > > -/* Loop unswitching for RTL. */ > > -static bool > > -gate_rtl_unswitch (void) > > -{ > > - return flag_unswitch_loops; > > -} > > - > > -static unsigned int > > -rtl_unswitch (void) > > -{ > > - if (number_of_loops (cfun) > 1) > > - unswitch_loops (); > > - return 0; > > -} > > - > > -namespace { > > - > > -const pass_data pass_data_rtl_unswitch = > > -{ > > - RTL_PASS, /* type */ > > - "loop2_unswitch", /* name */ > > - OPTGROUP_LOOP, /* optinfo_flags */ > > - true, /* has_gate */ > > - true, /* has_execute */ > > - TV_LOOP_UNSWITCH, /* tv_id */ > > - 0, /* properties_required */ > > - 0, /* properties_provided */ > > - 0, /* properties_destroyed */ > > - 0, /* todo_flags_start */ > > - TODO_verify_rtl_sharing, /* todo_flags_finish */ > > -}; > > - > > -class pass_rtl_unswitch : public rtl_opt_pass > > -{ > > -public: > > - pass_rtl_unswitch (gcc::context *ctxt) > > - : rtl_opt_pass (pass_data_rtl_unswitch, ctxt) > > - {} > > - > > - /* opt_pass methods: */ > > - bool gate () { return gate_rtl_unswitch (); } > > - unsigned int execute () { return rtl_unswitch (); } > > - > > -}; // class pass_rtl_unswitch > > - > > -} // anon namespace > > - > > -rtl_opt_pass * > > -make_pass_rtl_unswitch (gcc::context *ctxt) > > -{ > > - return new pass_rtl_unswitch (ctxt); > > -} > > - > > - > > -/* Loop unswitching for RTL. */ > > +/* Loop unrolling and peeling for RTL. */ > > static bool > > gate_rtl_unroll_and_peel_loops (void) > > { > > Index: gcc/loop-iv.c > > =================================================================== > > --- gcc/loop-iv.c (revision 209410) > > +++ gcc/loop-iv.c (working copy) > > @@ -1732,6 +1732,21 @@ canon_condition (rtx cond) > > return cond; > > } > > > > +/* Reverses CONDition; returns NULL if we cannot. */ > > + > > +static rtx > > +reversed_condition (rtx cond) > > +{ > > + enum rtx_code reversed; > > + reversed = reversed_comparison_code (cond, NULL); > > + if (reversed == UNKNOWN) > > + return NULL_RTX; > > + else > > + return gen_rtx_fmt_ee (reversed, > > + GET_MODE (cond), XEXP (cond, 0), > > + XEXP (cond, 1)); > > +} > > + > > /* Tries to use the fact that COND holds to simplify EXPR. ALTERED is the > > set of altered regs. */ > > > > Index: gcc/loop-unroll.c > > =================================================================== > > --- gcc/loop-unroll.c (revision 209410) > > +++ gcc/loop-unroll.c (working copy) > > @@ -1060,6 +1060,59 @@ split_edge_and_insert (edge e, rtx insns > > return bb; > > } > > > > +/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if > > + true, with probability PROB. If CINSN is not NULL, it is the insn to copy > > + in order to create a jump. */ > > + > > +static rtx > > +compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob, > > + rtx cinsn) > > +{ > > + rtx seq, jump, cond; > > + enum machine_mode mode; > > + > > + mode = GET_MODE (op0); > > + if (mode == VOIDmode) > > + mode = GET_MODE (op1); > > + > > + start_sequence (); > > + if (GET_MODE_CLASS (mode) == MODE_CC) > > + { > > + /* A hack -- there seems to be no easy generic way how to make a > > + conditional jump from a ccmode comparison. */ > > + gcc_assert (cinsn); > > + cond = XEXP (SET_SRC (pc_set (cinsn)), 0); > > + gcc_assert (GET_CODE (cond) == comp); > > + gcc_assert (rtx_equal_p (op0, XEXP (cond, 0))); > > + gcc_assert (rtx_equal_p (op1, XEXP (cond, 1))); > > + emit_jump_insn (copy_insn (PATTERN (cinsn))); > > + jump = get_last_insn (); > > + gcc_assert (JUMP_P (jump)); > > + JUMP_LABEL (jump) = JUMP_LABEL (cinsn); > > + LABEL_NUSES (JUMP_LABEL (jump))++; > > + redirect_jump (jump, label, 0); > > + } > > + else > > + { > > + gcc_assert (!cinsn); > > + > > + op0 = force_operand (op0, NULL_RTX); > > + op1 = force_operand (op1, NULL_RTX); > > + do_compare_rtx_and_jump (op0, op1, comp, 0, > > + mode, NULL_RTX, NULL_RTX, label, -1); > > + jump = get_last_insn (); > > + gcc_assert (JUMP_P (jump)); > > + JUMP_LABEL (jump) = label; > > + LABEL_NUSES (label)++; > > + } > > + add_int_reg_note (jump, REG_BR_PROB, prob); > > + > > + seq = get_insns (); > > + end_sequence (); > > + > > + return seq; > > +} > > + > > /* Unroll LOOP for which we are able to count number of iterations in runtime > > LOOP->LPT_DECISION.TIMES times. The transformation does this (with some > > extra care for case n < 0): > > Index: gcc/rtl.h > > =================================================================== > > --- gcc/rtl.h (revision 209410) > > +++ gcc/rtl.h (working copy) > > @@ -2743,10 +2743,6 @@ extern unsigned int variable_tracking_ma > > extern void get_mode_bounds (enum machine_mode, int, enum machine_mode, > > rtx *, rtx *); > > > > -/* In loop-unswitch.c */ > > -extern rtx reversed_condition (rtx); > > -extern rtx compare_and_jump_seq (rtx, rtx, enum rtx_code, rtx, int, rtx); > > - > > /* In loop-iv.c */ > > extern rtx canon_condition (rtx); > > extern void simplify_using_condition (rtx, rtx *, bitmap); > >
> On Sun, 20 Apr 2014, Jan Hubicka wrote: > > > > > > > This removes RTL loop unswitching (see last years discussion about > > > compile-time issues of that pass). RTL loop unswitching is > > > enabled together with GIMPLE loop unswitching at -O3 and by > > > -floop-unswitch. It's clearly the wrong place to do high-level > > > loop transforms these days, and the cost of maintainance doesn't > > > outweight the questionable benefit. > > > > > > Thus the following patch removes it. > > > > > > Bootstrap / regtest pending on x86_64-unknown-linux-gnu (I hope > > > for testsuite fallout). > > > > > > Any objections? > > > > Not really, I am all for moving more of loop stuff to trees. > > Did you performed some benchmarks? (I remember I did in 2012 > > but completely forgot the outcome). > > I did that last year and it showed no difference in SPEC 2k6. > > When bootstrapping with -O3 and a gcc_unreachable () in the > RTL unswitching path you get some ICEs there but they are > due to different "effective" --param max-unswitch-insns that > is on GIMPLE applied to tree_num_loop_insns () and on RTL > to num_loop_insns (). Yep, I remember seeing some interesting special cases where RTL analyzis did catch on invariants but tree didn't, but nothing important. > > I'll go forward with the patch today. > > > On related note, shall I try to update the following? > > http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02270.html > > Yeah. Will do, Honza > > Thanks, > Richard. > > > Honza > > > > > > Thanks, > > > Richard. > > > > > > 2014-04-15 Richard Biener <rguenther@suse.de> > > > > > > * Makefile.in (OBJS): Remove loop-unswitch.o. > > > * loop-unswitch.c: Delete. > > > * tree-pass.h (make_pass_rtl_unswitch): Remove. > > > * passes.def (pass_rtl_unswitch): Likewise. > > > * loop-init.c (gate_rtl_unswitch): Likewise. > > > (rtl_unswitch): Likewise. > > > (pass_data_rtl_unswitch): Likewise. > > > (pass_rtl_unswitch): Likewise. > > > (make_pass_rtl_unswitch): Likewise. > > > * rtl.h (reversed_condition): Likewise. > > > (compare_and_jump_seq): Likewise. > > > * loop-iv.c (reversed_condition): Move here from loop-unswitch.c > > > and make static. > > > * loop-unroll.c (compare_and_jump_seq): Likewise. > > > > > > Index: gcc/Makefile.in > > > =================================================================== > > > --- gcc/Makefile.in (revision 209410) > > > +++ gcc/Makefile.in (working copy) > > > @@ -1294,7 +1294,6 @@ OBJS = \ > > > loop-invariant.o \ > > > loop-iv.o \ > > > loop-unroll.o \ > > > - loop-unswitch.o \ > > > lower-subreg.o \ > > > lra.o \ > > > lra-assigns.o \ > > > Index: gcc/tree-pass.h > > > =================================================================== > > > --- gcc/tree-pass.h (revision 209410) > > > +++ gcc/tree-pass.h (working copy) > > > @@ -512,7 +512,6 @@ extern rtl_opt_pass *make_pass_outof_cfg > > > extern rtl_opt_pass *make_pass_loop2 (gcc::context *ctxt); > > > extern rtl_opt_pass *make_pass_rtl_loop_init (gcc::context *ctxt); > > > extern rtl_opt_pass *make_pass_rtl_move_loop_invariants (gcc::context *ctxt); > > > -extern rtl_opt_pass *make_pass_rtl_unswitch (gcc::context *ctxt); > > > extern rtl_opt_pass *make_pass_rtl_unroll_and_peel_loops (gcc::context *ctxt); > > > extern rtl_opt_pass *make_pass_rtl_doloop (gcc::context *ctxt); > > > extern rtl_opt_pass *make_pass_rtl_loop_done (gcc::context *ctxt); > > > Index: gcc/passes.def > > > =================================================================== > > > --- gcc/passes.def (revision 209410) > > > +++ gcc/passes.def (working copy) > > > @@ -341,7 +341,6 @@ along with GCC; see the file COPYING3. > > > PUSH_INSERT_PASSES_WITHIN (pass_loop2) > > > NEXT_PASS (pass_rtl_loop_init); > > > NEXT_PASS (pass_rtl_move_loop_invariants); > > > - NEXT_PASS (pass_rtl_unswitch); > > > NEXT_PASS (pass_rtl_unroll_and_peel_loops); > > > NEXT_PASS (pass_rtl_doloop); > > > NEXT_PASS (pass_rtl_loop_done); > > > Index: gcc/loop-init.c > > > =================================================================== > > > --- gcc/loop-init.c (revision 209410) > > > +++ gcc/loop-init.c (working copy) > > > @@ -518,61 +518,7 @@ make_pass_rtl_move_loop_invariants (gcc: > > > } > > > > > > > > > -/* Loop unswitching for RTL. */ > > > -static bool > > > -gate_rtl_unswitch (void) > > > -{ > > > - return flag_unswitch_loops; > > > -} > > > - > > > -static unsigned int > > > -rtl_unswitch (void) > > > -{ > > > - if (number_of_loops (cfun) > 1) > > > - unswitch_loops (); > > > - return 0; > > > -} > > > - > > > -namespace { > > > - > > > -const pass_data pass_data_rtl_unswitch = > > > -{ > > > - RTL_PASS, /* type */ > > > - "loop2_unswitch", /* name */ > > > - OPTGROUP_LOOP, /* optinfo_flags */ > > > - true, /* has_gate */ > > > - true, /* has_execute */ > > > - TV_LOOP_UNSWITCH, /* tv_id */ > > > - 0, /* properties_required */ > > > - 0, /* properties_provided */ > > > - 0, /* properties_destroyed */ > > > - 0, /* todo_flags_start */ > > > - TODO_verify_rtl_sharing, /* todo_flags_finish */ > > > -}; > > > - > > > -class pass_rtl_unswitch : public rtl_opt_pass > > > -{ > > > -public: > > > - pass_rtl_unswitch (gcc::context *ctxt) > > > - : rtl_opt_pass (pass_data_rtl_unswitch, ctxt) > > > - {} > > > - > > > - /* opt_pass methods: */ > > > - bool gate () { return gate_rtl_unswitch (); } > > > - unsigned int execute () { return rtl_unswitch (); } > > > - > > > -}; // class pass_rtl_unswitch > > > - > > > -} // anon namespace > > > - > > > -rtl_opt_pass * > > > -make_pass_rtl_unswitch (gcc::context *ctxt) > > > -{ > > > - return new pass_rtl_unswitch (ctxt); > > > -} > > > - > > > - > > > -/* Loop unswitching for RTL. */ > > > +/* Loop unrolling and peeling for RTL. */ > > > static bool > > > gate_rtl_unroll_and_peel_loops (void) > > > { > > > Index: gcc/loop-iv.c > > > =================================================================== > > > --- gcc/loop-iv.c (revision 209410) > > > +++ gcc/loop-iv.c (working copy) > > > @@ -1732,6 +1732,21 @@ canon_condition (rtx cond) > > > return cond; > > > } > > > > > > +/* Reverses CONDition; returns NULL if we cannot. */ > > > + > > > +static rtx > > > +reversed_condition (rtx cond) > > > +{ > > > + enum rtx_code reversed; > > > + reversed = reversed_comparison_code (cond, NULL); > > > + if (reversed == UNKNOWN) > > > + return NULL_RTX; > > > + else > > > + return gen_rtx_fmt_ee (reversed, > > > + GET_MODE (cond), XEXP (cond, 0), > > > + XEXP (cond, 1)); > > > +} > > > + > > > /* Tries to use the fact that COND holds to simplify EXPR. ALTERED is the > > > set of altered regs. */ > > > > > > Index: gcc/loop-unroll.c > > > =================================================================== > > > --- gcc/loop-unroll.c (revision 209410) > > > +++ gcc/loop-unroll.c (working copy) > > > @@ -1060,6 +1060,59 @@ split_edge_and_insert (edge e, rtx insns > > > return bb; > > > } > > > > > > +/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if > > > + true, with probability PROB. If CINSN is not NULL, it is the insn to copy > > > + in order to create a jump. */ > > > + > > > +static rtx > > > +compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob, > > > + rtx cinsn) > > > +{ > > > + rtx seq, jump, cond; > > > + enum machine_mode mode; > > > + > > > + mode = GET_MODE (op0); > > > + if (mode == VOIDmode) > > > + mode = GET_MODE (op1); > > > + > > > + start_sequence (); > > > + if (GET_MODE_CLASS (mode) == MODE_CC) > > > + { > > > + /* A hack -- there seems to be no easy generic way how to make a > > > + conditional jump from a ccmode comparison. */ > > > + gcc_assert (cinsn); > > > + cond = XEXP (SET_SRC (pc_set (cinsn)), 0); > > > + gcc_assert (GET_CODE (cond) == comp); > > > + gcc_assert (rtx_equal_p (op0, XEXP (cond, 0))); > > > + gcc_assert (rtx_equal_p (op1, XEXP (cond, 1))); > > > + emit_jump_insn (copy_insn (PATTERN (cinsn))); > > > + jump = get_last_insn (); > > > + gcc_assert (JUMP_P (jump)); > > > + JUMP_LABEL (jump) = JUMP_LABEL (cinsn); > > > + LABEL_NUSES (JUMP_LABEL (jump))++; > > > + redirect_jump (jump, label, 0); > > > + } > > > + else > > > + { > > > + gcc_assert (!cinsn); > > > + > > > + op0 = force_operand (op0, NULL_RTX); > > > + op1 = force_operand (op1, NULL_RTX); > > > + do_compare_rtx_and_jump (op0, op1, comp, 0, > > > + mode, NULL_RTX, NULL_RTX, label, -1); > > > + jump = get_last_insn (); > > > + gcc_assert (JUMP_P (jump)); > > > + JUMP_LABEL (jump) = label; > > > + LABEL_NUSES (label)++; > > > + } > > > + add_int_reg_note (jump, REG_BR_PROB, prob); > > > + > > > + seq = get_insns (); > > > + end_sequence (); > > > + > > > + return seq; > > > +} > > > + > > > /* Unroll LOOP for which we are able to count number of iterations in runtime > > > LOOP->LPT_DECISION.TIMES times. The transformation does this (with some > > > extra care for case n < 0): > > > Index: gcc/rtl.h > > > =================================================================== > > > --- gcc/rtl.h (revision 209410) > > > +++ gcc/rtl.h (working copy) > > > @@ -2743,10 +2743,6 @@ extern unsigned int variable_tracking_ma > > > extern void get_mode_bounds (enum machine_mode, int, enum machine_mode, > > > rtx *, rtx *); > > > > > > -/* In loop-unswitch.c */ > > > -extern rtx reversed_condition (rtx); > > > -extern rtx compare_and_jump_seq (rtx, rtx, enum rtx_code, rtx, int, rtx); > > > - > > > /* In loop-iv.c */ > > > extern rtx canon_condition (rtx); > > > extern void simplify_using_condition (rtx, rtx *, bitmap); > > > > > > -- > Richard Biener <rguenther@suse.de> > SUSE / SUSE Labs > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 > GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer
Index: gcc/Makefile.in =================================================================== --- gcc/Makefile.in (revision 209410) +++ gcc/Makefile.in (working copy) @@ -1294,7 +1294,6 @@ OBJS = \ loop-invariant.o \ loop-iv.o \ loop-unroll.o \ - loop-unswitch.o \ lower-subreg.o \ lra.o \ lra-assigns.o \ Index: gcc/tree-pass.h =================================================================== --- gcc/tree-pass.h (revision 209410) +++ gcc/tree-pass.h (working copy) @@ -512,7 +512,6 @@ extern rtl_opt_pass *make_pass_outof_cfg extern rtl_opt_pass *make_pass_loop2 (gcc::context *ctxt); extern rtl_opt_pass *make_pass_rtl_loop_init (gcc::context *ctxt); extern rtl_opt_pass *make_pass_rtl_move_loop_invariants (gcc::context *ctxt); -extern rtl_opt_pass *make_pass_rtl_unswitch (gcc::context *ctxt); extern rtl_opt_pass *make_pass_rtl_unroll_and_peel_loops (gcc::context *ctxt); extern rtl_opt_pass *make_pass_rtl_doloop (gcc::context *ctxt); extern rtl_opt_pass *make_pass_rtl_loop_done (gcc::context *ctxt); Index: gcc/passes.def =================================================================== --- gcc/passes.def (revision 209410) +++ gcc/passes.def (working copy) @@ -341,7 +341,6 @@ along with GCC; see the file COPYING3. PUSH_INSERT_PASSES_WITHIN (pass_loop2) NEXT_PASS (pass_rtl_loop_init); NEXT_PASS (pass_rtl_move_loop_invariants); - NEXT_PASS (pass_rtl_unswitch); NEXT_PASS (pass_rtl_unroll_and_peel_loops); NEXT_PASS (pass_rtl_doloop); NEXT_PASS (pass_rtl_loop_done); Index: gcc/loop-init.c =================================================================== --- gcc/loop-init.c (revision 209410) +++ gcc/loop-init.c (working copy) @@ -518,61 +518,7 @@ make_pass_rtl_move_loop_invariants (gcc: } -/* Loop unswitching for RTL. */ -static bool -gate_rtl_unswitch (void) -{ - return flag_unswitch_loops; -} - -static unsigned int -rtl_unswitch (void) -{ - if (number_of_loops (cfun) > 1) - unswitch_loops (); - return 0; -} - -namespace { - -const pass_data pass_data_rtl_unswitch = -{ - RTL_PASS, /* type */ - "loop2_unswitch", /* name */ - OPTGROUP_LOOP, /* optinfo_flags */ - true, /* has_gate */ - true, /* has_execute */ - TV_LOOP_UNSWITCH, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_verify_rtl_sharing, /* todo_flags_finish */ -}; - -class pass_rtl_unswitch : public rtl_opt_pass -{ -public: - pass_rtl_unswitch (gcc::context *ctxt) - : rtl_opt_pass (pass_data_rtl_unswitch, ctxt) - {} - - /* opt_pass methods: */ - bool gate () { return gate_rtl_unswitch (); } - unsigned int execute () { return rtl_unswitch (); } - -}; // class pass_rtl_unswitch - -} // anon namespace - -rtl_opt_pass * -make_pass_rtl_unswitch (gcc::context *ctxt) -{ - return new pass_rtl_unswitch (ctxt); -} - - -/* Loop unswitching for RTL. */ +/* Loop unrolling and peeling for RTL. */ static bool gate_rtl_unroll_and_peel_loops (void) { Index: gcc/loop-iv.c =================================================================== --- gcc/loop-iv.c (revision 209410) +++ gcc/loop-iv.c (working copy) @@ -1732,6 +1732,21 @@ canon_condition (rtx cond) return cond; } +/* Reverses CONDition; returns NULL if we cannot. */ + +static rtx +reversed_condition (rtx cond) +{ + enum rtx_code reversed; + reversed = reversed_comparison_code (cond, NULL); + if (reversed == UNKNOWN) + return NULL_RTX; + else + return gen_rtx_fmt_ee (reversed, + GET_MODE (cond), XEXP (cond, 0), + XEXP (cond, 1)); +} + /* Tries to use the fact that COND holds to simplify EXPR. ALTERED is the set of altered regs. */ Index: gcc/loop-unroll.c =================================================================== --- gcc/loop-unroll.c (revision 209410) +++ gcc/loop-unroll.c (working copy) @@ -1060,6 +1060,59 @@ split_edge_and_insert (edge e, rtx insns return bb; } +/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if + true, with probability PROB. If CINSN is not NULL, it is the insn to copy + in order to create a jump. */ + +static rtx +compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob, + rtx cinsn) +{ + rtx seq, jump, cond; + enum machine_mode mode; + + mode = GET_MODE (op0); + if (mode == VOIDmode) + mode = GET_MODE (op1); + + start_sequence (); + if (GET_MODE_CLASS (mode) == MODE_CC) + { + /* A hack -- there seems to be no easy generic way how to make a + conditional jump from a ccmode comparison. */ + gcc_assert (cinsn); + cond = XEXP (SET_SRC (pc_set (cinsn)), 0); + gcc_assert (GET_CODE (cond) == comp); + gcc_assert (rtx_equal_p (op0, XEXP (cond, 0))); + gcc_assert (rtx_equal_p (op1, XEXP (cond, 1))); + emit_jump_insn (copy_insn (PATTERN (cinsn))); + jump = get_last_insn (); + gcc_assert (JUMP_P (jump)); + JUMP_LABEL (jump) = JUMP_LABEL (cinsn); + LABEL_NUSES (JUMP_LABEL (jump))++; + redirect_jump (jump, label, 0); + } + else + { + gcc_assert (!cinsn); + + op0 = force_operand (op0, NULL_RTX); + op1 = force_operand (op1, NULL_RTX); + do_compare_rtx_and_jump (op0, op1, comp, 0, + mode, NULL_RTX, NULL_RTX, label, -1); + jump = get_last_insn (); + gcc_assert (JUMP_P (jump)); + JUMP_LABEL (jump) = label; + LABEL_NUSES (label)++; + } + add_int_reg_note (jump, REG_BR_PROB, prob); + + seq = get_insns (); + end_sequence (); + + return seq; +} + /* Unroll LOOP for which we are able to count number of iterations in runtime LOOP->LPT_DECISION.TIMES times. The transformation does this (with some extra care for case n < 0): Index: gcc/rtl.h =================================================================== --- gcc/rtl.h (revision 209410) +++ gcc/rtl.h (working copy) @@ -2743,10 +2743,6 @@ extern unsigned int variable_tracking_ma extern void get_mode_bounds (enum machine_mode, int, enum machine_mode, rtx *, rtx *); -/* In loop-unswitch.c */ -extern rtx reversed_condition (rtx); -extern rtx compare_and_jump_seq (rtx, rtx, enum rtx_code, rtx, int, rtx); - /* In loop-iv.c */ extern rtx canon_condition (rtx); extern void simplify_using_condition (rtx, rtx *, bitmap);