Message ID | ce8ab2bc262ee063b50c3f939e42905b5ed1acbc.1490280700.git.segher@kernel.crashing.org |
---|---|
State | New |
Headers | show |
On Thu, Mar 23, 2017 at 3:58 PM, Segher Boessenkool <segher@kernel.crashing.org> wrote: > The algorithm fwprop uses never reconsiders a possible propagation, > although it could succeed if the def (in the def-use to propagate) > has changed. This causes fwprop to do infinite propagations, like > in the scenario in the PR, where we end up with effectively > B = A > A = B > D = A > where only propagations into the last statement are still tried, and > that loops (it becomes D = B, then back to D = A, etc.) > > Fixing this properly isn't easy; this patch instead limits the number > of propagations performed to the number of uses we originally had, > which is the maximum number of propagations that can be done if there > are no such infinite loops. > > Bootstrapped and regression checked on powerpc64-linux {-m64,-m32}; > is this okay for trunk? https://gcc.gnu.org/ml/gcc-patches/2017-02/msg01485.html ? > > Segher > > > 2017-03-23 Segher Boessenkool <segher@kernel.crashing.org> > > PR rtl-optimization/79405 > * fwprop.c (propagations_left): New variable. > (forward_propagate_into): Decrement it. > (fwprop_init): Initialize it. > (fw_prop): If the variable has reached zero, stop propagating. > (fwprop_addr): Ditto. > > gcc/testsuite/ > PR rtl-optimization/79405 > gcc.dg/pr79405.c: New testcase. > > --- > gcc/fwprop.c | 17 ++++++++++++++++ > gcc/testsuite/gcc.dg/pr79405.c | 45 ++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 62 insertions(+) > create mode 100644 gcc/testsuite/gcc.dg/pr79405.c > > diff --git a/gcc/fwprop.c b/gcc/fwprop.c > index 285fb1a..0ab25ef 100644 > --- a/gcc/fwprop.c > +++ b/gcc/fwprop.c > @@ -120,6 +120,13 @@ static vec<df_ref> use_def_ref; > static vec<df_ref> reg_defs; > static vec<df_ref> reg_defs_stack; > > +/* The maximum number of propagations that are still allowed. If we do > + more propagations than originally we had uses, we must have ended up > + in a propagation loop, as in PR79405. Until the algorithm fwprop > + uses can obviously not get into such loops we need a workaround like > + this. */ > +static int propagations_left; > + > /* The MD bitmaps are trimmed to include only live registers to cut > memory usage on testcases like insn-recog.c. Track live registers > in the basic block and do not perform forward propagation if the > @@ -1407,6 +1414,8 @@ forward_propagate_into (df_ref use) > if (forward_propagate_and_simplify (use, def_insn, def_set) > || forward_propagate_subreg (use, def_insn, def_set)) > { > + propagations_left--; > + > if (cfun->can_throw_non_call_exceptions > && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX) > && purge_dead_edges (DF_REF_BB (use))) > @@ -1434,6 +1443,8 @@ fwprop_init (void) > active_defs = XNEWVEC (df_ref, max_reg_num ()); > if (flag_checking) > active_defs_check = sparseset_alloc (max_reg_num ()); > + > + propagations_left = DF_USES_TABLE_SIZE (); > } > > static void > @@ -1480,6 +1491,9 @@ fwprop (void) > > for (i = 0; i < DF_USES_TABLE_SIZE (); i++) > { > + if (!propagations_left) > + break; > + > df_ref use = DF_USES_GET (i); > if (use) > if (DF_REF_TYPE (use) == DF_REF_REG_USE > @@ -1540,6 +1554,9 @@ fwprop_addr (void) > end, and we'll go through them as well. */ > for (i = 0; i < DF_USES_TABLE_SIZE (); i++) > { > + if (!propagations_left) > + break; > + > df_ref use = DF_USES_GET (i); > if (use) > if (DF_REF_TYPE (use) != DF_REF_REG_USE > diff --git a/gcc/testsuite/gcc.dg/pr79405.c b/gcc/testsuite/gcc.dg/pr79405.c > new file mode 100644 > index 0000000..c17baff > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr79405.c > @@ -0,0 +1,45 @@ > +/* PR rtl-optimization/79405 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +char cz; > +long long int xx, u2; > + > +void > +qv (int js, int wl) > +{ > + if (js != 0) > + { > + short int sc; > + int *at = (int *)≻ > + long long int gx = 0; > + > + for (;;) > + { > + *at = 0; > + js /= sc; > + > + for (wl = 0; wl < 2; ++wl) > + { > + xx = gx; > + u2 %= xx > 0; > + cz /= u2; > + > + fa: > + if (cz != u2) > + { > + gx |= js; > + cz = gx / js; > + } > + } > + } > + > + yq: > + wl /= 0x80000000; > + u2 = wl; > + u2 |= (wl != 0) | (wl != 0 && gx != 0); > + js = u2; > + goto fa; > + } > + goto yq; > +} > -- > 1.9.3 >
On Thu, Mar 23, 2017 at 04:16:56PM +0100, Richard Biener wrote: > On Thu, Mar 23, 2017 at 3:58 PM, Segher Boessenkool > <segher@kernel.crashing.org> wrote: > > The algorithm fwprop uses never reconsiders a possible propagation, > > although it could succeed if the def (in the def-use to propagate) > > has changed. This causes fwprop to do infinite propagations, like > > in the scenario in the PR, where we end up with effectively > > B = A > > A = B > > D = A > > where only propagations into the last statement are still tried, and > > that loops (it becomes D = B, then back to D = A, etc.) > > > > Fixing this properly isn't easy; this patch instead limits the number > > of propagations performed to the number of uses we originally had, > > which is the maximum number of propagations that can be done if there > > are no such infinite loops. > > > > Bootstrapped and regression checked on powerpc64-linux {-m64,-m32}; > > is this okay for trunk? > > https://gcc.gnu.org/ml/gcc-patches/2017-02/msg01485.html > > ? What about it? That suggestion would make fwprop do *less* useful work, while in principle the problem is that it *already* does not enough! If fwprop actually tried all propagations (and never tried substituting X with X, which it currently does), there would be no problem. This patch is a workaround; it makes no difference on pretty much any code, except this single testcase (which has undefined behaviour, uninitialised variables). Segher
On Thu, Mar 23, 2017 at 4:45 PM, Segher Boessenkool <segher@kernel.crashing.org> wrote: > On Thu, Mar 23, 2017 at 04:16:56PM +0100, Richard Biener wrote: >> On Thu, Mar 23, 2017 at 3:58 PM, Segher Boessenkool >> <segher@kernel.crashing.org> wrote: >> > The algorithm fwprop uses never reconsiders a possible propagation, >> > although it could succeed if the def (in the def-use to propagate) >> > has changed. This causes fwprop to do infinite propagations, like >> > in the scenario in the PR, where we end up with effectively >> > B = A >> > A = B >> > D = A >> > where only propagations into the last statement are still tried, and >> > that loops (it becomes D = B, then back to D = A, etc.) >> > >> > Fixing this properly isn't easy; this patch instead limits the number >> > of propagations performed to the number of uses we originally had, >> > which is the maximum number of propagations that can be done if there >> > are no such infinite loops. >> > >> > Bootstrapped and regression checked on powerpc64-linux {-m64,-m32}; >> > is this okay for trunk? >> >> https://gcc.gnu.org/ml/gcc-patches/2017-02/msg01485.html >> >> ? > > What about it? That suggestion would make fwprop do *less* useful work, > while in principle the problem is that it *already* does not enough! Ok, not knowing a lot about fwprop I take your word for it (but GIMPLE level forwprop works that way and certainly you can't substitute a def into a use that is before the def). > If fwprop actually tried all propagations (and never tried substituting > X with X, which it currently does), there would be no problem. To me it looked like fwprop tries to iterate over all propagations but it iterates over a changing data structure which is why it "oscillates". But I didn't actually verify that theory (in fact, I just very much disliked Berns patch give its compile-time cost). > This patch is a workaround; it makes no difference on pretty much any > code, except this single testcase (which has undefined behaviour, > uninitialised variables). Yeah, your fix looks similar to the other hack I suggested. Richard. > > Segher
On Fri, Mar 24, 2017 at 09:13:59AM +0100, Richard Biener wrote: > >> https://gcc.gnu.org/ml/gcc-patches/2017-02/msg01485.html > >> > >> ? > > > > What about it? That suggestion would make fwprop do *less* useful work, > > while in principle the problem is that it *already* does not enough! > > Ok, not knowing a lot about fwprop I take your word for it (but GIMPLE level > forwprop works that way and certainly you can't substitute a def into a use > that is before the def). When fwprop has done a propagation it makes fresh new uses for all the sources of the resulting insn, which it adds at the end of the table. It doesn't reuse the original uses. > > If fwprop actually tried all propagations (and never tried substituting > > X with X, which it currently does), there would be no problem. > > To me it looked like fwprop tries to iterate over all propagations but > it iterates over a changing data structure which is why it "oscillates". > But I didn't actually verify that theory (in fact, I just very much disliked > Berns patch give its compile-time cost). We have, in order, in one bb: B = A|Z A = B D = A where each of those is the only set of its dest. Now the first thing tried is propagating the first into the second. This fails. Then, Z is found to be zero, so we get B = A A = B D = A If the first was now propagated into the second again, all is fine (all three vars are set to A). But this is not tried again. The second is propagated into the third, giving B = A A = B D = B and then the first is propagated into the third, and we repeat forever. > > This patch is a workaround; it makes no difference on pretty much any > > code, except this single testcase (which has undefined behaviour, > > uninitialised variables). > > Yeah, your fix looks similar to the other hack I suggested. I have implemented the "retry things" as well fwiw, but a) it is too big and invasive for stage 4, and b) it kind of sucks, needs more work, even more invasive. The workaround is cheap and solves the immediate problem. Segher
On Fri, Mar 24, 2017 at 10:39 AM, Segher Boessenkool <segher@kernel.crashing.org> wrote: > On Fri, Mar 24, 2017 at 09:13:59AM +0100, Richard Biener wrote: >> >> https://gcc.gnu.org/ml/gcc-patches/2017-02/msg01485.html >> >> >> >> ? >> > >> > What about it? That suggestion would make fwprop do *less* useful work, >> > while in principle the problem is that it *already* does not enough! >> >> Ok, not knowing a lot about fwprop I take your word for it (but GIMPLE level >> forwprop works that way and certainly you can't substitute a def into a use >> that is before the def). > > When fwprop has done a propagation it makes fresh new uses for all the > sources of the resulting insn, which it adds at the end of the table. > It doesn't reuse the original uses. Yes, that's what I suspected. >> > If fwprop actually tried all propagations (and never tried substituting >> > X with X, which it currently does), there would be no problem. >> >> To me it looked like fwprop tries to iterate over all propagations but >> it iterates over a changing data structure which is why it "oscillates". >> But I didn't actually verify that theory (in fact, I just very much disliked >> Berns patch give its compile-time cost). > > We have, in order, in one bb: > > B = A|Z > A = B > D = A > > where each of those is the only set of its dest. Now the first thing > tried is propagating the first into the second. This fails. Then, > Z is found to be zero, so we get > > B = A > A = B > D = A > > If the first was now propagated into the second again, all is fine > (all three vars are set to A). But this is not tried again. The > second is propagated into the third, giving > > B = A > A = B > D = B > > and then the first is propagated into the third, and we repeat > forever. > >> > This patch is a workaround; it makes no difference on pretty much any >> > code, except this single testcase (which has undefined behaviour, >> > uninitialised variables). >> >> Yeah, your fix looks similar to the other hack I suggested. > > I have implemented the "retry things" as well fwiw, but a) it is too > big and invasive for stage 4, and b) it kind of sucks, needs more > work, even more invasive. The workaround is cheap and solves the > immediate problem. Agreed, still iterating over the DF uses in the first place looks like the bug (given this "all uses" data structure changes during propagation!). I'd have done for (BBs in RPO order) for (insn in BB) repeat: for (use in insn) if (propagate_into (use)) goto repeat; Richard. > > Segher
On 03/24/2017 05:54 AM, Richard Biener wrote: > On Fri, Mar 24, 2017 at 10:39 AM, Segher Boessenkool > <segher@kernel.crashing.org> wrote: >> On Fri, Mar 24, 2017 at 09:13:59AM +0100, Richard Biener wrote: >>>>> https://gcc.gnu.org/ml/gcc-patches/2017-02/msg01485.html >>>>> >>>>> ? >>>> >>>> What about it? That suggestion would make fwprop do *less* useful work, >>>> while in principle the problem is that it *already* does not enough! >>> >>> Ok, not knowing a lot about fwprop I take your word for it (but GIMPLE level >>> forwprop works that way and certainly you can't substitute a def into a use >>> that is before the def). >> >> When fwprop has done a propagation it makes fresh new uses for all the >> sources of the resulting insn, which it adds at the end of the table. >> It doesn't reuse the original uses. > > Yes, that's what I suspected. > >>>> If fwprop actually tried all propagations (and never tried substituting >>>> X with X, which it currently does), there would be no problem. >>> >>> To me it looked like fwprop tries to iterate over all propagations but >>> it iterates over a changing data structure which is why it "oscillates". >>> But I didn't actually verify that theory (in fact, I just very much disliked >>> Berns patch give its compile-time cost). >> >> We have, in order, in one bb: >> >> B = A|Z >> A = B >> D = A >> >> where each of those is the only set of its dest. Now the first thing >> tried is propagating the first into the second. This fails. Then, >> Z is found to be zero, so we get >> >> B = A >> A = B >> D = A >> >> If the first was now propagated into the second again, all is fine >> (all three vars are set to A). But this is not tried again. The >> second is propagated into the third, giving >> >> B = A >> A = B >> D = B >> >> and then the first is propagated into the third, and we repeat >> forever. >> >>>> This patch is a workaround; it makes no difference on pretty much any >>>> code, except this single testcase (which has undefined behaviour, >>>> uninitialised variables). >>> >>> Yeah, your fix looks similar to the other hack I suggested. >> >> I have implemented the "retry things" as well fwiw, but a) it is too >> big and invasive for stage 4, and b) it kind of sucks, needs more >> work, even more invasive. The workaround is cheap and solves the >> immediate problem. > > Agreed, still iterating over the DF uses in the first place looks like > the bug (given this "all uses" data structure changes during propagation!). > > I'd have done > > for (BBs in RPO order) > for (insn in BB) > repeat: > for (use in insn) > if (propagate_into (use)) > goto repeat; I wonder why it wasn't implemented like this in the first place. ISTM that with the exception of loops that it ought to give the same final result. For loops, I wouldn't expect opportunities along backedges to be all that common. It also seems to me that we ought to be able to verify if the different order changes things. Just to be clear, I can live with either approach. I would even be option to Segher's as a stopgap and yours for gcc-8. jeff
On Fri, Mar 24, 2017 at 12:54:35PM +0100, Richard Biener wrote: > > I have implemented the "retry things" as well fwiw, but a) it is too > > big and invasive for stage 4, and b) it kind of sucks, needs more > > work, even more invasive. The workaround is cheap and solves the > > immediate problem. > > Agreed, still iterating over the DF uses in the first place looks like > the bug (given this "all uses" data structure changes during propagation!). > > I'd have done > > for (BBs in RPO order) > for (insn in BB) > repeat: > for (use in insn) > if (propagate_into (use)) > goto repeat; That is more or less how the def-use links for fwprop are built now, but not the order that propagations are tried in. I think originally it used all single definitions, not just those dominating their uses, in which case something like your proposed algorithm does not work (there can be def-use loops, even irreducible loops). I agree it would be good to try something like you suggest in next stage 1. Segher
On 03/23/2017 08:58 AM, Segher Boessenkool wrote: > The algorithm fwprop uses never reconsiders a possible propagation, > although it could succeed if the def (in the def-use to propagate) > has changed. This causes fwprop to do infinite propagations, like > in the scenario in the PR, where we end up with effectively > B = A > A = B > D = A > where only propagations into the last statement are still tried, and > that loops (it becomes D = B, then back to D = A, etc.) > > Fixing this properly isn't easy; this patch instead limits the number > of propagations performed to the number of uses we originally had, > which is the maximum number of propagations that can be done if there > are no such infinite loops. > > Bootstrapped and regression checked on powerpc64-linux {-m64,-m32}; > is this okay for trunk? > > > Segher > > > 2017-03-23 Segher Boessenkool <segher@kernel.crashing.org> > > PR rtl-optimization/79405 > * fwprop.c (propagations_left): New variable. > (forward_propagate_into): Decrement it. > (fwprop_init): Initialize it. > (fw_prop): If the variable has reached zero, stop propagating. > (fwprop_addr): Ditto. > > gcc/testsuite/ > PR rtl-optimization/79405 > gcc.dg/pr79405.c: New testcase. I installed this as a stopgap for gcc-7. The bug is still open targetting gcc-8 to force us to come back and look at Richi's suggestion. jeff
diff --git a/gcc/fwprop.c b/gcc/fwprop.c index 285fb1a..0ab25ef 100644 --- a/gcc/fwprop.c +++ b/gcc/fwprop.c @@ -120,6 +120,13 @@ static vec<df_ref> use_def_ref; static vec<df_ref> reg_defs; static vec<df_ref> reg_defs_stack; +/* The maximum number of propagations that are still allowed. If we do + more propagations than originally we had uses, we must have ended up + in a propagation loop, as in PR79405. Until the algorithm fwprop + uses can obviously not get into such loops we need a workaround like + this. */ +static int propagations_left; + /* The MD bitmaps are trimmed to include only live registers to cut memory usage on testcases like insn-recog.c. Track live registers in the basic block and do not perform forward propagation if the @@ -1407,6 +1414,8 @@ forward_propagate_into (df_ref use) if (forward_propagate_and_simplify (use, def_insn, def_set) || forward_propagate_subreg (use, def_insn, def_set)) { + propagations_left--; + if (cfun->can_throw_non_call_exceptions && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX) && purge_dead_edges (DF_REF_BB (use))) @@ -1434,6 +1443,8 @@ fwprop_init (void) active_defs = XNEWVEC (df_ref, max_reg_num ()); if (flag_checking) active_defs_check = sparseset_alloc (max_reg_num ()); + + propagations_left = DF_USES_TABLE_SIZE (); } static void @@ -1480,6 +1491,9 @@ fwprop (void) for (i = 0; i < DF_USES_TABLE_SIZE (); i++) { + if (!propagations_left) + break; + df_ref use = DF_USES_GET (i); if (use) if (DF_REF_TYPE (use) == DF_REF_REG_USE @@ -1540,6 +1554,9 @@ fwprop_addr (void) end, and we'll go through them as well. */ for (i = 0; i < DF_USES_TABLE_SIZE (); i++) { + if (!propagations_left) + break; + df_ref use = DF_USES_GET (i); if (use) if (DF_REF_TYPE (use) != DF_REF_REG_USE diff --git a/gcc/testsuite/gcc.dg/pr79405.c b/gcc/testsuite/gcc.dg/pr79405.c new file mode 100644 index 0000000..c17baff --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr79405.c @@ -0,0 +1,45 @@ +/* PR rtl-optimization/79405 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +char cz; +long long int xx, u2; + +void +qv (int js, int wl) +{ + if (js != 0) + { + short int sc; + int *at = (int *)≻ + long long int gx = 0; + + for (;;) + { + *at = 0; + js /= sc; + + for (wl = 0; wl < 2; ++wl) + { + xx = gx; + u2 %= xx > 0; + cz /= u2; + + fa: + if (cz != u2) + { + gx |= js; + cz = gx / js; + } + } + } + + yq: + wl /= 0x80000000; + u2 = wl; + u2 |= (wl != 0) | (wl != 0 && gx != 0); + js = u2; + goto fa; + } + goto yq; +}