Message ID | 20170202202923.GC91024@kam.mff.cuni.cz |
---|---|
State | New |
Headers | show |
On Thu, Feb 2, 2017 at 12:29 PM, Jan Hubicka <hubicka@ucw.cz> wrote: > Hi, > this patches fixes profile updating in the ifcombine. This is not hard to do > and ifcombine is #2 profile update offender out of tree passes (#1 is the > vectorizer). > > I think this counts as a regression, becuase one can trigger arbitrarily > bad profile after ifconversion and defnitly construct a testcase where this > will cause us optimize for size where we optimized for speed previously. This might be related to PR 78200 but I could be wrong. Thanks, Andrew > > Bootstrapped/regtested x86_64-linux. Will commit it tomorrow (after testers > pick up the threading fix) unless there are complains. > > * gcc.dg/tree-ssa/ssa-ifcombine-1.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-2.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-3.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-4.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-5.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-6.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-7.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-8.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-9.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-10.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-11.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-12.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-13.c: Check for no profile mismatches. > > * tree-ssa-ifcombine.c (update_profile_after_ifcombine): New function. > (ifcombine_ifandif): Use it. > > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-optimized" } */ > +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ > > /* Testcase for PR31657. */ > > @@ -14,3 +14,4 @@ int foo (int x, int a, int b) > } > > /* { dg-final { scan-tree-dump "\\|" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-optimized" } */ > +/* { dg-options "-O2 -fdump-tree-optimized-details-blocks" } */ > > /* Testcase for PR31657. */ > > @@ -17,3 +17,4 @@ int f(int x, int a, int b) > return t; > } > /* { dg-final { scan-tree-dump "& 5" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-optimized" } */ > +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ > > /* Testcase for PR31657. */ > int g(void); > @@ -18,3 +18,4 @@ int f(int x, int a, int b) > } > > /* { dg-final { scan-tree-dump "& 5" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-optimized" } */ > +/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-optimized-details-blocks" } */ > > /* Testcase for PR31657. */ > > @@ -17,3 +17,4 @@ int f(int x, int a, int b) > return t; > } > /* { dg-final { scan-tree-dump "& 5" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O1 -fdump-tree-optimized" } */ > +/* { dg-options "-O1 -fdump-tree-optimized-details-blocks" } */ > /* { dg-additional-options "-mbranch-cost=2" { target { i?86-*-* x86_64-*-* s390*-*-* avr*-*-* } } } */ > > _Bool f1(_Bool a, _Bool b) > @@ -18,3 +18,4 @@ _Bool f1(_Bool a, _Bool b) > /* For LOGICAL_OP_NON_SHORT_CIRCUIT, this should be optimized > into return a & b;, with no ifs. */ > /* { dg-final { scan-tree-dump-not "if" "optimized" { target { i?86-*-* x86_64-*-* s390*-*-* avr*-*-* } } } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-optimized" } */ > +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ > > /* Testcase for PR31657. */ > > @@ -20,3 +20,4 @@ doit: > } > > /* { dg-final { scan-tree-dump "\\|" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-optimized" } */ > +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ > > /* Testcase extracted from PR15353. */ > > @@ -20,3 +20,4 @@ doit: > } > > /* { dg-final { scan-tree-dump ">=" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-optimized" } */ > +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ > > /* Testcase extracted from PR15353. */ > > @@ -18,3 +18,4 @@ void foo (int x, int a) > } > > /* { dg-final { scan-tree-dump "!=" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-optimized" } */ > +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ > > /* Testcase from PR15353. */ > > @@ -17,3 +17,4 @@ int f(int *i, int *j) > } > > /* { dg-final { scan-tree-dump ">=" "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-ifcombine" } */ > +/* { dg-options "-O -fdump-tree-ifcombine-details-blocks" } */ > > void bar (void); > > @@ -34,3 +34,4 @@ foo2 (unsigned int a) > > /* { dg-final { scan-tree-dump "optimizing bits or bits test" "ifcombine" } } */ > /* { dg-final { scan-tree-dump "optimizing double bit test" "ifcombine" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-ifcombine" } */ > +/* { dg-options "-O -fdump-tree-ifcombine-details-blocks" } */ > > int test1 (int i, int j) > { > @@ -12,3 +12,4 @@ int test1 (int i, int j) > /* The above should be optimized to a i > j test by ifcombine. */ > > /* { dg-final { scan-tree-dump " > " "ifcombine" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fno-trapping-math -fdump-tree-ifcombine" } */ > +/* { dg-options "-O -fno-trapping-math -fdump-tree-ifcombine-details-blocks" } */ > > double test1 (double i, double j) > { > @@ -22,3 +22,4 @@ plouf: > Instead we get u<=, which is acceptable with -fno-trapping-math. */ > > /* { dg-final { scan-tree-dump " u<= " "ifcombine" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */ > Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c (revision 245134) > +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fno-trapping-math -fdump-tree-ifcombine" } */ > +/* { dg-options "-O2 -fno-trapping-math -fdump-tree-ifcombine-details-blocks" } */ > > void f (); > enum Sign { NEG=-1, ZERO, POS }; > @@ -19,3 +19,4 @@ void g (double x) > The transformation would also be legal with -ftrapping-math. */ > > /* { dg-final { scan-tree-dump "optimizing.* < " "ifcombine" } } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */ > Index: testsuite/gcc.dg/tree-ssa/threadbackward-1.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/threadbackward-1.c (revision 0) > +++ testsuite/gcc.dg/tree-ssa/threadbackward-1.c (working copy) > @@ -0,0 +1,9 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-ethread" } */ > +char *c; > +int t() > +{ > + for (int i=0;i<5000;i++) > + c[i]=i; > +} > +/* { dg-final { scan-tree-dump-times "Registering FSM jump thread" 1 "ethread"} } */ > Index: tree-ssa-ifcombine.c > =================================================================== > --- tree-ssa-ifcombine.c (revision 245134) > +++ tree-ssa-ifcombine.c (working copy) > @@ -332,6 +332,51 @@ recognize_bits_test (gcond *cond, tree * > return true; > } > > + > +/* Update profile after code in outer_cond_bb was adjuted so > + outer_cond_bb has no condition. */ > + > +static void > +update_profile_after_ifcombine (basic_block inner_cond_bb, > + basic_block outer_cond_bb) > +{ > + edge outer_to_inner = find_edge (outer_cond_bb, inner_cond_bb); > + edge outer2 = (EDGE_SUCC (outer_cond_bb, 0) == outer_to_inner > + ? EDGE_SUCC (outer_cond_bb, 1) > + : EDGE_SUCC (outer_cond_bb, 0)); > + edge inner_taken = EDGE_SUCC (inner_cond_bb, 0); > + edge inner_not_taken = EDGE_SUCC (inner_cond_bb, 1); > + > + if (inner_taken->dest != outer2->dest) > + std::swap (inner_taken, inner_not_taken); > + gcc_assert (inner_taken->dest == outer2->dest); > + > + /* In the following we assume that inner_cond_bb has single predecessor. */ > + gcc_assert (single_pred_p (inner_cond_bb)); > + > + /* Path outer_cond_bb->(outer2) needs to be merged into path > + outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken) > + and probability of inner_not_taken updated. */ > + > + outer_to_inner->count = outer_cond_bb->count; > + inner_cond_bb->count = outer_cond_bb->count; > + inner_taken->count += outer2->count; > + outer2->count = 0; > + > + inner_taken->probability = outer2->probability > + + RDIV (outer_to_inner->probability > + * inner_taken->probability, > + REG_BR_PROB_BASE); > + if (inner_taken->probability > REG_BR_PROB_BASE) > + inner_taken->probability = REG_BR_PROB_BASE; > + inner_not_taken->probability = REG_BR_PROB_BASE > + - inner_taken->probability; > + > + outer_to_inner->probability = REG_BR_PROB_BASE; > + inner_cond_bb->frequency = outer_cond_bb->frequency; > + outer2->probability = 0; > +} > + > /* If-convert on a and pattern with a common else block. The inner > if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB. > inner_inv, outer_inv and result_inv indicate whether the conditions > @@ -394,6 +439,7 @@ ifcombine_ifandif (basic_block inner_con > outer_inv ? boolean_false_node : boolean_true_node); > update_stmt (outer_cond); > + update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb); > > if (dump_file) > { > fprintf (dump_file, "optimizing double bit test to "); > @@ -471,6 +518,7 @@ ifcombine_ifandif (basic_block inner_con > gimple_cond_set_condition_from_tree (outer_cond, > outer_inv ? boolean_false_node : boolean_true_node); > update_stmt (outer_cond); > + update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb); > > if (dump_file) > { > @@ -554,6 +602,7 @@ ifcombine_ifandif (basic_block inner_con > gimple_cond_set_condition_from_tree (outer_cond, > outer_inv ? boolean_false_node : boolean_true_node); > update_stmt (outer_cond); > + update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb); > > if (dump_file) > {
On 02/02/2017 01:29 PM, Jan Hubicka wrote: > Hi, > this patches fixes profile updating in the ifcombine. This is not hard to do > and ifcombine is #2 profile update offender out of tree passes (#1 is the > vectorizer). > > I think this counts as a regression, becuase one can trigger arbitrarily > bad profile after ifconversion and defnitly construct a testcase where this > will cause us optimize for size where we optimized for speed previously. > > Bootstrapped/regtested x86_64-linux. Will commit it tomorrow (after testers > pick up the threading fix) unless there are complains. > > * gcc.dg/tree-ssa/ssa-ifcombine-1.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-2.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-3.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-4.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-5.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-6.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-7.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-8.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-9.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-10.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-11.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-12.c: Check for no profile mismatches. > * gcc.dg/tree-ssa/ssa-ifcombine-13.c: Check for no profile mismatches. > > * tree-ssa-ifcombine.c (update_profile_after_ifcombine): New function. > (ifcombine_ifandif): Use it. > > Index: testsuite/gcc.dg/tree-ssa/threadbackward-1.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/threadbackward-1.c (revision 0) > +++ testsuite/gcc.dg/tree-ssa/threadbackward-1.c (working copy) > @@ -0,0 +1,9 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-ethread" } */ > +char *c; > +int t() > +{ > + for (int i=0;i<5000;i++) > + c[i]=i; > +} > +/* { dg-final { scan-tree-dump-times "Registering FSM jump thread" 1 "ethread"} } */ Seems like this is unrelated to fixing profile updates in ifcombine. If you want the new test it's probably best done in another independent patch :-) > Index: tree-ssa-ifcombine.c > =================================================================== > --- tree-ssa-ifcombine.c (revision 245134) > +++ tree-ssa-ifcombine.c (working copy) > @@ -332,6 +332,51 @@ recognize_bits_test (gcond *cond, tree * > return true; > } > > + > +/* Update profile after code in outer_cond_bb was adjuted so s/adjuted/adjusted/ OK with the nit fixed. jeff
Hi, On 3 February 2017 at 08:56, Jeff Law <law@redhat.com> wrote: > On 02/02/2017 01:29 PM, Jan Hubicka wrote: >> >> Hi, >> this patches fixes profile updating in the ifcombine. This is not hard to >> do >> and ifcombine is #2 profile update offender out of tree passes (#1 is the >> vectorizer). >> >> I think this counts as a regression, becuase one can trigger arbitrarily >> bad profile after ifconversion and defnitly construct a testcase where >> this >> will cause us optimize for size where we optimized for speed previously. >> >> Bootstrapped/regtested x86_64-linux. Will commit it tomorrow (after >> testers >> pick up the threading fix) unless there are complains. >> >> * gcc.dg/tree-ssa/ssa-ifcombine-1.c: Check for no profile >> mismatches. >> * gcc.dg/tree-ssa/ssa-ifcombine-2.c: Check for no profile >> mismatches. >> * gcc.dg/tree-ssa/ssa-ifcombine-3.c: Check for no profile >> mismatches. >> * gcc.dg/tree-ssa/ssa-ifcombine-4.c: Check for no profile >> mismatches. >> * gcc.dg/tree-ssa/ssa-ifcombine-5.c: Check for no profile >> mismatches. >> * gcc.dg/tree-ssa/ssa-ifcombine-6.c: Check for no profile >> mismatches. >> * gcc.dg/tree-ssa/ssa-ifcombine-7.c: Check for no profile >> mismatches. >> * gcc.dg/tree-ssa/ssa-ifcombine-8.c: Check for no profile >> mismatches. >> * gcc.dg/tree-ssa/ssa-ifcombine-9.c: Check for no profile >> mismatches. >> * gcc.dg/tree-ssa/ssa-ifcombine-10.c: Check for no profile >> mismatches. >> * gcc.dg/tree-ssa/ssa-ifcombine-11.c: Check for no profile >> mismatches. >> * gcc.dg/tree-ssa/ssa-ifcombine-12.c: Check for no profile >> mismatches. >> * gcc.dg/tree-ssa/ssa-ifcombine-13.c: Check for no profile >> mismatches. >> >> * tree-ssa-ifcombine.c (update_profile_after_ifcombine): New >> function. >> (ifcombine_ifandif): Use it. >> >> Index: testsuite/gcc.dg/tree-ssa/threadbackward-1.c >> =================================================================== >> --- testsuite/gcc.dg/tree-ssa/threadbackward-1.c (revision 0) >> +++ testsuite/gcc.dg/tree-ssa/threadbackward-1.c (working copy) >> @@ -0,0 +1,9 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2 -fdump-tree-ethread" } */ >> +char *c; >> +int t() >> +{ >> + for (int i=0;i<5000;i++) >> + c[i]=i; >> +} >> +/* { dg-final { scan-tree-dump-times "Registering FSM jump thread" 1 >> "ethread"} } */ > > Seems like this is unrelated to fixing profile updates in ifcombine. If you > want the new test it's probably best done in another independent patch :-) > > >> Index: tree-ssa-ifcombine.c >> =================================================================== >> --- tree-ssa-ifcombine.c (revision 245134) >> +++ tree-ssa-ifcombine.c (working copy) >> @@ -332,6 +332,51 @@ recognize_bits_test (gcond *cond, tree * >> return true; >> } >> >> + >> +/* Update profile after code in outer_cond_bb was adjuted so > > s/adjuted/adjusted/ > > OK with the nit fixed. > > jeff > After this patch (r245151), I've noticed regressions on aarch64: gcc.target/aarch64/test_frame_1.c scan-assembler-times ldr\tx30, \\[sp\\], [0-9]+ 2 gcc.target/aarch64/test_frame_2.c scan-assembler-times ldp\tx19, x30, \\[sp\\], [0-9]+ 1 gcc.target/aarch64/test_frame_4.c scan-assembler-times ldp\tx19, x30, \\[sp\\], [0-9]+ 1 gcc.target/aarch64/test_frame_6.c scan-assembler-times ldr\tx30, \\[sp\\] 2 gcc.target/aarch64/test_frame_7.c scan-assembler-times ldp\tx19, x30, \\[sp\\] 1 now FAIL. For instance in test_frame_1.c, we used to generate: ccmp w2, w1, 0, eq bne .L6 ldrb w0, [sp, 123] ldrb w1, [sp, 124] cmp w0, 99 ccmp w1, w0, 0, eq cset w0, eq .L6: ldr x30, [sp], 224 ret and now: ccmp w2, w1, 0, eq beq .L11 ldr x30, [sp], 224 ret .p2align 3 .L11: ldrb w0, [sp, 123] ldrb w1, [sp, 124] cmp w0, 99 ldr x30, [sp], 224 ccmp w1, w0, 0, eq cset w0, eq ret which is 2 instructions more, as the control flow is less efficient. Do we want to just update the tests (increasing the number of expected str/ldr/stp/ldp to match the new code generation), or do we consider this a regression caused by this patch? Christophe
> > After this patch (r245151), I've noticed regressions on aarch64: > gcc.target/aarch64/test_frame_1.c scan-assembler-times ldr\tx30, > \\[sp\\], [0-9]+ 2 > gcc.target/aarch64/test_frame_2.c scan-assembler-times ldp\tx19, > x30, \\[sp\\], [0-9]+ 1 > gcc.target/aarch64/test_frame_4.c scan-assembler-times ldp\tx19, > x30, \\[sp\\], [0-9]+ 1 > gcc.target/aarch64/test_frame_6.c scan-assembler-times ldr\tx30, \\[sp\\] 2 > gcc.target/aarch64/test_frame_7.c scan-assembler-times ldp\tx19, > x30, \\[sp\\] 1 > > now FAIL. > > For instance in test_frame_1.c, we used to generate: > ccmp w2, w1, 0, eq > bne .L6 > ldrb w0, [sp, 123] > ldrb w1, [sp, 124] > cmp w0, 99 > ccmp w1, w0, 0, eq > cset w0, eq > .L6: > ldr x30, [sp], 224 > ret > > and now: > ccmp w2, w1, 0, eq > beq .L11 > ldr x30, [sp], 224 > ret > .p2align 3 > .L11: > ldrb w0, [sp, 123] > ldrb w1, [sp, 124] > cmp w0, 99 > ldr x30, [sp], 224 > ccmp w1, w0, 0, eq > cset w0, eq > ret > > which is 2 instructions more, as the control flow is less efficient. > > Do we want to just update the tests (increasing the number of expected > str/ldr/stp/ldp to match the new code generation), or do we > consider this a regression caused by this patch? I think it is not a regression, just the testcase if fragile and depends on outcome of ifcombine. It seems it was updated several time in the past. I am not quite sure what the test is testing, but probably just updating the testcase and/or disabling the ifconvert pass for it is the right answer. Honza
On 06/02/17 15:26, Jan Hubicka wrote: > I think it is not a regression, just the testcase if fragile and depends on outcome > of ifcombine. It seems it was updated several time in the past. I am not quite > sure what the test is testing. They are tring to make sure optimal stack adjustment decisions are made. Fix the testcases by disabling relevant transformation passes looks one way to me. The other way, might be more reliable, is we dump the decisions made during aarch64 frame layout if dump_file be true, and prefix the dump entry by function name to make it easier caught by dejagnu. We then scan rtl dump instead of instructions.
On 06/02/17 15:54, Jiong Wang wrote: > On 06/02/17 15:26, Jan Hubicka wrote: >> I think it is not a regression, just the testcase if fragile and >> depends on outcome >> of ifcombine. It seems it was updated several time in the past. I am >> not quite >> sure what the test is testing. > > They are tring to make sure optimal stack adjustment decisions are made. > > Fix the testcases by disabling relevant transformation passes looks one > way to > me. The other way, might be more reliable, is we dump the decisions > made during > aarch64 frame layout if dump_file be true, and prefix the dump entry by > function > name to make it easier caught by dejagnu. We then scan rtl dump instead of > instructions. > > We only care that the epilogue instructions appear at least once. So for the epilogue we should probably just adjust the tests to use scan-assembler, rather than scan-assembler-times. R.
Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-1.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-optimized" } */ +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ /* Testcase for PR31657. */ @@ -14,3 +14,4 @@ int foo (int x, int a, int b) } /* { dg-final { scan-tree-dump "\\|" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-10.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-options "-O2 -fdump-tree-optimized-details-blocks" } */ /* Testcase for PR31657. */ @@ -17,3 +17,4 @@ int f(int x, int a, int b) return t; } /* { dg-final { scan-tree-dump "& 5" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-11.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-optimized" } */ +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ /* Testcase for PR31657. */ int g(void); @@ -18,3 +18,4 @@ int f(int x, int a, int b) } /* { dg-final { scan-tree-dump "& 5" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-12.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-optimized" } */ +/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-optimized-details-blocks" } */ /* Testcase for PR31657. */ @@ -17,3 +17,4 @@ int f(int x, int a, int b) return t; } /* { dg-final { scan-tree-dump "& 5" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-13.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O1 -fdump-tree-optimized" } */ +/* { dg-options "-O1 -fdump-tree-optimized-details-blocks" } */ /* { dg-additional-options "-mbranch-cost=2" { target { i?86-*-* x86_64-*-* s390*-*-* avr*-*-* } } } */ _Bool f1(_Bool a, _Bool b) @@ -18,3 +18,4 @@ _Bool f1(_Bool a, _Bool b) /* For LOGICAL_OP_NON_SHORT_CIRCUIT, this should be optimized into return a & b;, with no ifs. */ /* { dg-final { scan-tree-dump-not "if" "optimized" { target { i?86-*-* x86_64-*-* s390*-*-* avr*-*-* } } } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-2.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-optimized" } */ +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ /* Testcase for PR31657. */ @@ -20,3 +20,4 @@ doit: } /* { dg-final { scan-tree-dump "\\|" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-3.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-optimized" } */ +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ /* Testcase extracted from PR15353. */ @@ -20,3 +20,4 @@ doit: } /* { dg-final { scan-tree-dump ">=" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-4.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-optimized" } */ +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ /* Testcase extracted from PR15353. */ @@ -18,3 +18,4 @@ void foo (int x, int a) } /* { dg-final { scan-tree-dump "!=" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-5.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-optimized" } */ +/* { dg-options "-O -fdump-tree-optimized-details-blocks" } */ /* Testcase from PR15353. */ @@ -17,3 +17,4 @@ int f(int *i, int *j) } /* { dg-final { scan-tree-dump ">=" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-6.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-ifcombine" } */ +/* { dg-options "-O -fdump-tree-ifcombine-details-blocks" } */ void bar (void); @@ -34,3 +34,4 @@ foo2 (unsigned int a) /* { dg-final { scan-tree-dump "optimizing bits or bits test" "ifcombine" } } */ /* { dg-final { scan-tree-dump "optimizing double bit test" "ifcombine" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */ Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-7.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-ifcombine" } */ +/* { dg-options "-O -fdump-tree-ifcombine-details-blocks" } */ int test1 (int i, int j) { @@ -12,3 +12,4 @@ int test1 (int i, int j) /* The above should be optimized to a i > j test by ifcombine. */ /* { dg-final { scan-tree-dump " > " "ifcombine" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */ Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-8.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fno-trapping-math -fdump-tree-ifcombine" } */ +/* { dg-options "-O -fno-trapping-math -fdump-tree-ifcombine-details-blocks" } */ double test1 (double i, double j) { @@ -22,3 +22,4 @@ plouf: Instead we get u<=, which is acceptable with -fno-trapping-math. */ /* { dg-final { scan-tree-dump " u<= " "ifcombine" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */ Index: testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c =================================================================== --- testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c (revision 245134) +++ testsuite/gcc.dg/tree-ssa/ssa-ifcombine-9.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-trapping-math -fdump-tree-ifcombine" } */ +/* { dg-options "-O2 -fno-trapping-math -fdump-tree-ifcombine-details-blocks" } */ void f (); enum Sign { NEG=-1, ZERO, POS }; @@ -19,3 +19,4 @@ void g (double x) The transformation would also be legal with -ftrapping-math. */ /* { dg-final { scan-tree-dump "optimizing.* < " "ifcombine" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "ifcombine" } } */ Index: testsuite/gcc.dg/tree-ssa/threadbackward-1.c =================================================================== --- testsuite/gcc.dg/tree-ssa/threadbackward-1.c (revision 0) +++ testsuite/gcc.dg/tree-ssa/threadbackward-1.c (working copy) @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ethread" } */ +char *c; +int t() +{ + for (int i=0;i<5000;i++) + c[i]=i; +} +/* { dg-final { scan-tree-dump-times "Registering FSM jump thread" 1 "ethread"} } */ Index: tree-ssa-ifcombine.c =================================================================== --- tree-ssa-ifcombine.c (revision 245134) +++ tree-ssa-ifcombine.c (working copy) @@ -332,6 +332,51 @@ recognize_bits_test (gcond *cond, tree * return true; } + +/* Update profile after code in outer_cond_bb was adjuted so + outer_cond_bb has no condition. */ + +static void +update_profile_after_ifcombine (basic_block inner_cond_bb, + basic_block outer_cond_bb) +{ + edge outer_to_inner = find_edge (outer_cond_bb, inner_cond_bb); + edge outer2 = (EDGE_SUCC (outer_cond_bb, 0) == outer_to_inner + ? EDGE_SUCC (outer_cond_bb, 1) + : EDGE_SUCC (outer_cond_bb, 0)); + edge inner_taken = EDGE_SUCC (inner_cond_bb, 0); + edge inner_not_taken = EDGE_SUCC (inner_cond_bb, 1); + + if (inner_taken->dest != outer2->dest) + std::swap (inner_taken, inner_not_taken); + gcc_assert (inner_taken->dest == outer2->dest); + + /* In the following we assume that inner_cond_bb has single predecessor. */ + gcc_assert (single_pred_p (inner_cond_bb)); + + /* Path outer_cond_bb->(outer2) needs to be merged into path + outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken) + and probability of inner_not_taken updated. */ + + outer_to_inner->count = outer_cond_bb->count; + inner_cond_bb->count = outer_cond_bb->count; + inner_taken->count += outer2->count; + outer2->count = 0; + + inner_taken->probability = outer2->probability + + RDIV (outer_to_inner->probability + * inner_taken->probability, + REG_BR_PROB_BASE); + if (inner_taken->probability > REG_BR_PROB_BASE) + inner_taken->probability = REG_BR_PROB_BASE; + inner_not_taken->probability = REG_BR_PROB_BASE + - inner_taken->probability; + + outer_to_inner->probability = REG_BR_PROB_BASE; + inner_cond_bb->frequency = outer_cond_bb->frequency; + outer2->probability = 0; +} + /* If-convert on a and pattern with a common else block. The inner if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB. inner_inv, outer_inv and result_inv indicate whether the conditions @@ -394,6 +439,7 @@ ifcombine_ifandif (basic_block inner_con outer_inv ? boolean_false_node : boolean_true_node); update_stmt (outer_cond); + update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb); if (dump_file) { fprintf (dump_file, "optimizing double bit test to "); @@ -471,6 +518,7 @@ ifcombine_ifandif (basic_block inner_con gimple_cond_set_condition_from_tree (outer_cond, outer_inv ? boolean_false_node : boolean_true_node); update_stmt (outer_cond); + update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb); if (dump_file) { @@ -554,6 +602,7 @@ ifcombine_ifandif (basic_block inner_con gimple_cond_set_condition_from_tree (outer_cond, outer_inv ? boolean_false_node : boolean_true_node); update_stmt (outer_cond); + update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb); if (dump_file) {