Message ID | 20211208055416.1415283-4-luoxhu@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | Dependency patches for hoist LIM code to cold loop | expand |
On 12/7/2021 10:54 PM, Xionghu Luo via Gcc-patches wrote: > In tree-ssa-loop-split.c, split_loop and split_loop_on_cond does two > kind of split. split_loop only works for single loop and insert edge at > exit when split, while split_loop_on_cond is not limited to single loop > and insert edge at latch when split. Both split behavior should consider > loop count and probability update. For split_loop, loop split condition > is moved in front of loop1 and loop2; But split_loop_on_cond moves the > condition between loop1 and loop2, this patch does: > 1) profile count proportion for both original loop and copied loop > without dropping down the true branch's count; > 2) probability update in the two loops and between the two loops. > > Regression tested pass, OK for master? > > Changes diff for split_loop and split_loop_on_cond cases: > > 1) diff base/loop-split.c.151t.lsplit patched/loop-split.c.152t.lsplit > ... > <bb 2> [local count: 118111600]: > if (beg_5(D) < end_8(D)) > goto <bb 14>; [89.00%] > else > goto <bb 6>; [11.00%] > > <bb 14> [local count: 105119324]: > if (beg2_6(D) < c_9(D)) > - goto <bb 15>; [100.00%] > + goto <bb 15>; [33.00%] > else > - goto <bb 16>; [100.00%] > + goto <bb 16>; [67.00%] > > - <bb 15> [local count: 105119324]: > + <bb 15> [local count: 34689377]: > _25 = beg_5(D) + 1; > _26 = end_8(D) - beg_5(D); > _27 = beg2_6(D) + _26; > _28 = MIN_EXPR <c_9(D), _27>; > > - <bb 3> [local count: 955630225]: > + <bb 3> [local count: 315357973]: > # i_16 = PHI <i_11(8), beg_5(D)(15)> > # j_17 = PHI <j_12(8), beg2_6(D)(15)> > printf ("a: %d %d\n", i_16, j_17); > i_11 = i_16 + 1; > j_12 = j_17 + 1; > if (j_12 < _28) > - goto <bb 8>; [89.00%] > + goto <bb 8>; [29.37%] > else > - goto <bb 17>; [11.00%] > + goto <bb 17>; [70.63%] > > - <bb 8> [local count: 850510901]: > + <bb 8> [local count: 280668596]: > goto <bb 3>; [100.00%] > > - <bb 16> [local count: 105119324]: > + <bb 16> [local count: 70429947]: > # i_22 = PHI <beg_5(D)(14), i_29(17)> > # j_23 = PHI <beg2_6(D)(14), j_30(17)> > > <bb 10> [local count: 955630225]: > # i_2 = PHI <i_22(16), i_20(13)> > # j_1 = PHI <j_23(16), j_21(13)> > i_20 = i_2 + 1; > j_21 = j_1 + 1; > if (end_8(D) > i_20) > - goto <bb 13>; [89.00%] > + goto <bb 13>; [59.63%] > else > - goto <bb 9>; [11.00%] > + goto <bb 9>; [40.37%] > > - <bb 13> [local count: 850510901]: > + <bb 13> [local count: 569842305]: > goto <bb 10>; [100.00%] > > <bb 17> [local count: 105119324]: > # i_29 = PHI <i_11(3)> > # j_30 = PHI <j_12(3)> > if (end_8(D) > i_29) > goto <bb 16>; [80.00%] > else > goto <bb 9>; [20.00%] > > <bb 9> [local count: 105119324]: > > <bb 6> [local count: 118111600]: > return 0; > > } > <bb 2> [local count: 118111600]: > - if (beg_5(D) < end_8(D)) > + _1 = end_6(D) - beg_7(D); > + j_9 = _1 + beg2_8(D); > + if (end_6(D) > beg_7(D)) > goto <bb 14>; [89.00%] > else > goto <bb 6>; [11.00%] > > <bb 14> [local count: 105119324]: > - if (beg2_6(D) < c_9(D)) > - goto <bb 15>; [100.00%] > + if (j_9 >= c_11(D)) > + goto <bb 15>; [33.00%] > else > - goto <bb 16>; [100.00%] > + goto <bb 16>; [67.00%] > > - <bb 15> [local count: 105119324]: > - _25 = beg_5(D) + 1; > - _26 = end_8(D) - beg_5(D); > - _27 = beg2_6(D) + _26; > - _28 = MIN_EXPR <c_9(D), _27>; > - > - <bb 3> [local count: 955630225]: > - # i_16 = PHI <i_11(8), beg_5(D)(15)> > - # j_17 = PHI <j_12(8), beg2_6(D)(15)> > - printf ("a: %d %d\n", i_16, j_17); > - i_11 = i_16 + 1; > - j_12 = j_17 + 1; > - if (j_12 < _28) > - goto <bb 8>; [89.00%] > + <bb 15> [local count: 34689377]: > + _27 = end_6(D) + -1; > + _28 = beg_7(D) - end_6(D); > + _29 = j_9 + _28; > + _30 = _29 + 1; > + _31 = MAX_EXPR <c_11(D), _30>; > + > + <bb 3> [local count: 315357973]: > + # i_18 = PHI <i_13(8), end_6(D)(15)> > + # j_19 = PHI <j_14(8), j_9(15)> > + printf ("a: %d %d\n", i_18, j_19); > + i_13 = i_18 + -1; > + j_14 = j_19 + -1; > + if (j_14 >= _31) > + goto <bb 8>; [29.37%] > else > - goto <bb 17>; [11.00%] > + goto <bb 17>; [70.63%] > > - <bb 8> [local count: 850510901]: > + <bb 8> [local count: 280668596]: > goto <bb 3>; [100.00%] > > - <bb 16> [local count: 105119324]: > - # i_22 = PHI <beg_5(D)(14), i_29(17)> > - # j_23 = PHI <beg2_6(D)(14), j_30(17)> > + <bb 16> [local count: 70429947]: > + # i_24 = PHI <end_6(D)(14), i_32(17)> > + # j_25 = PHI <j_9(14), j_33(17)> > > <bb 10> [local count: 955630225]: > - # i_2 = PHI <i_22(16), i_20(13)> > - # j_1 = PHI <j_23(16), j_21(13)> > - i_20 = i_2 + 1; > - j_21 = j_1 + 1; > - if (end_8(D) > i_20) > + # i_3 = PHI <i_24(16), i_22(13)> > + # j_2 = PHI <j_25(16), j_23(13)> > + i_22 = i_3 + -1; > + j_23 = j_2 + -1; > + if (beg_7(D) < i_22) > goto <bb 13>; [89.00%] > else > goto <bb 9>; [11.00%] > > - <bb 13> [local count: 850510901]: > + <bb 13> [local count: 569842305]: > goto <bb 10>; [100.00%] > > <bb 17> [local count: 105119324]: > - # i_29 = PHI <i_11(3)> > - # j_30 = PHI <j_12(3)> > - if (end_8(D) > i_29) > + # i_32 = PHI <i_13(3)> > + # j_33 = PHI <j_14(3)> > + if (beg_7(D) < i_32) > goto <bb 16>; [80.00%] > else > goto <bb 9>; [20.00%] > > <bb 9> [local count: 105119324]: > > <bb 6> [local count: 118111600]: > return 0; > > } > > 2) diff base/loop-cond-split-1.c.151t.lsplit patched/loop-cond-split-1.c.151t.lsplit: > ... > <bb 2> [local count: 118111600]: > if (n_7(D) > 0) > goto <bb 4>; [89.00%] > else > goto <bb 3>; [11.00%] > > <bb 3> [local count: 118111600]: > return; > > <bb 4> [local count: 105119324]: > pretmp_3 = ga; > > - <bb 5> [local count: 955630225]: > + <bb 5> [local count: 315357973]: > # i_13 = PHI <i_10(20), 0(4)> > # prephitmp_12 = PHI <prephitmp_5(20), pretmp_3(4)> > if (prephitmp_12 != 0) > goto <bb 6>; [33.00%] > else > goto <bb 7>; [67.00%] > > <bb 6> [local count: 315357972]: > _2 = do_something (); > ga = _2; > > - <bb 7> [local count: 955630225]: > + <bb 7> [local count: 315357973]: > # prephitmp_5 = PHI <prephitmp_12(5), _2(6)> > i_10 = inc (i_13); > if (n_7(D) > i_10) > goto <bb 21>; [89.00%] > else > goto <bb 11>; [11.00%] > > <bb 11> [local count: 105119324]: > goto <bb 3>; [100.00%] > > - <bb 21> [local count: 850510901]: > + <bb 21> [local count: 280668596]: > if (prephitmp_12 != 0) > - goto <bb 20>; [100.00%] > + goto <bb 20>; [33.00%] > else > - goto <bb 19>; [INV] > + goto <bb 19>; [67.00%] > > - <bb 20> [local count: 850510901]: > + <bb 20> [local count: 280668596]: > goto <bb 5>; [100.00%] > > - <bb 19> [count: 0]: > + <bb 19> [local count: 70429947]: > # i_23 = PHI <i_10(21)> > # prephitmp_25 = PHI <prephitmp_5(21)> > > - <bb 12> [local count: 955630225]: > + <bb 12> [local count: 640272252]: > # i_15 = PHI <i_23(19), i_22(16)> > # prephitmp_16 = PHI <prephitmp_25(19), prephitmp_16(16)> > i_22 = inc (i_15); > if (n_7(D) > i_22) > goto <bb 16>; [89.00%] > else > goto <bb 11>; [11.00%] > > - <bb 16> [local count: 850510901]: > + <bb 16> [local count: 569842305]: > goto <bb 12>; [100.00%] > > } > > gcc/ChangeLog: > > * tree-ssa-loop-split.c (split_loop): Fix incorrect > profile_count and probability. > (do_split_loop_on_cond): Likewise. > --- > gcc/tree-ssa-loop-split.c | 85 +++++++++++++++++++++++++++++++++++---- > 1 file changed, 77 insertions(+), 8 deletions(-) > > diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c > index 3f6ad046623..33128061aab 100644 > --- a/gcc/tree-ssa-loop-split.c > +++ b/gcc/tree-ssa-loop-split.c > > @@ -607,6 +610,38 @@ split_loop (class loop *loop1) > tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1)); > patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true); > > + update_ssa (TODO_update_ssa); > + > + /* Proportion first loop's bb counts except those dominated by true > + branch to avoid drop 1s down. */ > + basic_block *bbs1, *bbs2; > + bbs1 = get_loop_body (loop1); > + unsigned j; > + for (j = 0; j < loop1->num_nodes; j++) > + if (bbs1[j] == loop1->latch > + || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest)) > + bbs1[j]->count > + = bbs1[j]->count.apply_probability (true_edge->probability); > + free (bbs1); It looks like there's two copies of this code in this patch, one in split_loop and the other in do_split_loop_on_cond. Would it make sense to factor it out into its own little function? > + > + /* Proportion second loop's bb counts except those dominated by false > + branch to avoid drop 1s down. */ > + basic_block bbi_copy = get_bb_copy (false_edge->dest); > + bbs2 = get_loop_body (loop2); > + for (j = 0; j < loop2->num_nodes; j++) > + if (bbs2[j] == loop2->latch > + || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy)) > + bbs2[j]->count = bbs2[j]->count.apply_probability ( > + true_edge->probability.invert ()); > + free (bbs2); Similarly for this block of code. If those can be reasonably factored out into two helper functions to be called from split_loop and do_split_loop_on_cond, then this is OK with the refactoring. jeff
On 2021/12/9 07:47, Jeff Law wrote: >> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c >> index 3f6ad046623..33128061aab 100644 >> --- a/gcc/tree-ssa-loop-split.c >> +++ b/gcc/tree-ssa-loop-split.c >> >> @@ -607,6 +610,38 @@ split_loop (class loop *loop1) >> tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge >> (loop1)); >> patch_loop_exit (loop1, guard_stmt, guard_next, newend, >> initial_true); >> + update_ssa (TODO_update_ssa); >> + >> + /* Proportion first loop's bb counts except those dominated by true >> + branch to avoid drop 1s down. */ >> + basic_block *bbs1, *bbs2; >> + bbs1 = get_loop_body (loop1); >> + unsigned j; >> + for (j = 0; j < loop1->num_nodes; j++) >> + if (bbs1[j] == loop1->latch >> + || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest)) >> + bbs1[j]->count >> + = bbs1[j]->count.apply_probability (true_edge->probability); >> + free (bbs1); > It looks like there's two copies of this code in this patch, one in > split_loop and the other in do_split_loop_on_cond. Would it make sense > to factor it out into its own little function? > > >> + >> + /* Proportion second loop's bb counts except those dominated by >> false >> + branch to avoid drop 1s down. */ >> + basic_block bbi_copy = get_bb_copy (false_edge->dest); >> + bbs2 = get_loop_body (loop2); >> + for (j = 0; j < loop2->num_nodes; j++) >> + if (bbs2[j] == loop2->latch >> + || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy)) >> + bbs2[j]->count = bbs2[j]->count.apply_probability ( >> + true_edge->probability.invert ()); >> + free (bbs2); > Similarly for this block of code. > > If those can be reasonably factored out into two helper functions to be > called from split_loop and do_split_loop_on_cond, then this is OK with > the refactoring. > > jeff Thanks for the comments, updated as below. Will commit this patchset and the approved patch for LIM if there are no objections: [PATCH v2 3/3] Fix loop split incorrect count and probability In tree-ssa-loop-split.c, split_loop and split_loop_on_cond does two kind of split. split_loop only works for single loop and insert edge at exit when split, while split_loop_on_cond is not limited to single loop and insert edge at latch when split. Both split behavior should consider loop count and probability update. For split_loop, loop split condition is moved in front of loop1 and loop2; But split_loop_on_cond moves the condition between loop1 and loop2, this patch does: 1) profile count proportion for both original loop and copied loop without dropping down the true branch's count; 2) probability update in the two loops and between the two loops. Regression tested pass, OK for master? Changes diff for split_loop and split_loop_on_cond cases: 1) diff base/loop-split.c.151t.lsplit patched/loop-split.c.152t.lsplit ... <bb 2> [local count: 118111600]: _1 = end_6(D) - beg_7(D); j_9 = _1 + beg2_8(D); if (end_6(D) > beg_7(D)) goto <bb 14>; [89.00%] else goto <bb 6>; [11.00%] <bb 14> [local count: 105119324]: if (j_9 >= c_11(D)) - goto <bb 15>; [100.00%] + goto <bb 15>; [33.00%] else - goto <bb 16>; [100.00%] + goto <bb 16>; [67.00%] - <bb 15> [local count: 105119324]: + <bb 15> [local count: 34689377]: _27 = end_6(D) + -1; _28 = beg_7(D) - end_6(D); _29 = j_9 + _28; _30 = _29 + 1; _31 = MAX_EXPR <c_11(D), _30>; - <bb 3> [local count: 955630225]: + <bb 3> [local count: 315357973]: # i_18 = PHI <i_13(8), end_6(D)(15)> # j_19 = PHI <j_14(8), j_9(15)> printf ("a: %d %d\n", i_18, j_19); i_13 = i_18 + -1; j_14 = j_19 + -1; if (j_14 >= _31) - goto <bb 8>; [89.00%] + goto <bb 8>; [29.37%] else - goto <bb 17>; [11.00%] + goto <bb 17>; [70.63%] - <bb 8> [local count: 850510901]: + <bb 8> [local count: 280668596]: goto <bb 3>; [100.00%] - <bb 16> [local count: 105119324]: + <bb 16> [local count: 70429947]: # i_24 = PHI <end_6(D)(14), i_32(17)> # j_25 = PHI <j_9(14), j_33(17)> <bb 10> [local count: 955630225]: # i_3 = PHI <i_24(16), i_22(13)> # j_2 = PHI <j_25(16), j_23(13)> i_22 = i_3 + -1; j_23 = j_2 + -1; if (beg_7(D) < i_22) goto <bb 13>; [89.00%] else goto <bb 9>; [11.00%] - <bb 13> [local count: 850510901]: + <bb 13> [local count: 569842305]: goto <bb 10>; [100.00%] <bb 17> [local count: 105119324]: # i_32 = PHI <i_13(3)> # j_33 = PHI <j_14(3)> if (beg_7(D) < i_32) goto <bb 16>; [80.00%] else goto <bb 9>; [20.00%] <bb 9> [local count: 105119324]: <bb 6> [local count: 118111600]: return 0; } 2) diff base/loop-cond-split-1.c.151t.lsplit patched/loop-cond-split-1.c.151t.lsplit: ... <bb 2> [local count: 118111600]: if (n_7(D) > 0) goto <bb 4>; [89.00%] else goto <bb 3>; [11.00%] <bb 3> [local count: 118111600]: return; <bb 4> [local count: 105119324]: pretmp_3 = ga; - <bb 5> [local count: 955630225]: + <bb 5> [local count: 315357973]: # i_13 = PHI <i_10(20), 0(4)> # prephitmp_12 = PHI <prephitmp_5(20), pretmp_3(4)> if (prephitmp_12 != 0) goto <bb 6>; [33.00%] else goto <bb 7>; [67.00%] <bb 6> [local count: 315357972]: _2 = do_something (); ga = _2; - <bb 7> [local count: 955630225]: + <bb 7> [local count: 315357973]: # prephitmp_5 = PHI <prephitmp_12(5), _2(6)> i_10 = inc (i_13); if (n_7(D) > i_10) goto <bb 21>; [89.00%] else goto <bb 11>; [11.00%] <bb 11> [local count: 105119324]: goto <bb 3>; [100.00%] - <bb 21> [local count: 850510901]: + <bb 21> [local count: 280668596]: if (prephitmp_12 != 0) - goto <bb 20>; [100.00%] + goto <bb 20>; [33.00%] else - goto <bb 19>; [INV] + goto <bb 19>; [67.00%] - <bb 20> [local count: 850510901]: + <bb 20> [local count: 280668596]: goto <bb 5>; [100.00%] - <bb 19> [count: 0]: + <bb 19> [local count: 70429947]: # i_23 = PHI <i_10(21)> # prephitmp_25 = PHI <prephitmp_5(21)> - <bb 12> [local count: 955630225]: + <bb 12> [local count: 640272252]: # i_15 = PHI <i_23(19), i_22(16)> # prephitmp_16 = PHI <prephitmp_25(19), prephitmp_16(16)> i_22 = inc (i_15); if (n_7(D) > i_22) goto <bb 16>; [89.00%] else goto <bb 11>; [11.00%] - <bb 16> [local count: 850510901]: + <bb 16> [local count: 569842305]: goto <bb 12>; [100.00%] } gcc/ChangeLog: * tree-ssa-loop-split.c (Fix_loop_bb_probability): New function. (split_loop): Fix incorrect profile_count and probability. (do_split_loop_on_cond): Likewise. --- gcc/tree-ssa-loop-split.c | 72 ++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c index 3f6ad046623..55011aeed97 100644 --- a/gcc/tree-ssa-loop-split.c +++ b/gcc/tree-ssa-loop-split.c @@ -484,6 +484,39 @@ compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter, return newend; } +/* Fix the two loop's bb count after split based on the split edge probability, + don't adjust the bbs dominated by true branches of that loop to avoid + dropping 1s down. */ +static void +fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge, + edge false_edge) +{ + update_ssa (TODO_update_ssa); + + /* Proportion first loop's bb counts except those dominated by true + branch to avoid drop 1s down. */ + basic_block *bbs1, *bbs2; + bbs1 = get_loop_body (loop1); + unsigned j; + for (j = 0; j < loop1->num_nodes; j++) + if (bbs1[j] == loop1->latch + || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest)) + bbs1[j]->count + = bbs1[j]->count.apply_probability (true_edge->probability); + free (bbs1); + + /* Proportion second loop's bb counts except those dominated by false + branch to avoid drop 1s down. */ + basic_block bbi_copy = get_bb_copy (false_edge->dest); + bbs2 = get_loop_body (loop2); + for (j = 0; j < loop2->num_nodes; j++) + if (bbs2[j] == loop2->latch + || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy)) + bbs2[j]->count + = bbs2[j]->count.apply_probability (true_edge->probability.invert ()); + free (bbs2); +} + /* Checks if LOOP contains an conditional block whose condition depends on which side in the iteration space it is, and if so splits the iteration space into two loops. Returns true if the @@ -575,7 +608,10 @@ split_loop (class loop *loop1) stmts2); tree cond = build2 (guard_code, boolean_type_node, guard_init, border); if (!initial_true) - cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); + cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); + + edge true_edge, false_edge; + extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge); /* Now version the loop, placing loop2 after loop1 connecting them, and fix up SSA form for that. */ @@ -583,11 +619,11 @@ split_loop (class loop *loop1) basic_block cond_bb; class loop *loop2 = loop_version (loop1, cond, &cond_bb, - profile_probability::always (), - profile_probability::always (), - profile_probability::always (), - profile_probability::always (), - true); + true_edge->probability, + true_edge->probability.invert (), + profile_probability::always (), + profile_probability::always (), + true); gcc_assert (loop2); edge new_e = connect_loops (loop1, loop2); @@ -607,6 +643,15 @@ split_loop (class loop *loop1) tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1)); patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true); + fix_loop_bb_probability (loop1, loop2, true_edge, false_edge); + + /* Fix first loop's exit probability after scaling. */ + edge exit_to_latch1 = single_pred_edge (loop1->latch); + exit_to_latch1->probability = exit_to_latch1->probability.apply_scale ( + true_edge->probability.to_reg_br_prob_base (), REG_BR_PROB_BASE); + single_exit (loop1)->probability + = exit_to_latch1->probability.invert (); + /* Finally patch out the two copies of the condition to be always true/false (or opposite). */ gcond *force_true = as_a<gcond *> (last_stmt (bbs[i])); @@ -1486,8 +1531,8 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch) initialize_original_copy_tables (); struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL, - profile_probability::always (), - profile_probability::never (), + invar_branch->probability.invert (), + invar_branch->probability, profile_probability::always (), profile_probability::always (), true); @@ -1535,6 +1580,17 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch) between loop1 and loop2. */ connect_loop_phis (loop1, loop2, to_loop2); + edge true_edge, false_edge, skip_edge1, skip_edge2; + extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); + + skip_edge1 = true_invar ? false_edge : true_edge; + skip_edge2 = true_invar ? true_edge : false_edge; + fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2); + + /* Fix first loop's exit probability after scaling. */ + to_loop1->probability = invar_branch->probability.invert (); + to_loop2->probability = invar_branch->probability; + free_original_copy_tables (); return true;
On 2021/12/13 16:57, Xionghu Luo via Gcc-patches wrote: > > > On 2021/12/9 07:47, Jeff Law wrote: >>> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c >>> index 3f6ad046623..33128061aab 100644 >>> --- a/gcc/tree-ssa-loop-split.c >>> +++ b/gcc/tree-ssa-loop-split.c >>> >>> @@ -607,6 +610,38 @@ split_loop (class loop *loop1) >>> tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge >>> (loop1)); >>> patch_loop_exit (loop1, guard_stmt, guard_next, newend, >>> initial_true); >>> + update_ssa (TODO_update_ssa); >>> + >>> + /* Proportion first loop's bb counts except those dominated by true >>> + branch to avoid drop 1s down. */ >>> + basic_block *bbs1, *bbs2; >>> + bbs1 = get_loop_body (loop1); >>> + unsigned j; >>> + for (j = 0; j < loop1->num_nodes; j++) >>> + if (bbs1[j] == loop1->latch >>> + || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest)) >>> + bbs1[j]->count >>> + = bbs1[j]->count.apply_probability (true_edge->probability); >>> + free (bbs1); >> It looks like there's two copies of this code in this patch, one in >> split_loop and the other in do_split_loop_on_cond. Would it make sense >> to factor it out into its own little function? >> >> >>> + >>> + /* Proportion second loop's bb counts except those dominated by >>> false >>> + branch to avoid drop 1s down. */ >>> + basic_block bbi_copy = get_bb_copy (false_edge->dest); >>> + bbs2 = get_loop_body (loop2); >>> + for (j = 0; j < loop2->num_nodes; j++) >>> + if (bbs2[j] == loop2->latch >>> + || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy)) >>> + bbs2[j]->count = bbs2[j]->count.apply_probability ( >>> + true_edge->probability.invert ()); >>> + free (bbs2); >> Similarly for this block of code. >> >> If those can be reasonably factored out into two helper functions to be >> called from split_loop and do_split_loop_on_cond, then this is OK with >> the refactoring. >> >> jeff > > > Thanks for the comments, updated as below. Will commit this patchset and the > approved patch for LIM if there are no objections: This patch is committed to r12-6086. > > > [PATCH v2 3/3] Fix loop split incorrect count and probability > > In tree-ssa-loop-split.c, split_loop and split_loop_on_cond does two > kind of split. split_loop only works for single loop and insert edge at > exit when split, while split_loop_on_cond is not limited to single loop > and insert edge at latch when split. Both split behavior should consider > loop count and probability update. For split_loop, loop split condition > is moved in front of loop1 and loop2; But split_loop_on_cond moves the > condition between loop1 and loop2, this patch does: > 1) profile count proportion for both original loop and copied loop > without dropping down the true branch's count; > 2) probability update in the two loops and between the two loops. > > Regression tested pass, OK for master? > > Changes diff for split_loop and split_loop_on_cond cases: > > 1) diff base/loop-split.c.151t.lsplit patched/loop-split.c.152t.lsplit > ... > <bb 2> [local count: 118111600]: > _1 = end_6(D) - beg_7(D); > j_9 = _1 + beg2_8(D); > if (end_6(D) > beg_7(D)) > goto <bb 14>; [89.00%] > else > goto <bb 6>; [11.00%] > > <bb 14> [local count: 105119324]: > if (j_9 >= c_11(D)) > - goto <bb 15>; [100.00%] > + goto <bb 15>; [33.00%] > else > - goto <bb 16>; [100.00%] > + goto <bb 16>; [67.00%] > > - <bb 15> [local count: 105119324]: > + <bb 15> [local count: 34689377]: > _27 = end_6(D) + -1; > _28 = beg_7(D) - end_6(D); > _29 = j_9 + _28; > _30 = _29 + 1; > _31 = MAX_EXPR <c_11(D), _30>; > > - <bb 3> [local count: 955630225]: > + <bb 3> [local count: 315357973]: > # i_18 = PHI <i_13(8), end_6(D)(15)> > # j_19 = PHI <j_14(8), j_9(15)> > printf ("a: %d %d\n", i_18, j_19); > i_13 = i_18 + -1; > j_14 = j_19 + -1; > if (j_14 >= _31) > - goto <bb 8>; [89.00%] > + goto <bb 8>; [29.37%] > else > - goto <bb 17>; [11.00%] > + goto <bb 17>; [70.63%] > > - <bb 8> [local count: 850510901]: > + <bb 8> [local count: 280668596]: > goto <bb 3>; [100.00%] > > - <bb 16> [local count: 105119324]: > + <bb 16> [local count: 70429947]: > # i_24 = PHI <end_6(D)(14), i_32(17)> > # j_25 = PHI <j_9(14), j_33(17)> > > <bb 10> [local count: 955630225]: > # i_3 = PHI <i_24(16), i_22(13)> > # j_2 = PHI <j_25(16), j_23(13)> > i_22 = i_3 + -1; > j_23 = j_2 + -1; > if (beg_7(D) < i_22) > goto <bb 13>; [89.00%] > else > goto <bb 9>; [11.00%] > > - <bb 13> [local count: 850510901]: > + <bb 13> [local count: 569842305]: > goto <bb 10>; [100.00%] > > <bb 17> [local count: 105119324]: > # i_32 = PHI <i_13(3)> > # j_33 = PHI <j_14(3)> > if (beg_7(D) < i_32) > goto <bb 16>; [80.00%] > else > goto <bb 9>; [20.00%] > > <bb 9> [local count: 105119324]: > > <bb 6> [local count: 118111600]: > return 0; > > } > > 2) diff base/loop-cond-split-1.c.151t.lsplit patched/loop-cond-split-1.c.151t.lsplit: > ... > <bb 2> [local count: 118111600]: > if (n_7(D) > 0) > goto <bb 4>; [89.00%] > else > goto <bb 3>; [11.00%] > > <bb 3> [local count: 118111600]: > return; > > <bb 4> [local count: 105119324]: > pretmp_3 = ga; > > - <bb 5> [local count: 955630225]: > + <bb 5> [local count: 315357973]: > # i_13 = PHI <i_10(20), 0(4)> > # prephitmp_12 = PHI <prephitmp_5(20), pretmp_3(4)> > if (prephitmp_12 != 0) > goto <bb 6>; [33.00%] > else > goto <bb 7>; [67.00%] > > <bb 6> [local count: 315357972]: > _2 = do_something (); > ga = _2; > > - <bb 7> [local count: 955630225]: > + <bb 7> [local count: 315357973]: > # prephitmp_5 = PHI <prephitmp_12(5), _2(6)> > i_10 = inc (i_13); > if (n_7(D) > i_10) > goto <bb 21>; [89.00%] > else > goto <bb 11>; [11.00%] > > <bb 11> [local count: 105119324]: > goto <bb 3>; [100.00%] > > - <bb 21> [local count: 850510901]: > + <bb 21> [local count: 280668596]: > if (prephitmp_12 != 0) > - goto <bb 20>; [100.00%] > + goto <bb 20>; [33.00%] > else > - goto <bb 19>; [INV] > + goto <bb 19>; [67.00%] > > - <bb 20> [local count: 850510901]: > + <bb 20> [local count: 280668596]: > goto <bb 5>; [100.00%] > > - <bb 19> [count: 0]: > + <bb 19> [local count: 70429947]: > # i_23 = PHI <i_10(21)> > # prephitmp_25 = PHI <prephitmp_5(21)> > > - <bb 12> [local count: 955630225]: > + <bb 12> [local count: 640272252]: > # i_15 = PHI <i_23(19), i_22(16)> > # prephitmp_16 = PHI <prephitmp_25(19), prephitmp_16(16)> > i_22 = inc (i_15); > if (n_7(D) > i_22) > goto <bb 16>; [89.00%] > else > goto <bb 11>; [11.00%] > > - <bb 16> [local count: 850510901]: > + <bb 16> [local count: 569842305]: > goto <bb 12>; [100.00%] > } > > gcc/ChangeLog: > > * tree-ssa-loop-split.c (Fix_loop_bb_probability): New function. > (split_loop): Fix incorrect profile_count and probability. > (do_split_loop_on_cond): Likewise. > --- > gcc/tree-ssa-loop-split.c | 72 ++++++++++++++++++++++++++++++++++----- > 1 file changed, 64 insertions(+), 8 deletions(-) > > diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c > index 3f6ad046623..55011aeed97 100644 > --- a/gcc/tree-ssa-loop-split.c > +++ b/gcc/tree-ssa-loop-split.c > @@ -484,6 +484,39 @@ compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter, > return newend; > } > > +/* Fix the two loop's bb count after split based on the split edge probability, > + don't adjust the bbs dominated by true branches of that loop to avoid > + dropping 1s down. */ > +static void > +fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge, > + edge false_edge) > +{ > + update_ssa (TODO_update_ssa); > + > + /* Proportion first loop's bb counts except those dominated by true > + branch to avoid drop 1s down. */ > + basic_block *bbs1, *bbs2; > + bbs1 = get_loop_body (loop1); > + unsigned j; > + for (j = 0; j < loop1->num_nodes; j++) > + if (bbs1[j] == loop1->latch > + || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest)) > + bbs1[j]->count > + = bbs1[j]->count.apply_probability (true_edge->probability); > + free (bbs1); > + > + /* Proportion second loop's bb counts except those dominated by false > + branch to avoid drop 1s down. */ > + basic_block bbi_copy = get_bb_copy (false_edge->dest); > + bbs2 = get_loop_body (loop2); > + for (j = 0; j < loop2->num_nodes; j++) > + if (bbs2[j] == loop2->latch > + || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy)) > + bbs2[j]->count > + = bbs2[j]->count.apply_probability (true_edge->probability.invert ()); > + free (bbs2); > +} > + > /* Checks if LOOP contains an conditional block whose condition > depends on which side in the iteration space it is, and if so > splits the iteration space into two loops. Returns true if the > @@ -575,7 +608,10 @@ split_loop (class loop *loop1) > stmts2); > tree cond = build2 (guard_code, boolean_type_node, guard_init, border); > if (!initial_true) > - cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); > + cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); > + > + edge true_edge, false_edge; > + extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge); > > /* Now version the loop, placing loop2 after loop1 connecting > them, and fix up SSA form for that. */ > @@ -583,11 +619,11 @@ split_loop (class loop *loop1) > basic_block cond_bb; > > class loop *loop2 = loop_version (loop1, cond, &cond_bb, > - profile_probability::always (), > - profile_probability::always (), > - profile_probability::always (), > - profile_probability::always (), > - true); > + true_edge->probability, > + true_edge->probability.invert (), > + profile_probability::always (), > + profile_probability::always (), > + true); > gcc_assert (loop2); > > edge new_e = connect_loops (loop1, loop2); > @@ -607,6 +643,15 @@ split_loop (class loop *loop1) > tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1)); > patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true); > > + fix_loop_bb_probability (loop1, loop2, true_edge, false_edge); > + > + /* Fix first loop's exit probability after scaling. */ > + edge exit_to_latch1 = single_pred_edge (loop1->latch); > + exit_to_latch1->probability = exit_to_latch1->probability.apply_scale ( > + true_edge->probability.to_reg_br_prob_base (), REG_BR_PROB_BASE); > + single_exit (loop1)->probability > + = exit_to_latch1->probability.invert (); > + > /* Finally patch out the two copies of the condition to be always > true/false (or opposite). */ > gcond *force_true = as_a<gcond *> (last_stmt (bbs[i])); > @@ -1486,8 +1531,8 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch) > initialize_original_copy_tables (); > > struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL, > - profile_probability::always (), > - profile_probability::never (), > + invar_branch->probability.invert (), > + invar_branch->probability, > profile_probability::always (), > profile_probability::always (), > true); > @@ -1535,6 +1580,17 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch) > between loop1 and loop2. */ > connect_loop_phis (loop1, loop2, to_loop2); > > + edge true_edge, false_edge, skip_edge1, skip_edge2; > + extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); > + > + skip_edge1 = true_invar ? false_edge : true_edge; > + skip_edge2 = true_invar ? true_edge : false_edge; > + fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2); > + > + /* Fix first loop's exit probability after scaling. */ > + to_loop1->probability = invar_branch->probability.invert (); > + to_loop2->probability = invar_branch->probability; > + > free_original_copy_tables (); > > return true;
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c index 3f6ad046623..33128061aab 100644 --- a/gcc/tree-ssa-loop-split.c +++ b/gcc/tree-ssa-loop-split.c @@ -575,7 +575,10 @@ split_loop (class loop *loop1) stmts2); tree cond = build2 (guard_code, boolean_type_node, guard_init, border); if (!initial_true) - cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); + cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); + + edge true_edge, false_edge; + extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge); /* Now version the loop, placing loop2 after loop1 connecting them, and fix up SSA form for that. */ @@ -583,11 +586,11 @@ split_loop (class loop *loop1) basic_block cond_bb; class loop *loop2 = loop_version (loop1, cond, &cond_bb, - profile_probability::always (), - profile_probability::always (), - profile_probability::always (), - profile_probability::always (), - true); + true_edge->probability, + true_edge->probability.invert (), + profile_probability::always (), + profile_probability::always (), + true); gcc_assert (loop2); edge new_e = connect_loops (loop1, loop2); @@ -607,6 +610,38 @@ split_loop (class loop *loop1) tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1)); patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true); + update_ssa (TODO_update_ssa); + + /* Proportion first loop's bb counts except those dominated by true + branch to avoid drop 1s down. */ + basic_block *bbs1, *bbs2; + bbs1 = get_loop_body (loop1); + unsigned j; + for (j = 0; j < loop1->num_nodes; j++) + if (bbs1[j] == loop1->latch + || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest)) + bbs1[j]->count + = bbs1[j]->count.apply_probability (true_edge->probability); + free (bbs1); + + /* Fix first loop's exit probability after scaling. */ + edge exit_to_latch1 = single_pred_edge (loop1->latch); + exit_to_latch1->probability = exit_to_latch1->probability.apply_scale ( + true_edge->probability.to_reg_br_prob_base (), REG_BR_PROB_BASE); + single_exit (loop1)->probability + = exit_to_latch1->probability.invert (); + + /* Proportion second loop's bb counts except those dominated by false + branch to avoid drop 1s down. */ + basic_block bbi_copy = get_bb_copy (false_edge->dest); + bbs2 = get_loop_body (loop2); + for (j = 0; j < loop2->num_nodes; j++) + if (bbs2[j] == loop2->latch + || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy)) + bbs2[j]->count = bbs2[j]->count.apply_probability ( + true_edge->probability.invert ()); + free (bbs2); + /* Finally patch out the two copies of the condition to be always true/false (or opposite). */ gcond *force_true = as_a<gcond *> (last_stmt (bbs[i])); @@ -1486,8 +1521,8 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch) initialize_original_copy_tables (); struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL, - profile_probability::always (), - profile_probability::never (), + invar_branch->probability.invert (), + invar_branch->probability, profile_probability::always (), profile_probability::always (), true); @@ -1535,6 +1570,40 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch) between loop1 and loop2. */ connect_loop_phis (loop1, loop2, to_loop2); + update_ssa (TODO_update_ssa); + + edge true_edge, false_edge, skip_edge1, skip_edge2; + extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); + + /* Proportion first loop's bb counts except those dominated by true + branch to avoid drop 1s down. */ + skip_edge1 = true_invar ? false_edge : true_edge; + skip_edge2 = true_invar ? true_edge : false_edge; + basic_block *bbs1, *bbs2; + bbs1 = get_loop_body (loop1); + unsigned j; + for (j = 0; j < loop1->num_nodes; j++) + if (bbs1[j] == loop1->latch + || !dominated_by_p (CDI_DOMINATORS, bbs1[j], skip_edge1->dest)) + bbs1[j]->count + = bbs1[j]->count.apply_probability (skip_edge1->probability); + free (bbs1); + + /* Fix first loop's exit probability after scaling. */ + to_loop1->probability = invar_branch->probability.invert (); + to_loop2->probability = invar_branch->probability; + + /* Proportion second loop's bb counts except those dominated by false + branch to avoid drop 1s down. */ + basic_block bbi_copy = get_bb_copy (skip_edge2->dest); + bbs2 = get_loop_body (loop2); + for (j = 0; j < loop2->num_nodes; j++) + if (bbs2[j] == loop2->latch + || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy)) + bbs2[j]->count + = bbs2[j]->count.apply_probability (skip_edge2->probability); + free (bbs2); + free_original_copy_tables (); return true;