Message ID | 20220419111120.F10BC139BE@imap2.suse-dmz.suse.de |
---|---|
State | New |
Headers | show |
Series | [v2] tree-optimization/104912 - ensure cost model is checked first | expand |
> The following makes sure that when we build the versioning condition > for vectorization including the cost model check, we check for the > cost model and branch over other versioning checks. That is what > the cost modeling assumes, since the cost model check is the only > one accounted for in the scalar outside cost. Currently we emit > all checks as straight-line code combined with bitwise ops which > can result in surprising ordering of checks in the final assembly. > > Since loop_version accepts only a single versioning condition > the splitting is done after the fact. > > The result is a 1.5% speedup of 416.gamess on x86_64 when compiling > with -Ofast and tuning for generic or skylake. That's not enough > to recover from the slowdown when vectorizing but it now cuts off > the expensive alias versioning test. > > This is an update to the previously posted patch splitting the > probability between the two branches as outlined in > https://gcc.gnu.org/pipermail/gcc-patches/2022-March/592597.html > > I've re-bootstrapped and tested this on x86_64-unknown-linux-gnu. > > Honza - is the approach to splitting the probabilities sensible? > This fixes a piece of a P1 regression. > > Thanks, > Richard. > > 2022-03-21 Richard Biener <rguenther@suse.de> > > PR tree-optimization/104912 > * tree-vect-loop-manip.cc (vect_loop_versioning): Split > the cost model check to a separate BB to make sure it is > checked first and not combined with other version checks. > --- > gcc/tree-vect-loop-manip.cc | 60 +++++++++++++++++++++++++++++++++++-- > 1 file changed, 57 insertions(+), 3 deletions(-) > > diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc > index 63fb6f669a0..e4381eb7079 100644 > --- a/gcc/tree-vect-loop-manip.cc > +++ b/gcc/tree-vect-loop-manip.cc > @@ -3445,13 +3445,34 @@ vect_loop_versioning (loop_vec_info loop_vinfo, > cond_expr = expr; > } > > + tree cost_name = NULL_TREE; > + profile_probability prob2 = profile_probability::uninitialized (); > + if (cond_expr > + && !integer_truep (cond_expr) > + && (version_niter > + || version_align > + || version_alias > + || version_simd_if_cond)) I assume that this condition... > > + /* Split the cost model check off to a separate BB. Costing assumes > + this is the only thing we perform when we enter the scalar loop > + from a failed cost decision. */ > + if (cost_name && TREE_CODE (cost_name) == SSA_NAME) is if and only if this condition (otherwise prob2 would get uninitialized or lost) > + { > + gimple *def = SSA_NAME_DEF_STMT (cost_name); > + /* All uses of the cost check are 'true' after the check we > + are going to insert. */ > + replace_uses_by (cost_name, boolean_true_node); > + /* And we're going to build the new single use of it. */ > + gcond *cond = gimple_build_cond (NE_EXPR, cost_name, boolean_false_node, > + NULL_TREE, NULL_TREE); > + edge e = split_block (gimple_bb (def), def); > + gimple_stmt_iterator gsi = gsi_for_stmt (def); > + gsi_insert_after (&gsi, cond, GSI_NEW_STMT); > + edge true_e, false_e; > + extract_true_false_edges_from_block (e->dest, &true_e, &false_e); > + e->flags &= ~EDGE_FALLTHRU; > + e->flags |= EDGE_TRUE_VALUE; > + edge e2 = make_edge (e->src, false_e->dest, EDGE_FALSE_VALUE); > + e->probability = prob2; > + e2->probability = prob2.invert (); So this looks fine to me. Honza > + set_immediate_dominator (CDI_DOMINATORS, false_e->dest, e->src); > + auto_vec<basic_block, 3> adj; > + for (basic_block son = first_dom_son (CDI_DOMINATORS, e->dest); > + son; > + son = next_dom_son (CDI_DOMINATORS, son)) > + if (EDGE_COUNT (son->preds) > 1) > + adj.safe_push (son); > + for (auto son : adj) > + set_immediate_dominator (CDI_DOMINATORS, son, e->src); > + } > + > if (version_niter) > { > /* The versioned loop could be infinite, we need to clear existing > -- > 2.34.1
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 63fb6f669a0..e4381eb7079 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -3445,13 +3445,34 @@ vect_loop_versioning (loop_vec_info loop_vinfo, cond_expr = expr; } + tree cost_name = NULL_TREE; + profile_probability prob2 = profile_probability::uninitialized (); + if (cond_expr + && !integer_truep (cond_expr) + && (version_niter + || version_align + || version_alias + || version_simd_if_cond)) + { + cost_name = cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), + &cond_expr_stmt_list, + is_gimple_val, NULL_TREE); + /* Split prob () into two so that the overall probability of passing + both the cost-model and versioning checks is the orig prob. */ + prob2 = prob.split (prob); + } + if (version_niter) vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr); if (cond_expr) - cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), - &cond_expr_stmt_list, - is_gimple_condexpr, NULL_TREE); + { + gimple_seq tem = NULL; + cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), + &tem, + is_gimple_condexpr, NULL_TREE); + gimple_seq_add_seq (&cond_expr_stmt_list, tem); + } if (version_align) vect_create_cond_for_align_checks (loop_vinfo, &cond_expr, @@ -3655,6 +3676,39 @@ vect_loop_versioning (loop_vec_info loop_vinfo, update_ssa (TODO_update_ssa); } + /* Split the cost model check off to a separate BB. Costing assumes + this is the only thing we perform when we enter the scalar loop + from a failed cost decision. */ + if (cost_name && TREE_CODE (cost_name) == SSA_NAME) + { + gimple *def = SSA_NAME_DEF_STMT (cost_name); + /* All uses of the cost check are 'true' after the check we + are going to insert. */ + replace_uses_by (cost_name, boolean_true_node); + /* And we're going to build the new single use of it. */ + gcond *cond = gimple_build_cond (NE_EXPR, cost_name, boolean_false_node, + NULL_TREE, NULL_TREE); + edge e = split_block (gimple_bb (def), def); + gimple_stmt_iterator gsi = gsi_for_stmt (def); + gsi_insert_after (&gsi, cond, GSI_NEW_STMT); + edge true_e, false_e; + extract_true_false_edges_from_block (e->dest, &true_e, &false_e); + e->flags &= ~EDGE_FALLTHRU; + e->flags |= EDGE_TRUE_VALUE; + edge e2 = make_edge (e->src, false_e->dest, EDGE_FALSE_VALUE); + e->probability = prob2; + e2->probability = prob2.invert (); + set_immediate_dominator (CDI_DOMINATORS, false_e->dest, e->src); + auto_vec<basic_block, 3> adj; + for (basic_block son = first_dom_son (CDI_DOMINATORS, e->dest); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + if (EDGE_COUNT (son->preds) > 1) + adj.safe_push (son); + for (auto son : adj) + set_immediate_dominator (CDI_DOMINATORS, son, e->src); + } + if (version_niter) { /* The versioned loop could be infinite, we need to clear existing