Message ID | mpt8sp332lu.fsf@arm.com |
---|---|
State | New |
Headers | show |
Series | [15/n] Consider building nodes from scalars in vect_slp_analyze_node_operations | expand |
On Tue, Oct 29, 2019 at 6:04 PM Richard Sandiford <richard.sandiford@arm.com> wrote:
>
> If the statements in an SLP node aren't similar enough to be vectorised,
> or aren't something the vectoriser has code to handle, the BB vectoriser
> tries building the vector from scalars instead. This patch does the
> same thing if we're able to build a viable-looking tree but fail later
> during the analysis phase, e.g. because the target doesn't support a
> particular vector operation.
>
> This is needed to avoid regressions with a later patch.

OK.

Thanks,
Richard.

>
> 2019-10-29 Richard Sandiford <richard.sandiford@arm.com>
>
> gcc/
>	* tree-vect-slp.c (vect_contains_pattern_stmt_p): New function.
>	(vect_slp_convert_to_external): Likewise.
>	(vect_slp_analyze_node_operations): If analysis fails, try building
>	the node from scalars instead.
>
> gcc/testsuite/
>	* gcc.dg/vect/bb-slp-div-2.c: New test.
>
> Index: gcc/tree-vect-slp.c
> ===================================================================
> --- gcc/tree-vect-slp.c 2019-10-29 17:01:46.000000000 +0000
> +++ gcc/tree-vect-slp.c 2019-10-29 17:02:06.355512105 +0000
> @@ -225,6 +225,19 @@ vect_free_oprnd_info (vec<slp_oprnd_info
>  }
>
>
> +/* Return true if STMTS contains a pattern statement.  */
> +
> +static bool
> +vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
> +{
> +  stmt_vec_info stmt_info;
> +  unsigned int i;
> +  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
> +    if (is_pattern_stmt_p (stmt_info))
> +      return true;
> +  return false;
> +}
> +
>  /* Find the place of the data-ref in STMT_INFO in the interleaving chain
>     that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
>     of the chain.  */
> @@ -2630,6 +2643,39 @@ vect_slp_analyze_node_operations_1 (vec_
>    return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
>  }
>
> +/* Try to build NODE from scalars, returning true on success.
> +   NODE_INSTANCE is the SLP instance that contains NODE.  */
> +
> +static bool
> +vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
> +                              slp_instance node_instance)
> +{
> +  stmt_vec_info stmt_info;
> +  unsigned int i;
> +
> +  if (!is_a <bb_vec_info> (vinfo)
> +      || node == SLP_INSTANCE_TREE (node_instance)
> +      || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
> +    return false;
> +
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +                     "Building vector operands from scalars instead\n");
> +
> +  /* Don't remove and free the child nodes here, since they could be
> +     referenced by other structures.  The analysis and scheduling phases
> +     (need to) ignore child nodes of anything that isn't vect_internal_def.  */
> +  unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
> +  SLP_TREE_DEF_TYPE (node) = vect_external_def;
> +  SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
> +  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
> +    {
> +      tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
> +      SLP_TREE_SCALAR_OPS (node)[i] = lhs;
> +    }
> +  return true;
> +}
> +
>  /* Analyze statements contained in SLP tree NODE after recursively analyzing
>     the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
>
> @@ -2656,6 +2702,13 @@ vect_slp_analyze_node_operations (vec_in
>      {
>        SLP_TREE_NUMBER_OF_VEC_STMTS (node)
>          = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
> +      /* Cope with cases in which we made a late decision to build the
> +         node from scalars.  */
> +      if (SLP_TREE_DEF_TYPE (*leader) == vect_external_def
> +          && vect_slp_convert_to_external (vinfo, node, node_instance))
> +        ;
> +      else
> +        gcc_assert (SLP_TREE_DEF_TYPE (node) == SLP_TREE_DEF_TYPE (*leader));
>        return true;
>      }
>
> @@ -2715,6 +2768,11 @@ vect_slp_analyze_node_operations (vec_in
>      if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
>        STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
>
> +  /* If this node can't be vectorized, try pruning the tree here rather
> +     than felling the whole thing.  */
> +  if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
> +    res = true;
> +
>    return res;
>  }
>
> Index: gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c
> ===================================================================
> --- /dev/null 2019-09-17 11:41:18.176664108 +0100
> +++ gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c 2019-10-29 17:02:06.351512133 +0000
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +
> +int x[4], y[4], z[4];
> +
> +void
> +f (void)
> +{
> +  x[0] += y[0] / z[0] * 2;
> +  x[1] += y[1] / z[1] * 2;
> +  x[2] += y[2] / z[2] * 2;
> +  x[3] += y[3] / z[3] * 2;
> +}
> +
> +/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_int } } } */
```diff
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c 2019-10-29 17:01:46.000000000 +0000
+++ gcc/tree-vect-slp.c 2019-10-29 17:02:06.355512105 +0000
@@ -225,6 +225,19 @@ vect_free_oprnd_info (vec<slp_oprnd_info
 }
 
 
+/* Return true if STMTS contains a pattern statement.  */
+
+static bool
+vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
+{
+  stmt_vec_info stmt_info;
+  unsigned int i;
+  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
+    if (is_pattern_stmt_p (stmt_info))
+      return true;
+  return false;
+}
+
 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
    that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
    of the chain.  */
@@ -2630,6 +2643,39 @@ vect_slp_analyze_node_operations_1 (vec_
   return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
 }
 
+/* Try to build NODE from scalars, returning true on success.
+   NODE_INSTANCE is the SLP instance that contains NODE.  */
+
+static bool
+vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
+                              slp_instance node_instance)
+{
+  stmt_vec_info stmt_info;
+  unsigned int i;
+
+  if (!is_a <bb_vec_info> (vinfo)
+      || node == SLP_INSTANCE_TREE (node_instance)
+      || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
+    return false;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                     "Building vector operands from scalars instead\n");
+
+  /* Don't remove and free the child nodes here, since they could be
+     referenced by other structures.  The analysis and scheduling phases
+     (need to) ignore child nodes of anything that isn't vect_internal_def.  */
+  unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
+  SLP_TREE_DEF_TYPE (node) = vect_external_def;
+  SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
+  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
+    {
+      tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
+      SLP_TREE_SCALAR_OPS (node)[i] = lhs;
+    }
+  return true;
+}
+
 /* Analyze statements contained in SLP tree NODE after recursively analyzing
    the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
 
@@ -2656,6 +2702,13 @@ vect_slp_analyze_node_operations (vec_in
     {
       SLP_TREE_NUMBER_OF_VEC_STMTS (node)
         = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
+      /* Cope with cases in which we made a late decision to build the
+         node from scalars.  */
+      if (SLP_TREE_DEF_TYPE (*leader) == vect_external_def
+          && vect_slp_convert_to_external (vinfo, node, node_instance))
+        ;
+      else
+        gcc_assert (SLP_TREE_DEF_TYPE (node) == SLP_TREE_DEF_TYPE (*leader));
       return true;
     }
 
@@ -2715,6 +2768,11 @@ vect_slp_analyze_node_operations (vec_in
     if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
       STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
 
+  /* If this node can't be vectorized, try pruning the tree here rather
+     than felling the whole thing.  */
+  if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
+    res = true;
+
   return res;
 }
 
Index: gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c
===================================================================
--- /dev/null 2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c 2019-10-29 17:02:06.351512133 +0000
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+
+int x[4], y[4], z[4];
+
+void
+f (void)
+{
+  x[0] += y[0] / z[0] * 2;
+  x[1] += y[1] / z[1] * 2;
+  x[2] += y[2] / z[2] * 2;
+  x[3] += y[3] / z[3] * 2;
+}
+
+/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_int } } } */
```