Message ID | alpine.LNX.2.00.1106291310490.810@zhemvz.fhfr.qr |
---|---|
State | New |
Headers | show |
Richard Guenther <rguenther@suse.de> wrote on 29/06/2011 02:19:40 PM: > > The following patch makes us handle invariant loads during vectorization. > Dependence analysis currently isn't clever enough to disambiguate them > thus we insert versioning-for-alias checks. For the testcase hoisting > the load is still always possible though, and for a read-after-write > dependence it would be possible for the vectorized loop copy as the > may-aliasing write is varying by the scalar variable size. > > The existing code for vectorizing invariant accesses looks very > suspicious - it generates a vector load at the scalar address > to then just extract the first vector element. Huh. IMHO this > can be simplified as done, by just re-using the scalar load result. > But maybe this code was supposed to deal with something entirely > different? This code was added to support outer loop vectorization: http://gcc.gnu.org/ml/gcc-patches/2007-08/msg00729.html (with an intention to be improved...). I think your version is fine for the outer loops as well. But with this patch an unused vector load is still created (it will probably be removed later though). Ira > > This patch gives a 33% speedup to the phoronix himeno testcase > if you bump the maximum alias versioning checks we want to insert. > > I'm currently re-bootstrapping & testing this but an earlier version > was ok on x86_64-unknown-linux-gnu. > > 2011-06-29 Richard Guenther <rguenther@suse.de> > > PR tree-optimization/46787 > * tree-data-ref.c (dr_address_invariant_p): Remove. > (find_data_references_in_stmt): Invariant accesses are ok now. > * tree-vect-stmts.c (vectorizable_load): Handle invariant > loads. > * tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow > invariant loads. > > * gcc.dg/vect/vect-121.c: New testcase. > > Index: gcc/tree-data-ref.c > =================================================================== > --- gcc/tree-data-ref.c (revision 175531) > +++ gcc/tree-data-ref.c (working copy) > @@ -919,21 +919,6 @@ dr_analyze_alias (struct data_reference > } > } > > -/* Returns true if the address of DR is invariant. */ > - > -static bool > -dr_address_invariant_p (struct data_reference *dr) > -{ > - unsigned i; > - tree idx; > - > - FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx) > - if (tree_contains_chrecs (idx, NULL)) > - return false; > - > - return true; > -} > - > /* Frees data reference DR. */ > > void > @@ -4228,19 +4213,6 @@ find_data_references_in_stmt (struct loo > dr = create_data_ref (nest, loop_containing_stmt (stmt), > *ref->pos, stmt, ref->is_read); > gcc_assert (dr != NULL); > - > - /* FIXME -- data dependence analysis does not work correctly > for objects > - with invariant addresses in loop nests. Let us fail here until the > - problem is fixed. */ > - if (dr_address_invariant_p (dr) && nest) > - { > - free_data_ref (dr); > - if (dump_file && (dump_flags & TDF_DETAILS)) > - fprintf (dump_file, "\tFAILED as dr address is invariant\n"); > - ret = false; > - break; > - } > - > VEC_safe_push (data_reference_p, heap, *datarefs, dr); > } > VEC_free (data_ref_loc, heap, references); > Index: gcc/tree-vect-stmts.c > =================================================================== > --- gcc/tree-vect-stmts.c (revision 175531) > +++ gcc/tree-vect-stmts.c (working copy) > @@ -4076,7 +4076,8 @@ vectorizable_load (gimple stmt, gimple_s > && code != COMPONENT_REF > && code != IMAGPART_EXPR > && code != REALPART_EXPR > - && code != MEM_REF) > + && code != MEM_REF > + && TREE_CODE_CLASS (code) != tcc_declaration) > return false; > > if (!STMT_VINFO_DATA_REF (stmt_info)) > @@ -4527,30 +4528,14 @@ vectorizable_load (gimple stmt, gimple_s > if (inv_p && !bb_vinfo) > { > gcc_assert (!strided_load); > - gcc_assert (nested_in_vect_loop_p (loop, stmt)); > if (j == 0) > { > - int k; > - tree t = NULL_TREE; > - tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type); > - > - /* CHECKME: bitpos depends on endianess? */ > - bitpos = bitsize_zero_node; > - vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp, > - bitsize, bitpos); > - vec_dest = vect_create_destination_var (scalar_dest, > - NULL_TREE); > - new_stmt = gimple_build_assign (vec_dest, vec_inv); > - new_temp = make_ssa_name (vec_dest, new_stmt); > - gimple_assign_set_lhs (new_stmt, new_temp); > - vect_finish_stmt_generation (stmt, new_stmt, gsi); > - > - for (k = nunits - 1; k >= 0; --k) > - t = tree_cons (NULL_TREE, new_temp, t); > - /* FIXME: use build_constructor directly. */ > - vec_inv = build_constructor_from_list (vectype, t); > + tree vec_inv; > + gimple_stmt_iterator gsi2 = *gsi; > + gsi_next (&gsi2); > + vec_inv = build_vector_from_val (vectype, scalar_dest); > new_temp = vect_init_vector (stmt, vec_inv, > - vectype, gsi); > + vectype, &gsi2); > new_stmt = SSA_NAME_DEF_STMT (new_temp); > } > else > Index: gcc/tree-vect-data-refs.c > =================================================================== > --- gcc/tree-vect-data-refs.c (revision 175531) > +++ gcc/tree-vect-data-refs.c (working copy) > @@ -2302,9 +2302,9 @@ vect_analyze_data_ref_access (struct dat > return false; > } > > - /* Don't allow invariant accesses in loops. */ > + /* Allow invariant loads in loops. */ > if (loop_vinfo && dr_step == 0) > - return false; > + return DR_IS_READ (dr); > > if (loop && nested_in_vect_loop_p (loop, stmt)) > { > Index: gcc/testsuite/gcc.dg/vect/vect-121.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/vect-121.c (revision 0) > +++ gcc/testsuite/gcc.dg/vect/vect-121.c (revision 0) > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_float } */ > + > +float *x; > +float parm; > +float > +test (int start, int end) > +{ > + int i; > + for (i = start; i < end; ++i) > + { > + float tem = x[i]; > + x[i] = parm * tem; > + } > +} > + > +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ > +/* { dg-final { cleanup-tree-dump "vect" } } */
On Wed, Jun 29, 2011 at 4:19 AM, Richard Guenther <rguenther@suse.de> wrote: > > The following patch makes us handle invariant loads during vectorization. > Dependence analysis currently isn't clever enough to disambiguate them > thus we insert versioning-for-alias checks. For the testcase hoisting > the load is still always possible though, and for a read-after-write > dependence it would be possible for the vectorized loop copy as the > may-aliasing write is varying by the scalar variable size. > > The existing code for vectorizing invariant accesses looks very > suspicious - it generates a vector load at the scalar address > to then just extract the first vector element. Huh. IMHO this > can be simplified as done, by just re-using the scalar load result. > But maybe this code was supposed to deal with something entirely > different? > > This patch gives a 33% speedup to the phoronix himeno testcase > if you bump the maximum alias versioning checks we want to insert. > > I'm currently re-bootstrapping & testing this but an earlier version > was ok on x86_64-unknown-linux-gnu. > > 2011-06-29 Richard Guenther <rguenther@suse.de> > > PR tree-optimization/46787 > * tree-data-ref.c (dr_address_invariant_p): Remove. > (find_data_references_in_stmt): Invariant accesses are ok now. > * tree-vect-stmts.c (vectorizable_load): Handle invariant > loads. > * tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow > invariant loads. > > * gcc.dg/vect/vect-121.c: New testcase. > This also caused: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49628
On Mon, Jul 4, 2011 at 6:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote: > On Wed, Jun 29, 2011 at 4:19 AM, Richard Guenther <rguenther@suse.de> wrote: >> >> The following patch makes us handle invariant loads during vectorization. >> Dependence analysis currently isn't clever enough to disambiguate them >> thus we insert versioning-for-alias checks. For the testcase hoisting >> the load is still always possible though, and for a read-after-write >> dependence it would be possible for the vectorized loop copy as the >> may-aliasing write is varying by the scalar variable size. >> >> The existing code for vectorizing invariant accesses looks very >> suspicious - it generates a vector load at the scalar address >> to then just extract the first vector element. Huh. IMHO this >> can be simplified as done, by just re-using the scalar load result. >> But maybe this code was supposed to deal with something entirely >> different? >> >> This patch gives a 33% speedup to the phoronix himeno testcase >> if you bump the maximum alias versioning checks we want to insert. >> >> I'm currently re-bootstrapping & testing this but an earlier version >> was ok on x86_64-unknown-linux-gnu. >> >> 2011-06-29 Richard Guenther <rguenther@suse.de> >> >> PR tree-optimization/46787 >> * tree-data-ref.c (dr_address_invariant_p): Remove. >> (find_data_references_in_stmt): Invariant accesses are ok now. >> * tree-vect-stmts.c (vectorizable_load): Handle invariant >> loads. >> * tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow >> invariant loads. >> >> * gcc.dg/vect/vect-121.c: New testcase. >> > > This also caused: > > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49628 > > It also caused: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49893
different? This patch gives a 33% speedup to the phoronix himeno testcase if you bump the maximum alias versioning checks we want to insert. I'm currently re-bootstrapping & testing this but an earlier version was ok on x86_64-unknown-linux-gnu. 2011-06-29 Richard Guenther <rguenther@suse.de> PR tree-optimization/46787 * tree-data-ref.c (dr_address_invariant_p): Remove. (find_data_references_in_stmt): Invariant accesses are ok now. * tree-vect-stmts.c (vectorizable_load): Handle invariant loads. * tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow invariant loads. * gcc.dg/vect/vect-121.c: New testcase. Index: gcc/tree-data-ref.c =================================================================== --- gcc/tree-data-ref.c (revision 175531) +++ gcc/tree-data-ref.c (working copy) @@ -919,21 +919,6 @@ dr_analyze_alias (struct data_reference } } -/* Returns true if the address of DR is invariant. */ - -static bool -dr_address_invariant_p (struct data_reference *dr) -{ - unsigned i; - tree idx; - - FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx) - if (tree_contains_chrecs (idx, NULL)) - return false; - - return true; -} - /* Frees data reference DR. */ void @@ -4228,19 +4213,6 @@ find_data_references_in_stmt (struct loo dr = create_data_ref (nest, loop_containing_stmt (stmt), *ref->pos, stmt, ref->is_read); gcc_assert (dr != NULL); - - /* FIXME -- data dependence analysis does not work correctly for objects - with invariant addresses in loop nests. Let us fail here until the - problem is fixed. */ - if (dr_address_invariant_p (dr) && nest) - { - free_data_ref (dr); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "\tFAILED as dr address is invariant\n"); - ret = false; - break; - } - VEC_safe_push (data_reference_p, heap, *datarefs, dr); } VEC_free (data_ref_loc, heap, references); Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c (revision 175531) +++ gcc/tree-vect-stmts.c (working copy) @@ -4076,7 +4076,8 @@ vectorizable_load (gimple stmt, gimple_s && code != COMPONENT_REF && code != IMAGPART_EXPR && code != REALPART_EXPR - && code != MEM_REF) + && code != MEM_REF + && TREE_CODE_CLASS (code) != tcc_declaration) return false; if (!STMT_VINFO_DATA_REF (stmt_info)) @@ -4527,30 +4528,14 @@ vectorizable_load (gimple stmt, gimple_s if (inv_p && !bb_vinfo) { gcc_assert (!strided_load); - gcc_assert (nested_in_vect_loop_p (loop, stmt)); if (j == 0) { - int k; - tree t = NULL_TREE; - tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type); - - /* CHECKME: bitpos depends on endianess? */ - bitpos = bitsize_zero_node; - vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp, - bitsize, bitpos); - vec_dest = vect_create_destination_var (scalar_dest, - NULL_TREE); - new_stmt = gimple_build_assign (vec_dest, vec_inv); - new_temp = make_ssa_name (vec_dest, new_stmt); - gimple_assign_set_lhs (new_stmt, new_temp); - vect_finish_stmt_generation (stmt, new_stmt, gsi); - - for (k = nunits - 1; k >= 0; --k) - t = tree_cons (NULL_TREE, new_temp, t); - /* FIXME: use build_constructor directly. */ - vec_inv = build_constructor_from_list (vectype, t); + tree vec_inv; + gimple_stmt_iterator gsi2 = *gsi; + gsi_next (&gsi2); + vec_inv = build_vector_from_val (vectype, scalar_dest); new_temp = vect_init_vector (stmt, vec_inv, - vectype, gsi); + vectype, &gsi2); new_stmt = SSA_NAME_DEF_STMT (new_temp); } else Index: gcc/tree-vect-data-refs.c =================================================================== --- gcc/tree-vect-data-refs.c (revision 175531) +++ gcc/tree-vect-data-refs.c (working copy) @@ -2302,9 +2302,9 @@ vect_analyze_data_ref_access (struct dat return false; } - /* Don't allow invariant accesses in loops. */ + /* Allow invariant loads in loops. */ if (loop_vinfo && dr_step == 0) - return false; + return DR_IS_READ (dr); if (loop && nested_in_vect_loop_p (loop, stmt)) { Index: gcc/testsuite/gcc.dg/vect/vect-121.c =================================================================== --- gcc/testsuite/gcc.dg/vect/vect-121.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/vect-121.c (revision 0) @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_float } */ + +float *x; +float parm; +float +test (int start, int end) +{ + int i; + for (i = start; i < end; ++i) + { + float tem = x[i]; + x[i] = parm * tem; + } +} + +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */