Message ID | CAGYS_TL77Dos_ZvCTz3nBmvQYqxrS2UerjsTcrbjuAvdChcJrA@mail.gmail.com |
---|---|
State | New |
Headers | show |
On Tue, 19 Nov 2013, Sergey Ostanevich wrote: > :) agree to you, but as soon as you're a user who tries to introduce > vector code and face a bug in cost model you'd like to have a > workaround until the bug will be fixed and compiler will come to you > with new OS distribution, don't you? > > I propose the following, yet SLP have to use a NULL as a loop info > which looks somewhat hacky. I think this is overengineering. -fvect-cost-model will do as workaround. And -fsimd-vect-cost-model has what I consider duplicate - "simd" and "vect". Richard. > Sergos > > > * common.opt: Added new option -fsimd-vect-cost-model > * tree-vectorizer.h (unlimited_cost_model): Interface update > to rely on particular loop info > * tree-vect-data-refs.c (vect_peeling_hash_insert): Update to > unlimited_cost_model call according to new interface > (vect_peeling_hash_choose_best_peeling): Ditto > (vect_enhance_data_refs_alignment): Ditto > * tree-vect-slp.c: Ditto > * tree-vect-loop.c (vect_estimate_min_profitable_iters): Ditto > plus issue a warning in case cost model overrides users' directive > > > > diff --git a/gcc/common.opt b/gcc/common.opt > index d5971df..87b3b37 100644 > --- a/gcc/common.opt > +++ b/gcc/common.opt > @@ -2296,6 +2296,10 @@ fvect-cost-model= > Common Joined RejectNegative Enum(vect_cost_model) > Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) > Specifies the cost model for vectorization > > +fsimd-vect-cost-model= > +Common Joined RejectNegative Enum(vect_cost_model) > Var(flag_simd_vect_cost_model) Init(VECT_COST_MODEL_UNLIMITED) > +Specifies the cost model for vectorization in loops marked with > #pragma omp simd > + > Enum > Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown > vectorizer cost model %qs) > > diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c > index 83d1f45..e26f704 100644 > --- a/gcc/tree-vect-data-refs.c > +++ b/gcc/tree-vect-data-refs.c > @@ -1090,7 +1090,8 @@ vect_peeling_hash_insert (loop_vec_info > loop_vinfo, struct data_reference *dr, > *new_slot = slot; > } > > - if (!supportable_dr_alignment && unlimited_cost_model ()) > + if (!supportable_dr_alignment > + && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) > slot->count += VECT_MAX_COST; > } > > @@ -1200,7 +1201,7 @@ vect_peeling_hash_choose_best_peeling > (loop_vec_info loop_vinfo, > res.peel_info.dr = NULL; > res.body_cost_vec = stmt_vector_for_cost (); > > - if (!unlimited_cost_model ()) > + if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) > { > res.inside_cost = INT_MAX; > res.outside_cost = INT_MAX; > @@ -1429,7 +1430,7 @@ vect_enhance_data_refs_alignment (loop_vec_info > loop_vinfo) > vectorization factor. > We do this automtically for cost model, since we > calculate cost > for every peeling option. */ > - if (unlimited_cost_model ()) > + if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) > possible_npeel_number = vf /nelements; > > /* Handle the aligned case. We may decide to align some other > @@ -1437,7 +1438,7 @@ vect_enhance_data_refs_alignment (loop_vec_info > loop_vinfo) > if (DR_MISALIGNMENT (dr) == 0) > { > npeel_tmp = 0; > - if (unlimited_cost_model ()) > + if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) > possible_npeel_number++; > } > > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index 86ebbd2..be66172 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -2696,7 +2696,7 @@ vect_estimate_min_profitable_iters > (loop_vec_info loop_vinfo, > void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); > > /* Cost model disabled. */ > - if (unlimited_cost_model ()) > + if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) > { > dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n"); > *ret_min_profitable_niters = 0; > @@ -2929,6 +2929,11 @@ vect_estimate_min_profitable_iters > (loop_vec_info loop_vinfo, > /* vector version will never be profitable. */ > else > { > + if (LOOP_VINFO_LOOP (loop_vinfo)->force_vect) > + { > + pedwarn (vect_location, 0, "Vectorization did not happen > for the loop"); > + } > + > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > "cost model: the vector iteration cost = %d " > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c > index 247bdfd..4b25964 100644 > --- a/gcc/tree-vect-slp.c > +++ b/gcc/tree-vect-slp.c > @@ -2171,7 +2171,7 @@ vect_slp_analyze_bb_1 (basic_block bb) > } > > /* Cost model: check if the vectorization is worthwhile. */ > - if (!unlimited_cost_model () > + if (!unlimited_cost_model (NULL) > && !vect_bb_vectorization_profitable_p (bb_vinfo)) > { > if (dump_enabled_p ()) > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index a6c5b59..2916906 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -919,9 +919,12 @@ known_alignment_for_access_p (struct > data_reference *data_ref_info) > > /* Return true if the vect cost model is unlimited. */ > static inline bool > -unlimited_cost_model () > +unlimited_cost_model (loop_p loop) > { > - return flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED; > + return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED > + || (loop != NULL > + && loop->force_vect > + && flag_simd_vect_cost_model == VECT_COST_MODEL_UNLIMITED)); > } > > /* Source location */ > > On Mon, Nov 18, 2013 at 7:13 PM, Richard Biener <rguenther@suse.de> wrote: > > On Mon, 18 Nov 2013, Sergey Ostanevich wrote: > > > >> I would agree that the example is just for the case cost model makes > >> correct estimation But how can we assure ourself that it won't have any > >> mistakes in the future? > > > > We call it bugs and not mistakes and we have bugzilla for it. > > > > Richard. > > > >> I believe it'll be Ok to introduce an extra flag as Jakub proposed for the > >> dedicated simd-forced vectorization to use unlimited cost model. This > >> can be default for -fopenmp or there should be a warning issued that > >> compiler overrides user's request of vectorization. In such a case user > >> can enforce vectorization (even with mentioned results :) with this > >> unlimited cost model for simd. > >> > >> > >> > >> On Fri, Nov 15, 2013 at 6:24 PM, Richard Biener <rguenther@suse.de> wrote: > >> > On Fri, 15 Nov 2013, Sergey Ostanevich wrote: > >> > > >> >> Richard, > >> >> > >> >> here's an example that causes trigger for the cost model. > >> > > >> > I hardly believe that (AVX2) > >> > > >> > .L9: > >> > vmovups (%rsi), %xmm3 > >> > addl $1, %r8d > >> > addq $256, %rsi > >> > vinsertf128 $0x1, -240(%rsi), %ymm3, %ymm1 > >> > vmovups -224(%rsi), %xmm3 > >> > vinsertf128 $0x1, -208(%rsi), %ymm3, %ymm3 > >> > vshufps $136, %ymm3, %ymm1, %ymm3 > >> > vperm2f128 $3, %ymm3, %ymm3, %ymm2 > >> > vshufps $68, %ymm2, %ymm3, %ymm1 > >> > vshufps $238, %ymm2, %ymm3, %ymm2 > >> > vmovups -192(%rsi), %xmm3 > >> > vinsertf128 $1, %xmm2, %ymm1, %ymm2 > >> > vinsertf128 $0x1, -176(%rsi), %ymm3, %ymm1 > >> > vmovups -160(%rsi), %xmm3 > >> > vinsertf128 $0x1, -144(%rsi), %ymm3, %ymm3 > >> > vshufps $136, %ymm3, %ymm1, %ymm3 > >> > vperm2f128 $3, %ymm3, %ymm3, %ymm1 > >> > vshufps $68, %ymm1, %ymm3, %ymm4 > >> > vshufps $238, %ymm1, %ymm3, %ymm1 > >> > vmovups -128(%rsi), %xmm3 > >> > vinsertf128 $1, %xmm1, %ymm4, %ymm1 > >> > vshufps $136, %ymm1, %ymm2, %ymm1 > >> > vperm2f128 $3, %ymm1, %ymm1, %ymm2 > >> > vshufps $68, %ymm2, %ymm1, %ymm4 > >> > vshufps $238, %ymm2, %ymm1, %ymm2 > >> > vinsertf128 $0x1, -112(%rsi), %ymm3, %ymm1 > >> > vmovups -96(%rsi), %xmm3 > >> > vinsertf128 $1, %xmm2, %ymm4, %ymm4 > >> > vinsertf128 $0x1, -80(%rsi), %ymm3, %ymm3 > >> > vshufps $136, %ymm3, %ymm1, %ymm3 > >> > vperm2f128 $3, %ymm3, %ymm3, %ymm2 > >> > vshufps $68, %ymm2, %ymm3, %ymm1 > >> > vshufps $238, %ymm2, %ymm3, %ymm2 > >> > vmovups -64(%rsi), %xmm3 > >> > vinsertf128 $1, %xmm2, %ymm1, %ymm2 > >> > vinsertf128 $0x1, -48(%rsi), %ymm3, %ymm1 > >> > vmovups -32(%rsi), %xmm3 > >> > vinsertf128 $0x1, -16(%rsi), %ymm3, %ymm3 > >> > cmpl %r8d, %edi > >> > vshufps $136, %ymm3, %ymm1, %ymm3 > >> > vperm2f128 $3, %ymm3, %ymm3, %ymm1 > >> > vshufps $68, %ymm1, %ymm3, %ymm5 > >> > vshufps $238, %ymm1, %ymm3, %ymm1 > >> > vinsertf128 $1, %xmm1, %ymm5, %ymm1 > >> > vshufps $136, %ymm1, %ymm2, %ymm1 > >> > vperm2f128 $3, %ymm1, %ymm1, %ymm2 > >> > vshufps $68, %ymm2, %ymm1, %ymm3 > >> > vshufps $238, %ymm2, %ymm1, %ymm2 > >> > vinsertf128 $1, %xmm2, %ymm3, %ymm1 > >> > vshufps $136, %ymm1, %ymm4, %ymm1 > >> > vperm2f128 $3, %ymm1, %ymm1, %ymm2 > >> > vshufps $68, %ymm2, %ymm1, %ymm3 > >> > vshufps $238, %ymm2, %ymm1, %ymm2 > >> > vinsertf128 $1, %xmm2, %ymm3, %ymm2 > >> > vaddps %ymm2, %ymm0, %ymm0 > >> > ja .L9 > >> > > >> > is more efficient than > >> > > >> > .L3: > >> > vaddss (%rcx,%rax), %xmm0, %xmm0 > >> > addq $32, %rax > >> > cmpq %rdx, %rax > >> > jne .L3 > >> > > >> > ;) > >> > > >> >> As soon as > >> >> elemental functions will appear and we update the vectorizer so it can accept > >> >> an elemental function inside the loop - we will have the same > >> >> situation as we have > >> >> it now: cost model will bail out with profitability estimation. > >> > > >> > Yes. > >> > > >> >> Still we have no chance to get info on how efficient the bar() function when it > >> >> is in vector form. > >> > > >> > Well I assume you mean that the speedup when vectorizing the elemental > >> > will offset whatever wreckage we cause with vectorizing the rest of the > >> > statements. I'd say you can at least compare to unrolling by > >> > the vectorization factor, building the vector inputs to the elemental > >> > from scalars, distributing the vector result from the elemental to > >> > scalars. > >> > > >> >> I believe I should repeat: #pragma omp simd is intended for introduction of an > >> >> instruction-level parallel region on developer's request, hence should > >> >> be treated > >> >> in same manner as #pragma omp parallel. Vectorizer cost model is an obstacle > >> >> here, not a help. > >> > > >> > Surely not if there isn't an elemental call in it. With it the > >> > cost model of course will have not enough information to decide. > >> > > >> > But still, what's the difference to the case where we cannot vectorize > >> > the function? What happens if we cannot vectorize the elemental? > >> > Do we have to build scalar versions for all possible vector sizes? > >> > > >> > Richard. > >> > > >> >> Regards, > >> >> Sergos > >> >> > >> >> > >> >> On Fri, Nov 15, 2013 at 1:08 AM, Richard Biener <rguenther@suse.de> wrote: > >> >> > Sergey Ostanevich <sergos.gnu@gmail.com> wrote: > >> >> >>this is only for the whole file? I mean to have a particular loop > >> >> >>vectorized in a > >> >> >>file while all others - up to compiler's cost model. is there such a > >> >> >>machinery? > >> >> > > >> >> > No, there is not. > >> >> > > >> >> > Richard. > >> >> > > >> >> >>Sergos > >> >> >> > >> >> >>On Thu, Nov 14, 2013 at 12:39 PM, Richard Biener <rguenther@suse.de> > >> >> >>wrote: > >> >> >>> On Wed, 13 Nov 2013, Sergey Ostanevich wrote: > >> >> >>> > >> >> >>>> I will get some tests. > >> >> >>>> As for cost analysis - simply consider the pragma as a request to > >> >> >>>> vectorize. How can I - as a developer - enforce it beyond the > >> >> >>pragma? > >> >> >>> > >> >> >>> You can disable the cost model via -fvect-cost-model=unlimited > >> >> >>> > >> >> >>> Richard. > >> >> >>> > >> >> >>>> On Wed, Nov 13, 2013 at 12:55 PM, Richard Biener <rguenther@suse.de> > >> >> >>wrote: > >> >> >>>> > On Tue, 12 Nov 2013, Sergey Ostanevich wrote: > >> >> >>>> > > >> >> >>>> >> The reason patch was in its original state is because we want > >> >> >>>> >> to notify user that his assumption of profitability may be wrong. > >> >> >>>> >> This is not a part of any spec and as far as I know ICC does not > >> >> >>>> >> notify user about the case. Still it can be a good hint for those > >> >> >>>> >> users who tries to get as much as possible performance. > >> >> >>>> >> > >> >> >>>> >> Richard's comment on the vectorization problems is about the same > >> >> >>- > >> >> >>>> >> to inform user that his attempt to force vectorization is failed. > >> >> >>>> >> > >> >> >>>> >> As for profitable or not - sometimes I believe it's impossible to > >> >> >>be > >> >> >>>> >> precise. For OMP we have case of a vector version of a function > >> >> >>>> >> and we have no chance to figure out whether it is profitable to > >> >> >>use > >> >> >>>> >> it or to loose it. If we can't map the loop for any vector length > >> >> >>>> >> other than 1 - I believe in this case we have to bail out and > >> >> >>report. > >> >> >>>> >> Is it about 'never profitable'? > >> >> >>>> > > >> >> >>>> > For example. I think we should report non-vectorized loops > >> >> >>>> > that are marked with force_vect anyway, with > >> >> >>-Wdisabled-optimization. > >> >> >>>> > Another case is that a loop may be profitable to vectorize if > >> >> >>>> > the ISA supports a gather instruction but otherwise not. Or if > >> >> >>the > >> >> >>>> > ISA supports efficient vector construction from N not loop > >> >> >>>> > invariant scalars (for vectorization of strided loads). > >> >> >>>> > > >> >> >>>> > Simply disregarding all of the cost analysis sounds completely > >> >> >>>> > bogus to me. > >> >> >>>> > > >> >> >>>> > I'd simply go for the diagnostic for now, not changing anything > >> >> >>else. > >> >> >>>> > We want to have a good understanding about why the cost model is > >> >> >>>> > so bad that we have to force to ignore it for #pragma simd - thus > >> >> >>we > >> >> >>>> > want testcases. > >> >> >>>> > > >> >> >>>> > Richard. > >> >> >>>> > > >> >> >>>> >> > >> >> >>>> >> On Tue, Nov 12, 2013 at 6:35 PM, Richard Biener > >> >> >><rguenther@suse.de> wrote: > >> >> >>>> >> > On 11/12/13 3:16 PM, Jakub Jelinek wrote: > >> >> >>>> >> >> On Tue, Nov 12, 2013 at 05:46:14PM +0400, Sergey Ostanevich > >> >> >>wrote: > >> >> >>>> >> >>> ivdep just substitutes all cross-iteration data analysis, > >> >> >>>> >> >>> nothing related to cost model. ICC does not cancel its > >> >> >>>> >> >>> cost model in case of #pragma ivdep > >> >> >>>> >> >>> > >> >> >>>> >> >>> as for the safelen - OMP standart treats it as a limitation > >> >> >>>> >> >>> for the vector length. this means if no safelen is present > >> >> >>>> >> >>> an arbitrary vector length can be used. > >> >> >>>> >> >> > >> >> >>>> >> >> I was talking about GCC loop->safelen, which is INT_MAX for > >> >> >>#pragma omp simd > >> >> >>>> >> >> without safelen clause or #pragma simd without vectorlength > >> >> >>clause. > >> >> >>>> >> >> > >> >> >>>> >> >>> so I believe loop->force_vect is the only trigger to > >> >> >>disregard > >> >> >>>> >> >>> the cost model > >> >> >>>> >> >> > >> >> >>>> >> >> Anyway, in that case I think the originally posted patch is > >> >> >>wrong, > >> >> >>>> >> >> if we want to treat force_vect as disregard all the cost model > >> >> >>and > >> >> >>>> >> >> force vectorization (well, the name of the field already kind > >> >> >>of suggest > >> >> >>>> >> >> that), then IMHO we should treat it the same as > >> >> >>-fvect-cost-model=unlimited > >> >> >>>> >> >> for those loops. > >> >> >>>> >> > > >> >> >>>> >> > Err - the user may have a specific sub-architecture in mind > >> >> >>when using > >> >> >>>> >> > #pragma simd, if you say we should completely ignore the cost > >> >> >>model > >> >> >>>> >> > then should we also sorry () if we cannot vectorize the loop > >> >> >>(either > >> >> >>>> >> > because of GCC deficiencies or lack of sub-target support)? > >> >> >>>> >> > > >> >> >>>> >> > That said, at least in the cases that the cost model says the > >> >> >>loop > >> >> >>>> >> > is never profitable to vectorize we should follow its advice. > >> >> >>>> >> > > >> >> >>>> >> > Richard. > >> >> >>>> >> > > >> >> >>>> >> >> Thus (untested): > >> >> >>>> >> >> > >> >> >>>> >> >> 2013-11-12 Jakub Jelinek <jakub@redhat.com> > >> >> >>>> >> >> > >> >> >>>> >> >> * tree-vect-loop.c (vect_estimate_min_profitable_iters): > >> >> >>Use > >> >> >>>> >> >> unlimited cost model also for force_vect loops. > >> >> >>>> >> >> > >> >> >>>> >> >> --- gcc/tree-vect-loop.c.jj 2013-11-12 12:09:40.000000000 > >> >> >>+0100 > >> >> >>>> >> >> +++ gcc/tree-vect-loop.c 2013-11-12 15:11:43.821404330 > >> >> >>+0100 > >> >> >>>> >> >> @@ -2702,7 +2702,7 @@ vect_estimate_min_profitable_iters (loop > >> >> >>>> >> >> void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA > >> >> >>(loop_vinfo); > >> >> >>>> >> >> > >> >> >>>> >> >> /* Cost model disabled. */ > >> >> >>>> >> >> - if (unlimited_cost_model ()) > >> >> >>>> >> >> + if (unlimited_cost_model () || LOOP_VINFO_LOOP > >> >> >>(loop_vinfo)->force_vect) > >> >> >>>> >> >> { > >> >> >>>> >> >> dump_printf_loc (MSG_NOTE, vect_location, "cost model > >> >> >>disabled.\n"); > >> >> >>>> >> >> *ret_min_profitable_niters = 0; > >> >> >>>> >> >> > >> >> >>>> >> >> Jakub > >> >> >>>> >> >> > >> >> >>>> >> > > >> >> >>>> >> > >> >> >>>> >> > >> >> >>>> > > >> >> >>>> > -- > >> >> >>>> > Richard Biener <rguenther@suse.de> > >> >> >>>> > SUSE / SUSE Labs > >> >> >>>> > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 > >> >> >>>> > GF: Jeff Hawn, Jennifer Guild, Felix Imend > >> >> >>>> > >> >> >>>> > >> >> >>> > >> >> >>> -- > >> >> >>> Richard Biener <rguenther@suse.de> > >> >> >>> SUSE / SUSE Labs > >> >> >>> SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 > >> >> >>> GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer > >> >> > > >> >> > > >> >> > >> > > >> > -- > >> > Richard Biener <rguenther@suse.de> > >> > SUSE / SUSE Labs > >> > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 > >> > GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer > >> > >> > > > > -- > > Richard Biener <rguenther@suse.de> > > SUSE / SUSE Labs > > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 > > GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer > >
On Tue, Nov 19, 2013 at 03:07:52PM +0100, Richard Biener wrote: > On Tue, 19 Nov 2013, Sergey Ostanevich wrote: > > > :) agree to you, but as soon as you're a user who tries to introduce > > vector code and face a bug in cost model you'd like to have a > > workaround until the bug will be fixed and compiler will come to you > > with new OS distribution, don't you? > > > > I propose the following, yet SLP have to use a NULL as a loop info > > which looks somewhat hacky. > > I think this is overengineering. -fvect-cost-model will do as > workaround. And -fsimd-vect-cost-model has what I consider > duplicate - "simd" and "vect". I think it is a good idea, though I agree about s/simd-vect/simd/ and I'd use VECT_COST_MODEL_DEFAULT as the default, which would mean just use -fvect-cost-model. > > @@ -2929,6 +2929,11 @@ vect_estimate_min_profitable_iters > > (loop_vec_info loop_vinfo, > > /* vector version will never be profitable. */ > > else > > { > > + if (LOOP_VINFO_LOOP (loop_vinfo)->force_vect) > > + { > > + pedwarn (vect_location, 0, "Vectorization did not happen > > for the loop"); > > + } pedwarn isn't really desirable for this, you want just warning, but some warning you can actually also turn off. -Wopenmp-simd (and we'd use it also when we ignore #pragma omp declare simd because it wasn't useful/desirable). Jakub
On Tue, Nov 19, 2013 at 6:07 PM, Richard Biener <rguenther@suse.de> wrote: > On Tue, 19 Nov 2013, Sergey Ostanevich wrote: > >> :) agree to you, but as soon as you're a user who tries to introduce >> vector code and face a bug in cost model you'd like to have a >> workaround until the bug will be fixed and compiler will come to you >> with new OS distribution, don't you? >> >> I propose the following, yet SLP have to use a NULL as a loop info >> which looks somewhat hacky. > > I think this is overengineering. -fvect-cost-model will do as > workaround. And -fsimd-vect-cost-model has what I consider > duplicate - "simd" and "vect". I just wanted to separate the autovectorized loops from ones user wants to vectorize. The -fvect-cost-model will force all at once. That's the reason to introcude the simd-vect, since pragma name is simd. > > Richard. > >> Sergos >> >> >> * common.opt: Added new option -fsimd-vect-cost-model >> * tree-vectorizer.h (unlimited_cost_model): Interface update >> to rely on particular loop info >> * tree-vect-data-refs.c (vect_peeling_hash_insert): Update to >> unlimited_cost_model call according to new interface >> (vect_peeling_hash_choose_best_peeling): Ditto >> (vect_enhance_data_refs_alignment): Ditto >> * tree-vect-slp.c: Ditto >> * tree-vect-loop.c (vect_estimate_min_profitable_iters): Ditto >> plus issue a warning in case cost model overrides users' directive >> >> >> >> diff --git a/gcc/common.opt b/gcc/common.opt >> index d5971df..87b3b37 100644 >> --- a/gcc/common.opt >> +++ b/gcc/common.opt >> @@ -2296,6 +2296,10 @@ fvect-cost-model= >> Common Joined RejectNegative Enum(vect_cost_model) >> Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) >> Specifies the cost model for vectorization >> >> +fsimd-vect-cost-model= >> +Common Joined RejectNegative Enum(vect_cost_model) >> Var(flag_simd_vect_cost_model) Init(VECT_COST_MODEL_UNLIMITED) >> +Specifies the cost model for vectorization in loops marked with >> #pragma omp simd >> + >> Enum >> Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown >> vectorizer cost model %qs) >> >> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c >> index 83d1f45..e26f704 100644 >> --- a/gcc/tree-vect-data-refs.c >> +++ b/gcc/tree-vect-data-refs.c >> @@ -1090,7 +1090,8 @@ vect_peeling_hash_insert (loop_vec_info >> loop_vinfo, struct data_reference *dr, >> *new_slot = slot; >> } >> >> - if (!supportable_dr_alignment && unlimited_cost_model ()) >> + if (!supportable_dr_alignment >> + && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) >> slot->count += VECT_MAX_COST; >> } >> >> @@ -1200,7 +1201,7 @@ vect_peeling_hash_choose_best_peeling >> (loop_vec_info loop_vinfo, >> res.peel_info.dr = NULL; >> res.body_cost_vec = stmt_vector_for_cost (); >> >> - if (!unlimited_cost_model ()) >> + if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) >> { >> res.inside_cost = INT_MAX; >> res.outside_cost = INT_MAX; >> @@ -1429,7 +1430,7 @@ vect_enhance_data_refs_alignment (loop_vec_info >> loop_vinfo) >> vectorization factor. >> We do this automtically for cost model, since we >> calculate cost >> for every peeling option. */ >> - if (unlimited_cost_model ()) >> + if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) >> possible_npeel_number = vf /nelements; >> >> /* Handle the aligned case. We may decide to align some other >> @@ -1437,7 +1438,7 @@ vect_enhance_data_refs_alignment (loop_vec_info >> loop_vinfo) >> if (DR_MISALIGNMENT (dr) == 0) >> { >> npeel_tmp = 0; >> - if (unlimited_cost_model ()) >> + if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) >> possible_npeel_number++; >> } >> >> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c >> index 86ebbd2..be66172 100644 >> --- a/gcc/tree-vect-loop.c >> +++ b/gcc/tree-vect-loop.c >> @@ -2696,7 +2696,7 @@ vect_estimate_min_profitable_iters >> (loop_vec_info loop_vinfo, >> void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); >> >> /* Cost model disabled. */ >> - if (unlimited_cost_model ()) >> + if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) >> { >> dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n"); >> *ret_min_profitable_niters = 0; >> @@ -2929,6 +2929,11 @@ vect_estimate_min_profitable_iters >> (loop_vec_info loop_vinfo, >> /* vector version will never be profitable. */ >> else >> { >> + if (LOOP_VINFO_LOOP (loop_vinfo)->force_vect) >> + { >> + pedwarn (vect_location, 0, "Vectorization did not happen >> for the loop"); >> + } >> + >> if (dump_enabled_p ()) >> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, >> "cost model: the vector iteration cost = %d " >> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c >> index 247bdfd..4b25964 100644 >> --- a/gcc/tree-vect-slp.c >> +++ b/gcc/tree-vect-slp.c >> @@ -2171,7 +2171,7 @@ vect_slp_analyze_bb_1 (basic_block bb) >> } >> >> /* Cost model: check if the vectorization is worthwhile. */ >> - if (!unlimited_cost_model () >> + if (!unlimited_cost_model (NULL) >> && !vect_bb_vectorization_profitable_p (bb_vinfo)) >> { >> if (dump_enabled_p ()) >> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h >> index a6c5b59..2916906 100644 >> --- a/gcc/tree-vectorizer.h >> +++ b/gcc/tree-vectorizer.h >> @@ -919,9 +919,12 @@ known_alignment_for_access_p (struct >> data_reference *data_ref_info) >> >> /* Return true if the vect cost model is unlimited. */ >> static inline bool >> -unlimited_cost_model () >> +unlimited_cost_model (loop_p loop) >> { >> - return flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED; >> + return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED >> + || (loop != NULL >> + && loop->force_vect >> + && flag_simd_vect_cost_model == VECT_COST_MODEL_UNLIMITED)); >> } >> >> /* Source location */ >> >> On Mon, Nov 18, 2013 at 7:13 PM, Richard Biener <rguenther@suse.de> wrote: >> > On Mon, 18 Nov 2013, Sergey Ostanevich wrote: >> > >> >> I would agree that the example is just for the case cost model makes >> >> correct estimation But how can we assure ourself that it won't have any >> >> mistakes in the future? >> > >> > We call it bugs and not mistakes and we have bugzilla for it. >> > >> > Richard. >> > >> >> I believe it'll be Ok to introduce an extra flag as Jakub proposed for the >> >> dedicated simd-forced vectorization to use unlimited cost model. This >> >> can be default for -fopenmp or there should be a warning issued that >> >> compiler overrides user's request of vectorization. In such a case user >> >> can enforce vectorization (even with mentioned results :) with this >> >> unlimited cost model for simd. >> >> >> >> >> >> >> >> On Fri, Nov 15, 2013 at 6:24 PM, Richard Biener <rguenther@suse.de> wrote: >> >> > On Fri, 15 Nov 2013, Sergey Ostanevich wrote: >> >> > >> >> >> Richard, >> >> >> >> >> >> here's an example that causes trigger for the cost model. >> >> > >> >> > I hardly believe that (AVX2) >> >> > >> >> > .L9: >> >> > vmovups (%rsi), %xmm3 >> >> > addl $1, %r8d >> >> > addq $256, %rsi >> >> > vinsertf128 $0x1, -240(%rsi), %ymm3, %ymm1 >> >> > vmovups -224(%rsi), %xmm3 >> >> > vinsertf128 $0x1, -208(%rsi), %ymm3, %ymm3 >> >> > vshufps $136, %ymm3, %ymm1, %ymm3 >> >> > vperm2f128 $3, %ymm3, %ymm3, %ymm2 >> >> > vshufps $68, %ymm2, %ymm3, %ymm1 >> >> > vshufps $238, %ymm2, %ymm3, %ymm2 >> >> > vmovups -192(%rsi), %xmm3 >> >> > vinsertf128 $1, %xmm2, %ymm1, %ymm2 >> >> > vinsertf128 $0x1, -176(%rsi), %ymm3, %ymm1 >> >> > vmovups -160(%rsi), %xmm3 >> >> > vinsertf128 $0x1, -144(%rsi), %ymm3, %ymm3 >> >> > vshufps $136, %ymm3, %ymm1, %ymm3 >> >> > vperm2f128 $3, %ymm3, %ymm3, %ymm1 >> >> > vshufps $68, %ymm1, %ymm3, %ymm4 >> >> > vshufps $238, %ymm1, %ymm3, %ymm1 >> >> > vmovups -128(%rsi), %xmm3 >> >> > vinsertf128 $1, %xmm1, %ymm4, %ymm1 >> >> > vshufps $136, %ymm1, %ymm2, %ymm1 >> >> > vperm2f128 $3, %ymm1, %ymm1, %ymm2 >> >> > vshufps $68, %ymm2, %ymm1, %ymm4 >> >> > vshufps $238, %ymm2, %ymm1, %ymm2 >> >> > vinsertf128 $0x1, -112(%rsi), %ymm3, %ymm1 >> >> > vmovups -96(%rsi), %xmm3 >> >> > vinsertf128 $1, %xmm2, %ymm4, %ymm4 >> >> > vinsertf128 $0x1, -80(%rsi), %ymm3, %ymm3 >> >> > vshufps $136, %ymm3, %ymm1, %ymm3 >> >> > vperm2f128 $3, %ymm3, %ymm3, %ymm2 >> >> > vshufps $68, %ymm2, %ymm3, %ymm1 >> >> > vshufps $238, %ymm2, %ymm3, %ymm2 >> >> > vmovups -64(%rsi), %xmm3 >> >> > vinsertf128 $1, %xmm2, %ymm1, %ymm2 >> >> > vinsertf128 $0x1, -48(%rsi), %ymm3, %ymm1 >> >> > vmovups -32(%rsi), %xmm3 >> >> > vinsertf128 $0x1, -16(%rsi), %ymm3, %ymm3 >> >> > cmpl %r8d, %edi >> >> > vshufps $136, %ymm3, %ymm1, %ymm3 >> >> > vperm2f128 $3, %ymm3, %ymm3, %ymm1 >> >> > vshufps $68, %ymm1, %ymm3, %ymm5 >> >> > vshufps $238, %ymm1, %ymm3, %ymm1 >> >> > vinsertf128 $1, %xmm1, %ymm5, %ymm1 >> >> > vshufps $136, %ymm1, %ymm2, %ymm1 >> >> > vperm2f128 $3, %ymm1, %ymm1, %ymm2 >> >> > vshufps $68, %ymm2, %ymm1, %ymm3 >> >> > vshufps $238, %ymm2, %ymm1, %ymm2 >> >> > vinsertf128 $1, %xmm2, %ymm3, %ymm1 >> >> > vshufps $136, %ymm1, %ymm4, %ymm1 >> >> > vperm2f128 $3, %ymm1, %ymm1, %ymm2 >> >> > vshufps $68, %ymm2, %ymm1, %ymm3 >> >> > vshufps $238, %ymm2, %ymm1, %ymm2 >> >> > vinsertf128 $1, %xmm2, %ymm3, %ymm2 >> >> > vaddps %ymm2, %ymm0, %ymm0 >> >> > ja .L9 >> >> > >> >> > is more efficient than >> >> > >> >> > .L3: >> >> > vaddss (%rcx,%rax), %xmm0, %xmm0 >> >> > addq $32, %rax >> >> > cmpq %rdx, %rax >> >> > jne .L3 >> >> > >> >> > ;) >> >> > >> >> >> As soon as >> >> >> elemental functions will appear and we update the vectorizer so it can accept >> >> >> an elemental function inside the loop - we will have the same >> >> >> situation as we have >> >> >> it now: cost model will bail out with profitability estimation. >> >> > >> >> > Yes. >> >> > >> >> >> Still we have no chance to get info on how efficient the bar() function when it >> >> >> is in vector form. >> >> > >> >> > Well I assume you mean that the speedup when vectorizing the elemental >> >> > will offset whatever wreckage we cause with vectorizing the rest of the >> >> > statements. I'd say you can at least compare to unrolling by >> >> > the vectorization factor, building the vector inputs to the elemental >> >> > from scalars, distributing the vector result from the elemental to >> >> > scalars. >> >> > >> >> >> I believe I should repeat: #pragma omp simd is intended for introduction of an >> >> >> instruction-level parallel region on developer's request, hence should >> >> >> be treated >> >> >> in same manner as #pragma omp parallel. Vectorizer cost model is an obstacle >> >> >> here, not a help. >> >> > >> >> > Surely not if there isn't an elemental call in it. With it the >> >> > cost model of course will have not enough information to decide. >> >> > >> >> > But still, what's the difference to the case where we cannot vectorize >> >> > the function? What happens if we cannot vectorize the elemental? >> >> > Do we have to build scalar versions for all possible vector sizes? >> >> > >> >> > Richard. >> >> > >> >> >> Regards, >> >> >> Sergos >> >> >> >> >> >> >> >> >> On Fri, Nov 15, 2013 at 1:08 AM, Richard Biener <rguenther@suse.de> wrote: >> >> >> > Sergey Ostanevich <sergos.gnu@gmail.com> wrote: >> >> >> >>this is only for the whole file? I mean to have a particular loop >> >> >> >>vectorized in a >> >> >> >>file while all others - up to compiler's cost model. is there such a >> >> >> >>machinery? >> >> >> > >> >> >> > No, there is not. >> >> >> > >> >> >> > Richard. >> >> >> > >> >> >> >>Sergos >> >> >> >> >> >> >> >>On Thu, Nov 14, 2013 at 12:39 PM, Richard Biener <rguenther@suse.de> >> >> >> >>wrote: >> >> >> >>> On Wed, 13 Nov 2013, Sergey Ostanevich wrote: >> >> >> >>> >> >> >> >>>> I will get some tests. >> >> >> >>>> As for cost analysis - simply consider the pragma as a request to >> >> >> >>>> vectorize. How can I - as a developer - enforce it beyond the >> >> >> >>pragma? >> >> >> >>> >> >> >> >>> You can disable the cost model via -fvect-cost-model=unlimited >> >> >> >>> >> >> >> >>> Richard. >> >> >> >>> >> >> >> >>>> On Wed, Nov 13, 2013 at 12:55 PM, Richard Biener <rguenther@suse.de> >> >> >> >>wrote: >> >> >> >>>> > On Tue, 12 Nov 2013, Sergey Ostanevich wrote: >> >> >> >>>> > >> >> >> >>>> >> The reason patch was in its original state is because we want >> >> >> >>>> >> to notify user that his assumption of profitability may be wrong. >> >> >> >>>> >> This is not a part of any spec and as far as I know ICC does not >> >> >> >>>> >> notify user about the case. Still it can be a good hint for those >> >> >> >>>> >> users who tries to get as much as possible performance. >> >> >> >>>> >> >> >> >> >>>> >> Richard's comment on the vectorization problems is about the same >> >> >> >>- >> >> >> >>>> >> to inform user that his attempt to force vectorization is failed. >> >> >> >>>> >> >> >> >> >>>> >> As for profitable or not - sometimes I believe it's impossible to >> >> >> >>be >> >> >> >>>> >> precise. For OMP we have case of a vector version of a function >> >> >> >>>> >> and we have no chance to figure out whether it is profitable to >> >> >> >>use >> >> >> >>>> >> it or to loose it. If we can't map the loop for any vector length >> >> >> >>>> >> other than 1 - I believe in this case we have to bail out and >> >> >> >>report. >> >> >> >>>> >> Is it about 'never profitable'? >> >> >> >>>> > >> >> >> >>>> > For example. I think we should report non-vectorized loops >> >> >> >>>> > that are marked with force_vect anyway, with >> >> >> >>-Wdisabled-optimization. >> >> >> >>>> > Another case is that a loop may be profitable to vectorize if >> >> >> >>>> > the ISA supports a gather instruction but otherwise not. Or if >> >> >> >>the >> >> >> >>>> > ISA supports efficient vector construction from N not loop >> >> >> >>>> > invariant scalars (for vectorization of strided loads). >> >> >> >>>> > >> >> >> >>>> > Simply disregarding all of the cost analysis sounds completely >> >> >> >>>> > bogus to me. >> >> >> >>>> > >> >> >> >>>> > I'd simply go for the diagnostic for now, not changing anything >> >> >> >>else. >> >> >> >>>> > We want to have a good understanding about why the cost model is >> >> >> >>>> > so bad that we have to force to ignore it for #pragma simd - thus >> >> >> >>we >> >> >> >>>> > want testcases. >> >> >> >>>> > >> >> >> >>>> > Richard. >> >> >> >>>> > >> >> >> >>>> >> >> >> >> >>>> >> On Tue, Nov 12, 2013 at 6:35 PM, Richard Biener >> >> >> >><rguenther@suse.de> wrote: >> >> >> >>>> >> > On 11/12/13 3:16 PM, Jakub Jelinek wrote: >> >> >> >>>> >> >> On Tue, Nov 12, 2013 at 05:46:14PM +0400, Sergey Ostanevich >> >> >> >>wrote: >> >> >> >>>> >> >>> ivdep just substitutes all cross-iteration data analysis, >> >> >> >>>> >> >>> nothing related to cost model. ICC does not cancel its >> >> >> >>>> >> >>> cost model in case of #pragma ivdep >> >> >> >>>> >> >>> >> >> >> >>>> >> >>> as for the safelen - OMP standart treats it as a limitation >> >> >> >>>> >> >>> for the vector length. this means if no safelen is present >> >> >> >>>> >> >>> an arbitrary vector length can be used. >> >> >> >>>> >> >> >> >> >> >>>> >> >> I was talking about GCC loop->safelen, which is INT_MAX for >> >> >> >>#pragma omp simd >> >> >> >>>> >> >> without safelen clause or #pragma simd without vectorlength >> >> >> >>clause. >> >> >> >>>> >> >> >> >> >> >>>> >> >>> so I believe loop->force_vect is the only trigger to >> >> >> >>disregard >> >> >> >>>> >> >>> the cost model >> >> >> >>>> >> >> >> >> >> >>>> >> >> Anyway, in that case I think the originally posted patch is >> >> >> >>wrong, >> >> >> >>>> >> >> if we want to treat force_vect as disregard all the cost model >> >> >> >>and >> >> >> >>>> >> >> force vectorization (well, the name of the field already kind >> >> >> >>of suggest >> >> >> >>>> >> >> that), then IMHO we should treat it the same as >> >> >> >>-fvect-cost-model=unlimited >> >> >> >>>> >> >> for those loops. >> >> >> >>>> >> > >> >> >> >>>> >> > Err - the user may have a specific sub-architecture in mind >> >> >> >>when using >> >> >> >>>> >> > #pragma simd, if you say we should completely ignore the cost >> >> >> >>model >> >> >> >>>> >> > then should we also sorry () if we cannot vectorize the loop >> >> >> >>(either >> >> >> >>>> >> > because of GCC deficiencies or lack of sub-target support)? >> >> >> >>>> >> > >> >> >> >>>> >> > That said, at least in the cases that the cost model says the >> >> >> >>loop >> >> >> >>>> >> > is never profitable to vectorize we should follow its advice. >> >> >> >>>> >> > >> >> >> >>>> >> > Richard. >> >> >> >>>> >> > >> >> >> >>>> >> >> Thus (untested): >> >> >> >>>> >> >> >> >> >> >>>> >> >> 2013-11-12 Jakub Jelinek <jakub@redhat.com> >> >> >> >>>> >> >> >> >> >> >>>> >> >> * tree-vect-loop.c (vect_estimate_min_profitable_iters): >> >> >> >>Use >> >> >> >>>> >> >> unlimited cost model also for force_vect loops. >> >> >> >>>> >> >> >> >> >> >>>> >> >> --- gcc/tree-vect-loop.c.jj 2013-11-12 12:09:40.000000000 >> >> >> >>+0100 >> >> >> >>>> >> >> +++ gcc/tree-vect-loop.c 2013-11-12 15:11:43.821404330 >> >> >> >>+0100 >> >> >> >>>> >> >> @@ -2702,7 +2702,7 @@ vect_estimate_min_profitable_iters (loop >> >> >> >>>> >> >> void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA >> >> >> >>(loop_vinfo); >> >> >> >>>> >> >> >> >> >> >>>> >> >> /* Cost model disabled. */ >> >> >> >>>> >> >> - if (unlimited_cost_model ()) >> >> >> >>>> >> >> + if (unlimited_cost_model () || LOOP_VINFO_LOOP >> >> >> >>(loop_vinfo)->force_vect) >> >> >> >>>> >> >> { >> >> >> >>>> >> >> dump_printf_loc (MSG_NOTE, vect_location, "cost model >> >> >> >>disabled.\n"); >> >> >> >>>> >> >> *ret_min_profitable_niters = 0; >> >> >> >>>> >> >> >> >> >> >>>> >> >> Jakub >> >> >> >>>> >> >> >> >> >> >>>> >> > >> >> >> >>>> >> >> >> >> >>>> >> >> >> >> >>>> > >> >> >> >>>> > -- >> >> >> >>>> > Richard Biener <rguenther@suse.de> >> >> >> >>>> > SUSE / SUSE Labs >> >> >> >>>> > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 >> >> >> >>>> > GF: Jeff Hawn, Jennifer Guild, Felix Imend >> >> >> >>>> >> >> >> >>>> >> >> >> >>> >> >> >> >>> -- >> >> >> >>> Richard Biener <rguenther@suse.de> >> >> >> >>> SUSE / SUSE Labs >> >> >> >>> SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 >> >> >> >>> GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer >> >> >> > >> >> >> > >> >> >> >> >> > >> >> > -- >> >> > Richard Biener <rguenther@suse.de> >> >> > SUSE / SUSE Labs >> >> > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 >> >> > GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer >> >> >> >> >> > >> > -- >> > Richard Biener <rguenther@suse.de> >> > SUSE / SUSE Labs >> > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 >> > GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer >> >> > > -- > Richard Biener <rguenther@suse.de> > SUSE / SUSE Labs > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 > GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer
>> > I propose the following, yet SLP have to use a NULL as a loop info >> > which looks somewhat hacky. >> >> I think this is overengineering. -fvect-cost-model will do as >> workaround. And -fsimd-vect-cost-model has what I consider >> duplicate - "simd" and "vect". > > I think it is a good idea, though I agree about s/simd-vect/simd/ and > I'd use VECT_COST_MODEL_DEFAULT as the default, which would mean > just use -fvect-cost-model. that's ok, since we'd have a way to force those 'simd' loops. > >> > @@ -2929,6 +2929,11 @@ vect_estimate_min_profitable_iters >> > (loop_vec_info loop_vinfo, >> > /* vector version will never be profitable. */ >> > else >> > { >> > + if (LOOP_VINFO_LOOP (loop_vinfo)->force_vect) >> > + { >> > + pedwarn (vect_location, 0, "Vectorization did not happen >> > for the loop"); >> > + } > > pedwarn isn't really desirable for this, you want just warning, > but some warning you can actually also turn off. > -Wopenmp-simd (and we'd use it also when we ignore #pragma omp declare simd > because it wasn't useful/desirable). consider a user is interested in enabling warning-as-error for this case? can we disable the pedwarn the same way? Sergos > > Jakub
On Tue, Nov 19, 2013 at 06:39:48PM +0400, Sergey Ostanevich wrote: > > pedwarn isn't really desirable for this, you want just warning, > > but some warning you can actually also turn off. > > -Wopenmp-simd (and we'd use it also when we ignore #pragma omp declare simd > > because it wasn't useful/desirable). > > consider a user is interested in enabling warning-as-error for this case? -Werror=openmp-simd will work then, this works for any named warnings. > can we disable the pedwarn the same way? pedwarn is for pedantic warnings, no standard says that #pragma omp simd must be vectorized, or that #pragma omp simd or #pragma omp declare simd is anything but an optimization hint, so pedwarn isn't what you are looking for. Jakub
diff --git a/gcc/common.opt b/gcc/common.opt index d5971df..87b3b37 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2296,6 +2296,10 @@ fvect-cost-model= Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Specifies the cost model for vectorization +fsimd-vect-cost-model= +Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_vect_cost_model) Init(VECT_COST_MODEL_UNLIMITED) +Specifies the cost model for vectorization in loops marked with #pragma omp simd + Enum Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs) diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 83d1f45..e26f704 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -1090,7 +1090,8 @@ vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr, *new_slot = slot; } - if (!supportable_dr_alignment && unlimited_cost_model ()) + if (!supportable_dr_alignment + && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) slot->count += VECT_MAX_COST; } @@ -1200,7 +1201,7 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo, res.peel_info.dr = NULL; res.body_cost_vec = stmt_vector_for_cost (); - if (!unlimited_cost_model ()) + if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) { res.inside_cost = INT_MAX; res.outside_cost = INT_MAX; @@ -1429,7 +1430,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) vectorization factor. We do this automtically for cost model, since we calculate cost for every peeling option. */ - if (unlimited_cost_model ()) + if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) possible_npeel_number = vf /nelements; /* Handle the aligned case. We may decide to align some other @@ -1437,7 +1438,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) if (DR_MISALIGNMENT (dr) == 0) { npeel_tmp = 0; - if (unlimited_cost_model ()) + if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) possible_npeel_number++; } diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 86ebbd2..be66172 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2696,7 +2696,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); /* Cost model disabled. */ - if (unlimited_cost_model ()) + if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) { dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n"); *ret_min_profitable_niters = 0; @@ -2929,6 +2929,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, /* vector version will never be profitable. */ else { + if (LOOP_VINFO_LOOP (loop_vinfo)->force_vect) + { + pedwarn (vect_location, 0, "Vectorization did not happen for the loop"); + } + if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "cost model: the vector iteration cost = %d " diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 247bdfd..4b25964 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2171,7 +2171,7 @@ vect_slp_analyze_bb_1 (basic_block bb) } /* Cost model: check if the vectorization is worthwhile. */ - if (!unlimited_cost_model () + if (!unlimited_cost_model (NULL) && !vect_bb_vectorization_profitable_p (bb_vinfo)) { if (dump_enabled_p ()) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index a6c5b59..2916906 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -919,9 +919,12 @@ known_alignment_for_access_p (struct data_reference *data_ref_info) /* Return true if the vect cost model is unlimited. */ static inline bool -unlimited_cost_model () +unlimited_cost_model (loop_p loop) { - return flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED; + return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED + || (loop != NULL + && loop->force_vect + && flag_simd_vect_cost_model == VECT_COST_MODEL_UNLIMITED)); }