Message ID | nycvar.YFH.7.76.2104071841180.16468@elmra.sevgm.obk |
---|---|
State | New |
Headers | show |
Series | tree-optimization/99956 - improve loop interchange | expand |
On Wed, Apr 7, 2021 at 6:43 PM Richard Biener <rguenther@suse.de> wrote: > > When we apply store motion and DSE manually to the bwaves kernel > in gfortran.dg/pr81303.f loop interchange no longer happens because > the perfect nest considered covers outer loops we cannot analyze > strides for. The following compensates for this by shrinking the > nest in this analysis which was already possible but on a too coarse > granularity. It shares the shrinked nest with the rest of the DRs > so the complexity overhead should be negligible. > > Bootstrapped & tested on x86_64-unknown-linux-gnu, SPEC FP CPU 2017 > build and there's still only the single loop interchange in bwaves. > > Queued for stage1. And applied as g:6ff66d1ea48960fe96bb51a750c01135e65fe452 > 2021-04-07 Richard Biener <rguenther@suse.de> > > PR tree-optimization/99956 > * gimple-loop-interchange.cc (compute_access_stride): > Try instantiating the access in a shallower loop nest > if instantiating failed. > (compute_access_strides): Pass adjustable loop_nest > to compute_access_stride. > > * gfortran.dg/pr99956.f: New testcase. > --- > gcc/gimple-loop-interchange.cc | 68 +++++++++++++++++------------ > gcc/testsuite/gfortran.dg/pr99956.f | 45 +++++++++++++++++++ > 2 files changed, 85 insertions(+), 28 deletions(-) > create mode 100644 gcc/testsuite/gfortran.dg/pr99956.f > > diff --git a/gcc/gimple-loop-interchange.cc b/gcc/gimple-loop-interchange.cc > index f45b9364644..80f749b6071 100644 > --- a/gcc/gimple-loop-interchange.cc > +++ b/gcc/gimple-loop-interchange.cc > @@ -1280,12 +1280,15 @@ tree_loop_interchange::move_code_to_inner_loop (class loop *outer, > arr[i][j - 1][k] = 0; */ > > static void > -compute_access_stride (class loop *loop_nest, class loop *loop, > +compute_access_stride (class loop *&loop_nest, class loop *loop, > data_reference_p dr) > { > vec<tree> *strides = new vec<tree> (); > - basic_block bb = gimple_bb (DR_STMT (dr)); > + dr->aux = strides; > > + basic_block bb = gimple_bb (DR_STMT (dr)); > + if (!flow_bb_inside_loop_p (loop_nest, bb)) > + return; > while (!flow_bb_inside_loop_p (loop, bb)) > { > strides->safe_push (build_int_cst (sizetype, 0)); > @@ -1313,39 +1316,47 @@ compute_access_stride (class loop *loop_nest, class loop *loop, > } > /* Otherwise punt. */ > else > - { > - dr->aux = strides; > - return; > - } > + return; > } > tree scev_base = build_fold_addr_expr (ref); > tree scev = analyze_scalar_evolution (loop, scev_base); > - scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev); > - if (! chrec_contains_undetermined (scev)) > + if (chrec_contains_undetermined (scev)) > + return; > + > + tree orig_scev = scev; > + do > + { > + scev = instantiate_scev (loop_preheader_edge (loop_nest), > + loop, orig_scev); > + if (! chrec_contains_undetermined (scev)) > + break; > + > + /* If we couldn't instantiate for the desired nest, shrink it. */ > + if (loop_nest == loop) > + return; > + loop_nest = loop_nest->inner; > + } while (1); > + > + tree sl = scev; > + class loop *expected = loop; > + while (TREE_CODE (sl) == POLYNOMIAL_CHREC) > { > - tree sl = scev; > - class loop *expected = loop; > - while (TREE_CODE (sl) == POLYNOMIAL_CHREC) > + class loop *sl_loop = get_chrec_loop (sl); > + while (sl_loop != expected) > { > - class loop *sl_loop = get_chrec_loop (sl); > - while (sl_loop != expected) > - { > - strides->safe_push (size_int (0)); > - expected = loop_outer (expected); > - } > - strides->safe_push (CHREC_RIGHT (sl)); > - sl = CHREC_LEFT (sl); > + strides->safe_push (size_int (0)); > expected = loop_outer (expected); > } > - if (! tree_contains_chrecs (sl, NULL)) > - while (expected != loop_outer (loop_nest)) > - { > - strides->safe_push (size_int (0)); > - expected = loop_outer (expected); > - } > + strides->safe_push (CHREC_RIGHT (sl)); > + sl = CHREC_LEFT (sl); > + expected = loop_outer (expected); > } > - > - dr->aux = strides; > + if (! tree_contains_chrecs (sl, NULL)) > + while (expected != loop_outer (loop_nest)) > + { > + strides->safe_push (size_int (0)); > + expected = loop_outer (expected); > + } > } > > /* Given loop nest LOOP_NEST with innermost LOOP, the function computes > @@ -1363,9 +1374,10 @@ compute_access_strides (class loop *loop_nest, class loop *loop, > data_reference_p dr; > vec<tree> *stride; > > + class loop *interesting_loop_nest = loop_nest; > for (i = 0; datarefs.iterate (i, &dr); ++i) > { > - compute_access_stride (loop_nest, loop, dr); > + compute_access_stride (interesting_loop_nest, loop, dr); > stride = DR_ACCESS_STRIDE (dr); > if (stride->length () < num_loops) > { > diff --git a/gcc/testsuite/gfortran.dg/pr99956.f b/gcc/testsuite/gfortran.dg/pr99956.f > new file mode 100644 > index 00000000000..b5c0be3912d > --- /dev/null > +++ b/gcc/testsuite/gfortran.dg/pr99956.f > @@ -0,0 +1,45 @@ > +! { dg-do compile } > +! { dg-options "-O3 -ffast-math -floop-interchange -fdump-tree-linterchange-details" } > + > + subroutine mat_times_vec(y,x,a,axp,ayp,azp,axm,aym,azm, > + $ nb,nx,ny,nz) > + implicit none > + integer nb,nx,ny,nz,i,j,k,m,l,kit,im1,ip1,jm1,jp1,km1,kp1 > + > + real*8 y(nb,nx,ny,nz),x(nb,nx,ny,nz),tem > + > + real*8 a(nb,nb,nx,ny,nz), > + 1 axp(nb,nb,nx,ny,nz),ayp(nb,nb,nx,ny,nz),azp(nb,nb,nx,ny,nz), > + 2 axm(nb,nb,nx,ny,nz),aym(nb,nb,nx,ny,nz),azm(nb,nb,nx,ny,nz) > + > + > + do k=1,nz > + km1=mod(k+nz-2,nz)+1 > + kp1=mod(k,nz)+1 > + do j=1,ny > + jm1=mod(j+ny-2,ny)+1 > + jp1=mod(j,ny)+1 > + do i=1,nx > + im1=mod(i+nx-2,nx)+1 > + ip1=mod(i,nx)+1 > + do l=1,nb > + tem=0.0 > + do m=1,nb > + tem=tem+ > + 1 a(l,m,i,j,k)*x(m,i,j,k)+ > + 2 axp(l,m,i,j,k)*x(m,ip1,j,k)+ > + 3 ayp(l,m,i,j,k)*x(m,i,jp1,k)+ > + 4 azp(l,m,i,j,k)*x(m,i,j,kp1)+ > + 5 axm(l,m,i,j,k)*x(m,im1,j,k)+ > + 6 aym(l,m,i,j,k)*x(m,i,jm1,k)+ > + 7 azm(l,m,i,j,k)*x(m,i,j,km1) > + enddo > + y(l,i,j,k)=tem > + enddo > + enddo > + enddo > + enddo > + return > + end > + > +! { dg-final { scan-tree-dump-times "is interchanged" 1 "linterchange" } } > -- > 2.26.2
diff --git a/gcc/gimple-loop-interchange.cc b/gcc/gimple-loop-interchange.cc index f45b9364644..80f749b6071 100644 --- a/gcc/gimple-loop-interchange.cc +++ b/gcc/gimple-loop-interchange.cc @@ -1280,12 +1280,15 @@ tree_loop_interchange::move_code_to_inner_loop (class loop *outer, arr[i][j - 1][k] = 0; */ static void -compute_access_stride (class loop *loop_nest, class loop *loop, +compute_access_stride (class loop *&loop_nest, class loop *loop, data_reference_p dr) { vec<tree> *strides = new vec<tree> (); - basic_block bb = gimple_bb (DR_STMT (dr)); + dr->aux = strides; + basic_block bb = gimple_bb (DR_STMT (dr)); + if (!flow_bb_inside_loop_p (loop_nest, bb)) + return; while (!flow_bb_inside_loop_p (loop, bb)) { strides->safe_push (build_int_cst (sizetype, 0)); @@ -1313,39 +1316,47 @@ compute_access_stride (class loop *loop_nest, class loop *loop, } /* Otherwise punt. */ else - { - dr->aux = strides; - return; - } + return; } tree scev_base = build_fold_addr_expr (ref); tree scev = analyze_scalar_evolution (loop, scev_base); - scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev); - if (! chrec_contains_undetermined (scev)) + if (chrec_contains_undetermined (scev)) + return; + + tree orig_scev = scev; + do + { + scev = instantiate_scev (loop_preheader_edge (loop_nest), + loop, orig_scev); + if (! chrec_contains_undetermined (scev)) + break; + + /* If we couldn't instantiate for the desired nest, shrink it. */ + if (loop_nest == loop) + return; + loop_nest = loop_nest->inner; + } while (1); + + tree sl = scev; + class loop *expected = loop; + while (TREE_CODE (sl) == POLYNOMIAL_CHREC) { - tree sl = scev; - class loop *expected = loop; - while (TREE_CODE (sl) == POLYNOMIAL_CHREC) + class loop *sl_loop = get_chrec_loop (sl); + while (sl_loop != expected) { - class loop *sl_loop = get_chrec_loop (sl); - while (sl_loop != expected) - { - strides->safe_push (size_int (0)); - expected = loop_outer (expected); - } - strides->safe_push (CHREC_RIGHT (sl)); - sl = CHREC_LEFT (sl); + strides->safe_push (size_int (0)); expected = loop_outer (expected); } - if (! tree_contains_chrecs (sl, NULL)) - while (expected != loop_outer (loop_nest)) - { - strides->safe_push (size_int (0)); - expected = loop_outer (expected); - } + strides->safe_push (CHREC_RIGHT (sl)); + sl = CHREC_LEFT (sl); + expected = loop_outer (expected); } - - dr->aux = strides; + if (! tree_contains_chrecs (sl, NULL)) + while (expected != loop_outer (loop_nest)) + { + strides->safe_push (size_int (0)); + expected = loop_outer (expected); + } } /* Given loop nest LOOP_NEST with innermost LOOP, the function computes @@ -1363,9 +1374,10 @@ compute_access_strides (class loop *loop_nest, class loop *loop, data_reference_p dr; vec<tree> *stride; + class loop *interesting_loop_nest = loop_nest; for (i = 0; datarefs.iterate (i, &dr); ++i) { - compute_access_stride (loop_nest, loop, dr); + compute_access_stride (interesting_loop_nest, loop, dr); stride = DR_ACCESS_STRIDE (dr); if (stride->length () < num_loops) { diff --git a/gcc/testsuite/gfortran.dg/pr99956.f b/gcc/testsuite/gfortran.dg/pr99956.f new file mode 100644 index 00000000000..b5c0be3912d --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr99956.f @@ -0,0 +1,45 @@ +! { dg-do compile } +! { dg-options "-O3 -ffast-math -floop-interchange -fdump-tree-linterchange-details" } + + subroutine mat_times_vec(y,x,a,axp,ayp,azp,axm,aym,azm, + $ nb,nx,ny,nz) + implicit none + integer nb,nx,ny,nz,i,j,k,m,l,kit,im1,ip1,jm1,jp1,km1,kp1 + + real*8 y(nb,nx,ny,nz),x(nb,nx,ny,nz),tem + + real*8 a(nb,nb,nx,ny,nz), + 1 axp(nb,nb,nx,ny,nz),ayp(nb,nb,nx,ny,nz),azp(nb,nb,nx,ny,nz), + 2 axm(nb,nb,nx,ny,nz),aym(nb,nb,nx,ny,nz),azm(nb,nb,nx,ny,nz) + + + do k=1,nz + km1=mod(k+nz-2,nz)+1 + kp1=mod(k,nz)+1 + do j=1,ny + jm1=mod(j+ny-2,ny)+1 + jp1=mod(j,ny)+1 + do i=1,nx + im1=mod(i+nx-2,nx)+1 + ip1=mod(i,nx)+1 + do l=1,nb + tem=0.0 + do m=1,nb + tem=tem+ + 1 a(l,m,i,j,k)*x(m,i,j,k)+ + 2 axp(l,m,i,j,k)*x(m,ip1,j,k)+ + 3 ayp(l,m,i,j,k)*x(m,i,jp1,k)+ + 4 azp(l,m,i,j,k)*x(m,i,j,kp1)+ + 5 axm(l,m,i,j,k)*x(m,im1,j,k)+ + 6 aym(l,m,i,j,k)*x(m,i,jm1,k)+ + 7 azm(l,m,i,j,k)*x(m,i,j,km1) + enddo + y(l,i,j,k)=tem + enddo + enddo + enddo + enddo + return + end + +! { dg-final { scan-tree-dump-times "is interchanged" 1 "linterchange" } }