diff mbox

Improve PR33244 somewhat

Message ID Pine.LNX.4.64.1009071427450.29722@wotan.suse.de
State New
Headers show

Commit Message

Michael Matz Sept. 7, 2010, 12:32 p.m. UTC
Hello,

this patch fixes one of the problems that prevent vectorization of the
testcase in PR33244.  Namely, code sinking sometimes fills loop latch
blocks, which prevents later vectorization.  The fix ensures that we don't
sink anything into a latch block that is still empty.

The testcase I add is a modified version of the testcase in the PR because 
of unrelated problems also preventing vectorization (inlining needs to 
happen, a library with vectorized logf needs to be available, and loop 
interchange needs to work).

Regstrapping on x86_64-linux (all default languages) in progress.  Okay?


Ciao,
Michael.

Comments

Richard Biener Sept. 7, 2010, 12:35 p.m. UTC | #1
On Tue, Sep 7, 2010 at 2:32 PM, Michael Matz <matz@suse.de> wrote:
> Hello,
>
> this patch fixes one problem of those preventing to vectorize the testcase
> in PR33244.  Namely that code sinking sometimes fills loop latch blocks
> preventing later vectorization.  Changed by ensuring that we don't sink
> anything into a latch block that is still empty.
>
> The testcase I add is a modified version of the testcase in the PR because
> of unrelated problems also preventing vectorization (inlining needs to
> happen, a library with vectorized logf needs to be available, and loop
> interchange needs to work).
>
> Regstrapping on x86_64-linux (all default languages) in progress.  Okay?

Ok.

Thanks,
Richard.

>
> Ciao,
> Michael.
> --
>        PR tree-optimization/33244
>        * tree-ssa-sink.c (statement_sink_location): Don't sink into
>        empty loop latches.
>
> testsuite/
>        PR tree-optimization/33244
>        * gfortran.dg/vect/fast-math-vect-8.f90: New test.
>
> Index: tree-ssa-sink.c
> ===================================================================
> --- tree-ssa-sink.c     (revision 163773)
> +++ tree-ssa-sink.c     (working copy)
> @@ -428,6 +428,12 @@ statement_sink_location (gimple stmt, ba
>       || sinkbb->loop_father != frombb->loop_father)
>     return false;
>
> +  /* If the latch block is empty, don't make it non-empty by sinking
> +     something into it.  */
> +  if (sinkbb == frombb->loop_father->latch
> +      && empty_block_p (sinkbb))
> +    return false;
> +
>   /* Move the expression to a post dominator can't reduce the number of
>      executions.  */
>   if (dominated_by_p (CDI_POST_DOMINATORS, frombb, sinkbb))
> Index: testsuite/gfortran.dg/vect/fast-math-vect-8.f90
> ===================================================================
> --- testsuite/gfortran.dg/vect/fast-math-vect-8.f90     (revision 0)
> +++ testsuite/gfortran.dg/vect/fast-math-vect-8.f90     (revision 0)
> @@ -0,0 +1,93 @@
> +! { dg-do compile }
> +! { dg-require-effective-target vect_float }
> +
> +module solv_cap
> +
> +  implicit none
> +
> +  public  :: init_solve
> +
> +  integer, parameter, public :: dp = 4
> +
> +  real(kind=dp), private :: Pi, Mu0, c0, eps0
> +  logical,       private :: UseFFT, UsePreco
> +  real(kind=dp), private :: D1, D2
> +  integer,       private, save :: Ng1=0, Ng2=0
> +  integer,       private, pointer,     dimension(:,:)  :: Grid
> +  real(kind=dp), private, allocatable, dimension(:,:)  :: G
> +
> +contains
> +
> +  subroutine init_solve(Grid_in, GrSize1, GrSize2, UseFFT_in, UsePreco_in)
> +    integer, intent(in), target, dimension(:,:) :: Grid_in
> +    real(kind=dp), intent(in)  :: GrSize1, GrSize2
> +    logical,       intent(in)  :: UseFFT_in, UsePreco_in
> +    integer                    :: i, j
> +
> +    Pi = acos(-1.0_dp)
> +    Mu0 = 4e-7_dp * Pi
> +    c0 = 299792458
> +    eps0 = 1 / (Mu0 * c0**2)
> +
> +    UseFFT = UseFFT_in
> +    UsePreco = UsePreco_in
> +
> +    if(Ng1 /= 0 .and. allocated(G) ) then
> +      deallocate( G )
> +    end if
> +
> +    Grid => Grid_in
> +    Ng1 = size(Grid, 1)
> +    Ng2 = size(Grid, 2)
> +    D1 = GrSize1/Ng1
> +    D2 = GrSize2/Ng2
> +
> +    allocate( G(0:Ng1,0:Ng2) )
> +
> +    write(unit=*, fmt=*) "Calculating G"
> +    do i=0,Ng1
> +      do j=0,Ng2
> +        G(j,i) = Ginteg( -D1/2,-D2/2, D1/2,D2/2, i*D1,j*D2 )
> +      end do
> +    end do
> +
> +    if(UseFFT) then
> +      write(unit=*, fmt=*) "Transforming G"
> +      call FourirG(G,1)
> +    end if
> +
> +    return
> +
> +
> +  contains
> +  function Ginteg(xq1,yq1, xq2,yq2, xp,yp)  result(G)
> +    real(kind=dp), intent(in) :: xq1,yq1, xq2,yq2, xp,yp
> +    real(kind=dp)             :: G
> +    real(kind=dp)             :: x1,x2,y1,y2,t
> +    x1 = xq1-xp
> +    x2 = xq2-xp
> +    y1 = yq1-yp
> +    y2 = yq2-yp
> +
> +    if (x1+x2 < 0) then
> +      t = -x1
> +      x1 = -x2
> +      x2 = t
> +    end if
> +    if (y1+y2 < 0) then
> +      t = -y1
> +      y1 = -y2
> +      y2 = t
> +    end if
> +
> +    G = (x2*y2)-(x1*y2)-(x2*y1)+(x1*y1)
> +
> +    return
> +  end function Ginteg
> +
> +  end subroutine init_solve
> +
> +end module solv_cap
> +
> +! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } }
> +! { dg-final { cleanup-tree-dump "vect" } }
>
H.J. Lu Nov. 19, 2010, 5:24 a.m. UTC | #2
On Tue, Sep 7, 2010 at 5:32 AM, Michael Matz <matz@suse.de> wrote:
> Hello,
>
> this patch fixes one problem of those preventing to vectorize the testcase
> in PR33244.  Namely that code sinking sometimes fills loop latch blocks
> preventing later vectorization.  Changed by ensuring that we don't sink
> anything into a latch block that is still empty.
>
> The testcase I add is a modified version of the testcase in the PR because
> of unrelated problems also preventing vectorization (inlining needs to
> happen, a library with vectorized logf needs to be available, and loop
> interchange needs to work).
>
> Regstrapping on x86_64-linux (all default languages) in progress.  Okay?
>
>
> Ciao,
> Michael.
> --
>        PR tree-optimization/33244
>        * tree-ssa-sink.c (statement_sink_location): Don't sink into
>        empty loop latches.
>

This caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46077


H.J.
H.J. Lu Jan. 12, 2011, 2:22 p.m. UTC | #3
On Thu, Nov 18, 2010 at 9:24 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Tue, Sep 7, 2010 at 5:32 AM, Michael Matz <matz@suse.de> wrote:
>> Hello,
>>
>> this patch fixes one problem of those preventing to vectorize the testcase
>> in PR33244.  Namely that code sinking sometimes fills loop latch blocks
>> preventing later vectorization.  Changed by ensuring that we don't sink
>> anything into a latch block that is still empty.
>>
>> The testcase I add is a modified version of the testcase in the PR because
>> of unrelated problems also preventing vectorization (inlining needs to
>> happen, a library with vectorized logf needs to be available, and loop
>> interchange needs to work).
>>
>> Regstrapping on x86_64-linux (all default languages) in progress.  Okay?
>>
>>
>> Ciao,
>> Michael.
>> --
>>        PR tree-optimization/33244
>>        * tree-ssa-sink.c (statement_sink_location): Don't sink into
>>        empty loop latches.
>>
>
> This caused:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46077
>

This also caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47265
diff mbox

Patch

Index: tree-ssa-sink.c
===================================================================
--- tree-ssa-sink.c	(revision 163773)
+++ tree-ssa-sink.c	(working copy)
@@ -428,6 +428,12 @@  statement_sink_location (gimple stmt, ba
       || sinkbb->loop_father != frombb->loop_father)
     return false;
 
+  /* If the latch block is empty, don't make it non-empty by sinking
+     something into it.  */
+  if (sinkbb == frombb->loop_father->latch
+      && empty_block_p (sinkbb))
+    return false;
+
   /* Move the expression to a post dominator can't reduce the number of
      executions.  */
   if (dominated_by_p (CDI_POST_DOMINATORS, frombb, sinkbb))
Index: testsuite/gfortran.dg/vect/fast-math-vect-8.f90
===================================================================
--- testsuite/gfortran.dg/vect/fast-math-vect-8.f90	(revision 0)
+++ testsuite/gfortran.dg/vect/fast-math-vect-8.f90	(revision 0)
@@ -0,0 +1,93 @@ 
+! { dg-do compile } 
+! { dg-require-effective-target vect_float } 
+
+module solv_cap
+
+  implicit none
+
+  public  :: init_solve
+
+  integer, parameter, public :: dp = 4
+
+  real(kind=dp), private :: Pi, Mu0, c0, eps0
+  logical,       private :: UseFFT, UsePreco
+  real(kind=dp), private :: D1, D2
+  integer,       private, save :: Ng1=0, Ng2=0
+  integer,       private, pointer,     dimension(:,:)  :: Grid
+  real(kind=dp), private, allocatable, dimension(:,:)  :: G
+
+contains
+
+  subroutine init_solve(Grid_in, GrSize1, GrSize2, UseFFT_in, UsePreco_in)
+    integer, intent(in), target, dimension(:,:) :: Grid_in
+    real(kind=dp), intent(in)  :: GrSize1, GrSize2
+    logical,       intent(in)  :: UseFFT_in, UsePreco_in
+    integer                    :: i, j
+
+    Pi = acos(-1.0_dp)
+    Mu0 = 4e-7_dp * Pi
+    c0 = 299792458
+    eps0 = 1 / (Mu0 * c0**2)
+
+    UseFFT = UseFFT_in
+    UsePreco = UsePreco_in
+
+    if(Ng1 /= 0 .and. allocated(G) ) then
+      deallocate( G )
+    end if
+
+    Grid => Grid_in
+    Ng1 = size(Grid, 1)
+    Ng2 = size(Grid, 2)
+    D1 = GrSize1/Ng1
+    D2 = GrSize2/Ng2
+
+    allocate( G(0:Ng1,0:Ng2) )
+
+    write(unit=*, fmt=*) "Calculating G"
+    do i=0,Ng1
+      do j=0,Ng2
+        G(j,i) = Ginteg( -D1/2,-D2/2, D1/2,D2/2, i*D1,j*D2 )
+      end do
+    end do
+
+    if(UseFFT) then
+      write(unit=*, fmt=*) "Transforming G"
+      call FourirG(G,1)
+    end if
+
+    return
+
+
+  contains
+  function Ginteg(xq1,yq1, xq2,yq2, xp,yp)  result(G)
+    real(kind=dp), intent(in) :: xq1,yq1, xq2,yq2, xp,yp
+    real(kind=dp)             :: G
+    real(kind=dp)             :: x1,x2,y1,y2,t
+    x1 = xq1-xp
+    x2 = xq2-xp
+    y1 = yq1-yp
+    y2 = yq2-yp
+
+    if (x1+x2 < 0) then
+      t = -x1
+      x1 = -x2
+      x2 = t
+    end if
+    if (y1+y2 < 0) then
+      t = -y1
+      y1 = -y2
+      y2 = t
+    end if
+
+    G = (x2*y2)-(x1*y2)-(x2*y1)+(x1*y1)
+
+    return
+  end function Ginteg
+
+  end subroutine init_solve
+
+end module solv_cap
+
+! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } 
+! { dg-final { cleanup-tree-dump "vect" } }