diff mbox

[gomp4] correct a tile bug with combined loops in fortran

Message ID b0310400-9891-dafd-8112-df45bf3e56ae@codesourcery.com
State New
Headers show

Commit Message

Cesar Philippidis Oct. 6, 2016, 3:14 a.m. UTC
Nathan spotted a bug where the fortran FE wasn't lowering the tile
clause properly in combined parallel/kernels loops. The problem was that
gfc_trans_omp_do has two sources of clauses, code->ext.omp_clauses and
do_clauses. Because of the way that the fortran FE splits combined
loops, the tile clause is only associated with do_clauses. This patch
teaches gfc_trans_omp_do to check for the tile clause in both code and
do_clauses.

I've applied this patch to gomp-4_0-branch.

Cesar

Comments

Nathan Sidwell Oct. 6, 2016, 10:16 a.m. UTC | #1
On 10/05/16 23:14, Cesar Philippidis wrote:
> Nathan spotted a bug where the fortran FE wasn't lowering the tile
> clause properly in combined parallel/kernels loops. The problem was that
> gfc_trans_omp_do has two sources of clauses, code->ext.omp_clauses and
> do_clauses. Because of the way that the fortran FE splits combined
> loops, the tile clause is only associated with do_clauses. This patch
> teaches gfc_trans_omp_do to check for the tile clause in both code and
> do_clauses.
>
> I've applied this patch to gomp-4_0-branch.


thanks!
diff mbox

Patch

2016-10-05  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/fortran/
	* trans-openmp.c (gfc_trans_omp_do): Check do_clauses for the
	tile clause if it's not present in clauses.

	gcc/testsuite/
	* gfortran.dg/goacc/tile-lowering.f95: Add more coverage.

diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index e0e1c8b..37744e1 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -3353,14 +3353,15 @@  gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
   vec<dovar_init> inits = vNULL;
   dovar_init *di;
   unsigned ix;
+  gfc_expr_list *tile = do_clauses ? do_clauses->tile_list : clauses->tile_list;
 
   /* Both collapsed and tiled loops are lowered the same way.  In
      OpenACC, those clauses are not compatible, so prioritize the tile
      clause, if present.  */
-  if (clauses->tile_list)
+  if (tile)
     {
       collapse = 0;
-      for (gfc_expr_list *el = clauses->tile_list; el; el = el->next)
+      for (gfc_expr_list *el = tile; el; el = el->next)
 	collapse++;
     }
 
diff --git a/gcc/testsuite/gfortran.dg/goacc/tile-lowering.f95 b/gcc/testsuite/gfortran.dg/goacc/tile-lowering.f95
index 3774b38..1cb8b9c 100644
--- a/gcc/testsuite/gfortran.dg/goacc/tile-lowering.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/tile-lowering.f95
@@ -1,7 +1,7 @@ 
 ! { dg-do compile }
 ! { dg-additional-options "-fdump-tree-original" }
 
-subroutine test
+subroutine par
   integer i, j, k
 
   !$acc parallel
@@ -12,7 +12,7 @@  subroutine test
   !$acc loop tile (*)
   do i = 1, 10
   end do
-  
+
   !$acc loop tile (1,2)
   do i = 1, 10
      do j = 1, 10
@@ -37,7 +37,6 @@  subroutine test
      end do
   end do
 
-  
   !$acc loop tile (1,2,3)
   do i = 1, 10
      do j = 1, 10
@@ -70,17 +69,224 @@  subroutine test
      end do
   end do
   !$acc end parallel
-end subroutine test
-
-! { dg-final { scan-tree-dump-times "tile\\(1\\)" 1 "original" } }
-! { dg-final { scan-tree-dump-times "tile\\(0\\)" 1 "original" } }
-! { dg-final { scan-tree-dump-times "tile\\(1, 2\\)" 1 "original" } }
-! { dg-final { scan-tree-dump-times "tile\\(0, 2\\)" 1 "original" } }
-! { dg-final { scan-tree-dump-times "tile\\(1, 0\\)" 1 "original" } }
-! { dg-final { scan-tree-dump-times "tile\\(0, 0\\)" 1 "original" } }
-! { dg-final { scan-tree-dump-times "tile\\(1, 2, 3\\)" 1 "original" } }
-! { dg-final { scan-tree-dump-times "tile\\(0, 2, 3\\)" 1 "original" } }
-! { dg-final { scan-tree-dump-times "tile\\(1, 0, 3\\)" 1 "original" } }
-! { dg-final { scan-tree-dump-times "tile\\(1, 2, 0\\)" 1 "original" } }
-! { dg-final { scan-tree-dump-times "for \\(" 22 "original" } }
+end subroutine par
+
+subroutine kerns
+  integer i, j, k
+
+  !$acc kernels
+  !$acc loop tile (1)
+  do i = 1, 10
+  end do
+
+  !$acc loop tile (*)
+  do i = 1, 10
+  end do
+
+  !$acc loop tile (1,2)
+  do i = 1, 10
+     do j = 1, 10
+     end do
+  end do
+
+  !$acc loop tile (*,2)
+  do i = 1, 10
+     do j = 1, 10
+     end do
+  end do
+
+  !$acc loop tile (1,*)
+  do i = 1, 10
+     do j = 1, 10
+     end do
+  end do
+
+  !$acc loop tile (*,*)
+  do i = 1, 10
+     do j = 1, 10
+     end do
+  end do
+
+  !$acc loop tile (1,2,3)
+  do i = 1, 10
+     do j = 1, 10
+        do k = 1, 10
+        end do
+     end do
+  end do
+
+  !$acc loop tile (*,2,3)
+  do i = 1, 10
+     do j = 1, 10
+        do k = 1, 10
+        end do
+     end do
+  end do
+
+  !$acc loop tile (1,*,3)
+  do i = 1, 10
+     do j = 1, 10
+        do k = 1, 10
+        end do
+     end do
+  end do
+
+  !$acc loop tile (1,2,*)
+  do i = 1, 10
+     do j = 1, 10
+        do k = 1, 10
+        end do
+     end do
+  end do
+  !$acc end kernels
+end subroutine kerns
+
+subroutine parloop
+  integer i, j, k
+
+  !$acc parallel loop tile (1)
+  do i = 1, 10
+  end do
+
+  !$acc parallel loop tile (*)
+  do i = 1, 10
+  end do
+
+  !$acc parallel loop tile (1,2)
+  do i = 1, 10
+     do j = 1, 10
+     end do
+  end do
+
+  !$acc parallel loop tile (*,2)
+  do i = 1, 10
+     do j = 1, 10
+     end do
+  end do
+
+  !$acc parallel loop tile (1,*)
+  do i = 1, 10
+     do j = 1, 10
+     end do
+  end do
+
+  !$acc parallel loop tile (*,*)
+  do i = 1, 10
+     do j = 1, 10
+     end do
+  end do
+
+  !$acc parallel loop tile (1,2,3)
+  do i = 1, 10
+     do j = 1, 10
+        do k = 1, 10
+        end do
+     end do
+  end do
+
+  !$acc parallel loop tile (*,2,3)
+  do i = 1, 10
+     do j = 1, 10
+        do k = 1, 10
+        end do
+     end do
+  end do
+
+  !$acc parallel loop tile (1,*,3)
+  do i = 1, 10
+     do j = 1, 10
+        do k = 1, 10
+        end do
+     end do
+  end do
+
+  !$acc parallel loop tile (1,2,*)
+  do i = 1, 10
+     do j = 1, 10
+        do k = 1, 10
+        end do
+     end do
+  end do
+end subroutine parloop
+
+subroutine kernloop
+  integer i, j, k
+
+  !$acc kernels loop tile (1)
+  do i = 1, 10
+  end do
+
+  !$acc kernels loop tile (*)
+  do i = 1, 10
+  end do
+
+  !$acc kernels loop tile (1,2)
+  do i = 1, 10
+     do j = 1, 10
+     end do
+  end do
+
+  !$acc kernels loop tile (*,2)
+  do i = 1, 10
+     do j = 1, 10
+     end do
+  end do
+
+  !$acc kernels loop tile (1,*)
+  do i = 1, 10
+     do j = 1, 10
+     end do
+  end do
+
+  !$acc kernels loop tile (*,*)
+  do i = 1, 10
+     do j = 1, 10
+     end do
+  end do
+
+  !$acc kernels loop tile (1,2,3)
+  do i = 1, 10
+     do j = 1, 10
+        do k = 1, 10
+        end do
+     end do
+  end do
+
+  !$acc kernels loop tile (*,2,3)
+  do i = 1, 10
+     do j = 1, 10
+        do k = 1, 10
+        end do
+     end do
+  end do
+
+  !$acc kernels loop tile (1,*,3)
+  do i = 1, 10
+     do j = 1, 10
+        do k = 1, 10
+        end do
+     end do
+  end do
+
+  !$acc kernels loop tile (1,2,*)
+  do i = 1, 10
+     do j = 1, 10
+        do k = 1, 10
+        end do
+     end do
+  end do
+end subroutine kernloop
+
 
+! { dg-final { scan-tree-dump-times "tile\\(1\\)" 4 "original" } }
+! { dg-final { scan-tree-dump-times "tile\\(0\\)" 4 "original" } }
+! { dg-final { scan-tree-dump-times "tile\\(1, 2\\)" 4 "original" } }
+! { dg-final { scan-tree-dump-times "tile\\(0, 2\\)" 4 "original" } }
+! { dg-final { scan-tree-dump-times "tile\\(1, 0\\)" 4 "original" } }
+! { dg-final { scan-tree-dump-times "tile\\(0, 0\\)" 4 "original" } }
+! { dg-final { scan-tree-dump-times "tile\\(1, 2, 3\\)" 4 "original" } }
+! { dg-final { scan-tree-dump-times "tile\\(0, 2, 3\\)" 4 "original" } }
+! { dg-final { scan-tree-dump-times "tile\\(1, 0, 3\\)" 4 "original" } }
+! { dg-final { scan-tree-dump-times "tile\\(1, 2, 0\\)" 4 "original" } }
+! { dg-final { scan-tree-dump-times "for \\(" 88 "original" } }
+! { dg-final { scan-tree-dump-times "while \\(" 0 "original" } }