@@ -1,5 +1,8 @@
2015-04-21 Tom de Vries <tom@codesourcery.com>
+ * passes.def: Add pass_scev_cprop to pass_oacc_kernels.
+ * tree-ssa-loop.c (pass_scev_cprop::clone): New function.
+
* passes.def: Add pass_parallelize_loops_oacc_kernels in pass group
pass_oacc_kernels.
* tree-parloops.c (create_parallel_loop, gen_parallel_loop): Add
@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_tree_loop_init);
NEXT_PASS (pass_lim);
NEXT_PASS (pass_copy_prop);
+ NEXT_PASS (pass_scev_cprop);
NEXT_PASS (pass_parallelize_loops_oacc_kernels);
NEXT_PASS (pass_expand_omp_ssa);
NEXT_PASS (pass_tree_loop_done);
@@ -1,6 +1,13 @@
2015-04-21 Tom de Vries <tom@codesourcery.com>
Thomas Schwinge <thomas@codesourcery.com>
+ * gcc.dg/pr41488.c: Update for new pass_scev_cprop.
+ * gcc.dg/tree-ssa/loop-17.c: Likewise.
+ * gcc.dg/tree-ssa/loop-39.c: Likewise.
+ * gcc.dg/tree-ssa/scev-7.c: Likewise.
+ * gfortran.dg/goacc/kernels-loop-2.f95: New test.
+ * gfortran.dg/goacc/kernels-loop.f95: New test.
+
* c-c++-common/goacc/kernels-loop-2.c: New test.
* c-c++-common/goacc/kernels-loop.c: New test.
* c-c++-common/goacc/kernels-loop-n.c: New test.
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-sccp-scev" } */
+/* { dg-options "-O2 -fdump-tree-sccp2-scev" } */
struct struct_t
{
@@ -14,5 +14,5 @@ void foo (struct struct_t* sp, int start, int end)
sp->data[i+start] = 0;
}
-/* { dg-final { scan-tree-dump-times "Simplify PEELED_CHREC into POLYNOMIAL_CHREC" 1 "sccp" } } */
-/* { dg-final { cleanup-tree-dump "sccp" } } */
+/* { dg-final { scan-tree-dump-times "Simplify PEELED_CHREC into POLYNOMIAL_CHREC" 1 "sccp2" } } */
+/* { dg-final { cleanup-tree-dump "sccp2" } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-sccp-details" } */
+/* { dg-options "-O -fdump-tree-sccp2-details" } */
/* To determine the number of iterations in this loop we need to fold
p_4 + 4B > p_4 + 8B to false. This transformation has caused
@@ -15,5 +15,5 @@ int foo (int *p)
return i;
}
-/* { dg-final { scan-tree-dump "# of iterations 1, bounded by 1" "sccp" } } */
-/* { dg-final { cleanup-tree-dump "sccp" } } */
+/* { dg-final { scan-tree-dump "# of iterations 1, bounded by 1" "sccp2" } } */
+/* { dg-final { cleanup-tree-dump "sccp2" } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-sccp-details" } */
+/* { dg-options "-O2 -fdump-tree-sccp2-details" } */
int
foo (unsigned int n)
@@ -22,5 +22,5 @@ foo (unsigned int n)
return r + n;
}
-/* { dg-final { scan-tree-dump "# of iterations \[^\n\r]*, bounded by 8" "sccp" } } */
-/* { dg-final { cleanup-tree-dump "sccp" } } */
+/* { dg-final { scan-tree-dump "# of iterations \[^\n\r]*, bounded by 8" "sccp2" } } */
+/* { dg-final { cleanup-tree-dump "sccp2" } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-sccp-scev" } */
+/* { dg-options "-O2 -fdump-tree-sccp2-scev" } */
struct struct_t
{
@@ -14,5 +14,5 @@ void foo (struct struct_t* sp, int start, int end)
sp->data[i+start] = 0;
}
-/* { dg-final { scan-tree-dump-times "Simplify PEELED_CHREC into POLYNOMIAL_CHREC" 1 "sccp" } } */
-/* { dg-final { cleanup-tree-dump "sccp" } } */
+/* { dg-final { scan-tree-dump-times "Simplify PEELED_CHREC into POLYNOMIAL_CHREC" 1 "sccp2" } } */
+/* { dg-final { cleanup-tree-dump "sccp2" } } */
new file mode 100644
@@ -0,0 +1,46 @@
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" }
+! { dg-additional-options "-fdump-tree-optimized" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc kernels copyout (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+
+ !$acc kernels copyout (b(0:n-1))
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+ !$acc end kernels
+
+ !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
+
+! Check that only three loops are analyzed, and that all can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } }
+
+! Check that the loop has been split off into a function.
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
+
+! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
new file mode 100644
@@ -0,0 +1,40 @@
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" }
+! { dg-additional-options "-fdump-tree-optimized" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+
+ !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
+
+! Check that only one loop is analyzed, and that it can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } }
+
+! Check that the loop has been split off into a function.
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
+
+! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
@@ -425,6 +425,7 @@ public:
/* opt_pass methods: */
virtual bool gate (function *) { return flag_tree_scev_cprop; }
virtual unsigned int execute (function *) { return scev_const_prop (); }
+ opt_pass * clone () { return new pass_scev_cprop (m_ctxt); }
}; // class pass_scev_cprop
@@ -1,6 +1,9 @@
2015-04-21 Tom de Vries <tom@codesourcery.com>
Thomas Schwinge <thomas@codesourcery.com>
+ * testsuite/libgomp.oacc-fortran/kernels-loop-2.f95: New test.
+ * testsuite/libgomp.oacc-fortran/kernels-loop.f95: New test.
+
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop.c: New test.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c: New test.
new file mode 100644
@@ -0,0 +1,32 @@
+! { dg-do run }
+! { dg-options "-ftree-parallelize-loops=32" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc kernels copyout (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+
+ !$acc kernels copyout (b(0:n-1))
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+ !$acc end kernels
+
+ !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
new file mode 100644
@@ -0,0 +1,28 @@
+! { dg-do run }
+! { dg-options "-ftree-parallelize-loops=32" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+
+ !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
Hi! On Sat, 15 Nov 2014 13:14:52 +0100, Tom de Vries <Tom_deVries@mentor.com> wrote: > I'm submitting a patch series with initial support for the oacc kernels directive. Committed to gomp-4_0-branch in r222286: commit 0c33234340aa17536c2c86e0982c42070c89226b Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Tue Apr 21 20:22:54 2015 +0000 Handle global loop counters in fortran oacc kernels Fortran loop counters cannot have a scope limited to the kernels region, and their function scope inhibits parallelization. At the technical level, it seems possible to do DCE and get rid of the dead code that is inhibiting parallelization (in other words, the code copying the loop iterator value out of the region), but probably some effort would be involved. Another possibility is to add an assignment of the final value of the loop iteration variable after the loop to cut the dependency, though this will only work for loops where that value is known at compile time -- which is exactly what pass_scev_cprop does. gcc/ * passes.def: Add pass_scev_cprop to pass_oacc_kernels. * tree-ssa-loop.c (pass_scev_cprop::clone): New function. gcc/testsuite/ * gcc.dg/pr41488.c: Update for new pass_scev_cprop. * gcc.dg/tree-ssa/loop-17.c: Likewise. * gcc.dg/tree-ssa/loop-39.c: Likewise. * gcc.dg/tree-ssa/scev-7.c: Likewise. * gfortran.dg/goacc/kernels-loop-2.f95: New test. * gfortran.dg/goacc/kernels-loop.f95: New test. libgomp/ * testsuite/libgomp.oacc-fortran/kernels-loop-2.f95: New test. * testsuite/libgomp.oacc-fortran/kernels-loop.f95: New test. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@222286 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.gomp | 3 ++ gcc/passes.def | 1 + gcc/testsuite/ChangeLog.gomp | 7 +++ gcc/testsuite/gcc.dg/pr41488.c | 6 +-- gcc/testsuite/gcc.dg/tree-ssa/loop-17.c | 6 +-- gcc/testsuite/gcc.dg/tree-ssa/loop-39.c | 6 +-- gcc/testsuite/gcc.dg/tree-ssa/scev-7.c | 6 +-- gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 | 46 ++++++++++++++++++++ gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 | 40 +++++++++++++++++ gcc/tree-ssa-loop.c | 1 + libgomp/ChangeLog.gomp | 3 ++ .../libgomp.oacc-fortran/kernels-loop-2.f95 | 32 ++++++++++++++ .../libgomp.oacc-fortran/kernels-loop.f95 | 28 ++++++++++++ 13 files changed, 173 insertions(+), 12 deletions(-) Grüße, Thomas