2015-06-11 Cesar Philippidis <cesar@codesourcery.com>
gcc/fortran/
* trans-openmp.c (gfc_trans_omp_clauses): Handle the static
argument to the gang clause.
(gfc_trans_oacc_combined_directive): Pass the gang_expr, gang_static,
worker_expr and vector_expr members to loop_clauses.
gcc/
* omp-low.c (extract_omp_for_data): Adjust the chunk_size
based on the static argument of the gang clause.
gcc/testsuite/
* gfortran.dg/goacc/gang-static.f95: New test.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/gang-static-1.c: New test.
* testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: New test.
* testsuite/libgomp.oacc-fortran/gang-static-1.f90: New test.
@@ -2646,7 +2646,17 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
tree gang_var
= gfc_convert_expr_to_tree (block, clauses->gang_expr);
c = build_omp_clause (where.lb->location, OMP_CLAUSE_GANG);
- OMP_CLAUSE_GANG_EXPR (c) = gang_var;
+ if (clauses->gang_static)
+ OMP_CLAUSE_GANG_STATIC_EXPR (c) = gang_var;
+ else
+ OMP_CLAUSE_GANG_EXPR (c) = gang_var;
+ omp_clauses = gfc_trans_add_clause (c, omp_clauses);
+ }
+ else if (clauses->gang_static)
+ {
+ /* This corresponds to gang (static: *). */
+ c = build_omp_clause (where.lb->location, OMP_CLAUSE_GANG);
+ OMP_CLAUSE_GANG_STATIC_EXPR (c) = integer_minus_one_node;
omp_clauses = gfc_trans_add_clause (c, omp_clauses);
}
else
@@ -3476,8 +3486,12 @@ gfc_trans_oacc_combined_directive (gfc_code *code)
sizeof (construct_clauses));
loop_clauses.collapse = construct_clauses.collapse;
loop_clauses.gang = construct_clauses.gang;
+ loop_clauses.gang_expr = construct_clauses.gang_expr;
+ loop_clauses.gang_static = construct_clauses.gang_static;
loop_clauses.vector = construct_clauses.vector;
+ loop_clauses.vector_expr = construct_clauses.vector_expr;
loop_clauses.worker = construct_clauses.worker;
+ loop_clauses.worker_expr = construct_clauses.worker_expr;
loop_clauses.seq = construct_clauses.seq;
loop_clauses.independent = construct_clauses.independent;
construct_clauses.collapse = 0;
@@ -755,13 +755,26 @@ extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
fd->loop.cond_code = LT_EXPR;
}
- /* For OpenACC loops, force a chunk size of one, as this avoids the default
- scheduling where several subsequent iterations are being executed by the
- same thread. */
+ /* For OpenACC loops, take the chunk size from the gang clause's static
+ argument. Without one, or for static:*, force a chunk size of one to
+ avoid several subsequent iterations being run by the same thread. */
if (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
{
gcc_assert (fd->chunk_size == NULL_TREE);
- fd->chunk_size = build_int_cst (TREE_TYPE (fd->loop.v), 1);
+
+ tree gang = find_omp_clause (gimple_omp_for_clauses (for_stmt),
+ OMP_CLAUSE_GANG);
+ tree chunk_size = NULL_TREE;
+
+ if (gang)
+ {
+ chunk_size = OMP_CLAUSE_GANG_STATIC_EXPR (gang);
+ }
+
+ if (!chunk_size || chunk_size == integer_minus_one_node)
+ chunk_size = build_int_cst (TREE_TYPE (fd->loop.v), 1);
+
+ fd->chunk_size = chunk_size;
}
/* Extract the OpenACC gang, worker and vector clauses. */
new file mode 100644
@@ -0,0 +1,69 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-omplower" }
+
+program main
+ integer, parameter :: n = 100
+ integer i, a(n), b(n)
+ ! Reference input b(i) = i; every acc loop below stores b(i) plus its static argument (0 for static:*) in a.
+ do i = 1, n
+ b(i) = i
+ end do
+
+ !$acc parallel loop gang (static:*) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 0
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 0, n)
+
+ !$acc parallel loop gang (static:1) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 1
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 1, n)
+
+ !$acc parallel loop gang (static:2) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 2
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 2, n)
+
+ !$acc parallel loop gang (static:5) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 5
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 5, n)
+
+ !$acc parallel loop gang (static:20) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 20
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 20, n)
+
+end program main
+
+subroutine test (a, b, sarg, n)
+ integer n
+ integer a (n), b(n), sarg
+ integer i
+ ! Abort at the first index i in 1..n where a(i) differs from b(i) + sarg.
+ do i = 1, n
+ if (a(i) .ne. b(i) + sarg) call abort ()
+ end do
+end subroutine test
+
+! { dg-final { scan-tree-dump-times "gang\\(static:\\\*\\)" 1 "omplower" } }
+! { dg-final { scan-tree-dump-times "gang\\(static:1\\)" 1 "omplower" } }
+! { dg-final { scan-tree-dump-times "gang\\(static:2\\)" 1 "omplower" } }
+! { dg-final { scan-tree-dump-times "gang\\(static:5\\)" 1 "omplower" } }
+! { dg-final { scan-tree-dump-times "gang\\(static:20\\)" 1 "omplower" } }
+! { dg-final { cleanup-tree-dump "omplower" } }
new file mode 100644
@@ -0,0 +1,47 @@
+#include <assert.h>
+
+#define N 100
+
+/* Assert that a[i] == b[i] + sarg holds for each of the N elements.  */
+void test(int *a, int *b, int sarg)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    assert (a[i] == b[i] + sarg);
+}
+
+int
+main ()
+{
+  int a[N], b[N];
+  int i;
+
+  for (i = 0; i < N; i++)
+    b[i] = i+1;
+  /* Each acc loop stores b[i] plus its static argument (0 for static:*).  */
+#pragma acc parallel loop gang (static:*) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + 0;
+
+  test (a, b, 0);
+
+#pragma acc parallel loop gang (static:1) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + 1;
+
+  test (a, b, 1);
+
+#pragma acc parallel loop gang (static:5) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + 5;
+
+  test (a, b, 5);
+
+#pragma acc parallel loop gang (static:20) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = b[i] + 20;
+
+  test (a, b, 20);
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,58 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <assert.h>
+
+#define N 100
+
+/* Assert that A is split into SARG-sized chunks (one element if SARG is 0,
+   as used for static:*) whose values cycle through 0..9 chunk by chunk.  */
+void test(int *a, int sarg)
+{
+  int i, j, gang;
+
+  if (sarg == 0)
+    sarg = 1;
+
+  for (i = 0, gang = 0; i < N; i+=sarg, gang++)
+    for (j = 0; j < sarg && i + j < N; j++)
+      assert (a[i + j] == gang % 10);
+}
+
+int
+main ()
+{
+  int a[N];
+  int i;
+  /* a[i] gets __builtin_GOACC_ctaid (0), presumably the gang id (confirm).  */
+#pragma acc parallel loop gang (static:*) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = __builtin_GOACC_ctaid (0);
+
+  test (a, 0);
+
+#pragma acc parallel loop gang (static:1) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = __builtin_GOACC_ctaid (0);
+
+  test (a, 1);
+
+#pragma acc parallel loop gang (static:2) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = __builtin_GOACC_ctaid (0);
+
+  test (a, 2);
+
+#pragma acc parallel loop gang (static:5) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = __builtin_GOACC_ctaid (0);
+
+  test (a, 5);
+
+#pragma acc parallel loop gang (static:20) num_gangs (10)
+  for (i = 0; i < N; i++)
+    a[i] = __builtin_GOACC_ctaid (0);
+
+  test (a, 20);
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,61 @@
+! { dg-do run }
+
+program main
+ integer, parameter :: n = 100
+ integer i, a(n), b(n)
+ ! Reference input b(i) = i; every acc loop below stores b(i) plus its static argument (0 for static:*) in a.
+ do i = 1, n
+ b(i) = i
+ end do
+
+ !$acc parallel loop gang (static:*) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 0
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 0, n)
+
+ !$acc parallel loop gang (static:1) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 1
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 1, n)
+
+ !$acc parallel loop gang (static:2) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 2
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 2, n)
+
+ !$acc parallel loop gang (static:5) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 5
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 5, n)
+
+ !$acc parallel loop gang (static:20) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 20
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 20, n)
+
+end program main
+
+subroutine test (a, b, sarg, n)
+ integer n
+ integer a (n), b(n), sarg
+ integer i
+ ! Abort at the first index i in 1..n where a(i) differs from b(i) + sarg.
+ do i = 1, n
+ if (a(i) .ne. b(i) + sarg) call abort ()
+ end do
+end subroutine test