2015-11-04 Cesar Philippidis <cesar@codesourcery.com>
gcc/cp/
* pt.c (tsubst_omp_clauses): Add support for OMP_CLAUSE_{NUM_GANGS,
NUM_WORKERS,VECTOR_LENGTH,GANG,WORKER,VECTOR,ASYNC,WAIT,TILE,AUTO,
INDEPENDENT,SEQ}.
(tsubst_expr): Add support for OMP_CLAUSE_{KERNELS,PARALLEL,LOOP}.
gcc/testsuite/
* g++.dg/goacc/template-reduction.C: New test.
* g++.dg/goacc/template.C: New test.
@@ -14395,6 +14395,15 @@ tsubst_omp_clauses (tree clauses, bool declare_simd, bool allow_fields,
case OMP_CLAUSE_PRIORITY:
case OMP_CLAUSE_ORDERED:
case OMP_CLAUSE_HINT:
+ case OMP_CLAUSE_NUM_GANGS:
+ case OMP_CLAUSE_NUM_WORKERS:
+ case OMP_CLAUSE_VECTOR_LENGTH:
+ case OMP_CLAUSE_GANG:
+ case OMP_CLAUSE_WORKER:
+ case OMP_CLAUSE_VECTOR:
+ case OMP_CLAUSE_ASYNC:
+ case OMP_CLAUSE_WAIT:
+ case OMP_CLAUSE_TILE:
OMP_CLAUSE_OPERAND (nc, 0)
= tsubst_expr (OMP_CLAUSE_OPERAND (oc, 0), args, complain,
in_decl, /*integral_constant_expression_p=*/false);
@@ -14449,6 +14458,9 @@ tsubst_omp_clauses (tree clauses, bool declare_simd, bool allow_fields,
case OMP_CLAUSE_THREADS:
case OMP_CLAUSE_SIMD:
case OMP_CLAUSE_DEFAULTMAP:
+ case OMP_CLAUSE_INDEPENDENT:
+ case OMP_CLAUSE_AUTO:
+ case OMP_CLAUSE_SEQ:
break;
default:
gcc_unreachable ();
@@ -15197,6 +15209,15 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
}
break;
+ case OACC_KERNELS:
+ case OACC_PARALLEL:
+ tmp = tsubst_omp_clauses (OMP_CLAUSES (t), false, false, args, complain,
+ in_decl);
+ stmt = begin_omp_parallel ();
+ RECUR (OMP_BODY (t));
+ finish_omp_construct (TREE_CODE (t), stmt, tmp);
+ break;
+
case OMP_PARALLEL:
r = push_omp_privatization_clauses (OMP_PARALLEL_COMBINED (t));
tmp = tsubst_omp_clauses (OMP_PARALLEL_CLAUSES (t), false, true,
@@ -15227,6 +15248,7 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
case CILK_FOR:
case OMP_DISTRIBUTE:
case OMP_TASKLOOP:
+ case OACC_LOOP:
{
tree clauses, body, pre_body;
tree declv = NULL_TREE, initv = NULL_TREE, condv = NULL_TREE;
@@ -15235,7 +15257,8 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
int i;
r = push_omp_privatization_clauses (OMP_FOR_INIT (t) == NULL_TREE);
- clauses = tsubst_omp_clauses (OMP_FOR_CLAUSES (t), false, true,
+ clauses = tsubst_omp_clauses (OMP_FOR_CLAUSES (t), false,
+ TREE_CODE (t) != OACC_LOOP,
args, complain, in_decl);
if (OMP_FOR_INIT (t) != NULL_TREE)
{
@@ -15305,9 +15328,11 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
pop_omp_privatization_clauses (r);
break;
+ case OACC_DATA:
case OMP_TARGET_DATA:
case OMP_TARGET:
- tmp = tsubst_omp_clauses (OMP_CLAUSES (t), false, true,
+ tmp = tsubst_omp_clauses (OMP_CLAUSES (t), false,
+ TREE_CODE (t) != OACC_DATA,
args, complain, in_decl);
keep_next_level (true);
stmt = begin_omp_structured_block ();
@@ -15331,6 +15356,16 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
add_stmt (t);
break;
+ case OACC_ENTER_DATA:
+ case OACC_EXIT_DATA:
+ case OACC_UPDATE:
+ tmp = tsubst_omp_clauses (OMP_STANDALONE_CLAUSES (t), false, false,
+ args, complain, in_decl);
+ t = copy_node (t);
+ OMP_STANDALONE_CLAUSES (t) = tmp;
+ add_stmt (t);
+ break;
+
case OMP_ORDERED:
tmp = tsubst_omp_clauses (OMP_ORDERED_CLAUSES (t), false, true,
args, complain, in_decl);
new file mode 100644
@@ -0,0 +1,104 @@
+// This error is temporary. Remove when support is added for these clauses
+// in the middle end.
+// { dg-prune-output "sorry, unimplemented" }
+
+extern void abort ();
+
+const int n = 100;
+
+// Check explicit template copy map
+
+template<typename T> T
+sum (T array[])
+{
+ T s = 0;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s, array[0:n])
+ for (int i = 0; i < n; i++)
+ s += array[i];
+
+ return s;
+}
+
+// Check implicit template copy map
+
+template<typename T> T
+sum ()
+{
+ T s = 0;
+ T array[n];
+
+ for (int i = 0; i < n; i++)
+ array[i] = i+1;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s)
+ for (int i = 0; i < n; i++)
+ s += array[i];
+
+ return s;
+}
+
+// Check present and async
+
+template<typename T> T
+async_sum (T array[])
+{
+ T s = 0;
+
+#pragma acc parallel loop num_gangs (10) gang async (1) present (array[0:n])
+ for (int i = 0; i < n; i++)
+ array[i] = i+1;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) present (array[0:n]) copy (s) async wait (1)
+ for (int i = 0; i < n; i++)
+ s += array[i];
+
+#pragma acc wait
+
+ return s;
+}
+
+// Check present and async and an explicit firstprivate
+
+template<typename T> T
+async_sum (int c)
+{
+ T s = 0;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy(s) firstprivate (c) async wait (1)
+ for (int i = 0; i < n; i++)
+ s += i+c;
+
+#pragma acc wait
+
+ return s;
+}
+
+int
+main()
+{
+ int a[n];
+ int result = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = i+1;
+ result += i+1;
+ }
+
+ if (sum (a) != result)
+ abort ();
+
+ if (sum<int> () != result)
+ abort ();
+
+#pragma acc enter data copyin (a)
+ if (async_sum (a) != result)
+ abort ();
+
+ if (async_sum<int> (1) != result)
+ abort ();
+#pragma acc exit data delete (a)
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,140 @@
+// This error is temporary. Remove when support is added for these clauses
+// in the middle end.
+// { dg-prune-output "sorry, unimplemented" }
+
+#pragma acc routine
+template <typename T> T
+accDouble(int val)
+{
+ return val * 2;
+}
+
+template<typename T> T
+oacc_parallel_copy (T a)
+{
+ T b = 0;
+ char w = 1;
+ int x = 2;
+ float y = 3;
+ double z = 4;
+
+#pragma acc parallel num_gangs (a) num_workers (a) vector_length (a) default (none) copyout (b) copyin (a)
+ {
+ b = a;
+ }
+
+#pragma acc parallel num_gangs (a) copy (w, x, y, z)
+ {
+ w = accDouble<char>(w);
+ x = accDouble<int>(x);
+ y = accDouble<float>(y);
+ z = accDouble<double>(z);
+ }
+
+#pragma acc parallel num_gangs (a) if (1)
+ {
+#pragma acc loop auto tile (a, 3)
+ for (int i = 0; i < a; i++)
+ for (int j = 0; j < 5; j++)
+ b = a;
+
+#pragma acc loop seq
+ for (int i = 0; i < a; i++)
+ b = a;
+ }
+
+ T c;
+
+#pragma acc parallel num_workers (10)
+ {
+#pragma acc atomic capture
+ c = b++;
+
+#pragma atomic update
+ c++;
+
+#pragma acc atomic read
+ b = a;
+
+#pragma acc atomic write
+ b = a;
+ }
+
+#pragma acc parallel reduction (+:c)
+ {
+ c = 1;
+ }
+
+#pragma acc data if (1) copy (b)
+ {
+ #pragma acc parallel
+ {
+ b = a;
+ }
+ }
+
+#pragma acc enter data copyin (b)
+#pragma acc parallel present (b)
+ {
+ b = a;
+ }
+
+#pragma acc update host (b)
+#pragma acc update self (b)
+#pragma acc update device (b)
+#pragma acc exit data delete (b)
+
+ return b;
+}
+
+template<typename T> T
+oacc_kernels_copy (T a)
+{
+ T b = 0;
+ T c = 0;
+ char w = 1;
+ int x = 2;
+ float y = 3;
+ double z = 4;
+
+#pragma acc kernels copy (w, x, y, z)
+ {
+ w = accDouble<char>(w);
+ x = accDouble<int>(x);
+ y = accDouble<float>(y);
+ z = accDouble<double>(z);
+ }
+
+#pragma acc kernels copyout (b) copyin (a)
+ b = a;
+
+#pragma acc kernels loop reduction (+:c)
+ for (int i = 0; i < 10; i++)
+ {
+ c = 1;
+ }
+
+#pragma acc data if (1) copy (b)
+ {
+ #pragma acc kernels
+ {
+ b = a;
+ }
+ }
+
+#pragma acc enter data copyin (b)
+#pragma acc kernels present (b)
+ {
+ b = a;
+ }
+ return b;
+}
+
+int
+main ()
+{
+ int b = oacc_parallel_copy<int> (5);
+ int c = oacc_kernels_copy<int> (5);
+
+ return b + c;
+}