2016-04-15 Cesar Philippidis <cesar@codesourcery.com>
gcc/c-family/
PR middle-end/70626
* c-common.h (c_oacc_split_loop_clauses): Add boolean argument.
* c-omp.c (c_oacc_split_loop_clauses): Use it to duplicate
reduction clauses in acc parallel loops.
gcc/c/
PR middle-end/70626
* c-parser.c (c_parser_oacc_loop): Update call to
c_oacc_split_loop_clauses.
(c_parser_oacc_kernels_parallel): Don't augment mask with
OACC_LOOP_CLAUSE_MASK.
gcc/cp/
PR middle-end/70626
* parser.c (cp_parser_oacc_loop): Update call to
c_oacc_split_loop_clauses.
(cp_parser_oacc_kernels_parallel): Don't augment mask with
OACC_LOOP_CLAUSE_MASK.
gcc/fortran/
PR middle-end/70626
* trans-openmp.c (gfc_trans_oacc_combined_directive): Duplicate
the reduction clause in both parallel and loop directives.
gcc/testsuite/
PR middle-end/70626
* c-c++-common/goacc/combined-reduction.c: New test.
* gfortran.dg/goacc/reduction-2.f95: Adjust check for parallel reductions.
libgomp/
PR middle-end/70626
* testsuite/libgomp.oacc-c++/template-reduction.C: Adjust test.
* testsuite/libgomp.oacc-c-c++-common/combined-reduction.c: New test.
* testsuite/libgomp.oacc-fortran/combined-reduction.f90: New test.
@@ -1276,7 +1276,7 @@ extern bool c_omp_check_loop_iv (tree, tree, walk_tree_lh);
extern bool c_omp_check_loop_iv_exprs (location_t, tree, tree, tree, tree,
walk_tree_lh);
extern tree c_finish_oacc_wait (location_t, tree, tree);
-extern tree c_oacc_split_loop_clauses (tree, tree *);
+extern tree c_oacc_split_loop_clauses (tree, tree *, bool);
extern void c_omp_split_clauses (location_t, enum tree_code, omp_clause_mask,
tree, tree *);
extern tree c_omp_declare_simd_clauses_to_numbers (tree, tree);
@@ -861,9 +861,10 @@ c_omp_check_loop_iv_exprs (location_t stmt_loc, tree declv, tree decl,
#pragma acc parallel loop */
tree
-c_oacc_split_loop_clauses (tree clauses, tree *not_loop_clauses)
+c_oacc_split_loop_clauses (tree clauses, tree *not_loop_clauses,
+ bool is_parallel)
{
- tree next, loop_clauses;
+ tree next, loop_clauses, nc;
loop_clauses = *not_loop_clauses = NULL_TREE;
for (; clauses ; clauses = next)
@@ -882,7 +883,23 @@ c_oacc_split_loop_clauses (tree clauses, tree *not_loop_clauses)
case OMP_CLAUSE_SEQ:
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_PRIVATE:
+ OMP_CLAUSE_CHAIN (clauses) = loop_clauses;
+ loop_clauses = clauses;
+ break;
+
+ /* For acc parallel loops, reductions are duplicated on both constructs. */
case OMP_CLAUSE_REDUCTION:
+ if (is_parallel)
+ {
+ nc = build_omp_clause (OMP_CLAUSE_LOCATION (clauses),
+ OMP_CLAUSE_REDUCTION);
+ OMP_CLAUSE_DECL (nc) = OMP_CLAUSE_DECL (clauses);
+ OMP_CLAUSE_REDUCTION_CODE (nc)
+ = OMP_CLAUSE_REDUCTION_CODE (clauses);
+ OMP_CLAUSE_CHAIN (nc) = *not_loop_clauses;
+ *not_loop_clauses = nc;
+ }
+
OMP_CLAUSE_CHAIN (clauses) = loop_clauses;
loop_clauses = clauses;
break;
@@ -13822,6 +13822,8 @@ static tree
c_parser_oacc_loop (location_t loc, c_parser *parser, char *p_name,
omp_clause_mask mask, tree *cclauses, bool *if_p)
{
+ bool is_parallel = ((mask >> PRAGMA_OACC_CLAUSE_REDUCTION) & 1) == 1;
+
strcat (p_name, " loop");
mask |= OACC_LOOP_CLAUSE_MASK;
@@ -13829,7 +13831,7 @@ c_parser_oacc_loop (location_t loc, c_parser *parser, char *p_name,
cclauses == NULL);
if (cclauses)
{
- clauses = c_oacc_split_loop_clauses (clauses, cclauses);
+ clauses = c_oacc_split_loop_clauses (clauses, cclauses, is_parallel);
if (*cclauses)
*cclauses = c_finish_omp_clauses (*cclauses, false);
if (clauses)
@@ -13924,8 +13926,6 @@ c_parser_oacc_kernels_parallel (location_t loc, c_parser *parser,
if (strcmp (p, "loop") == 0)
{
c_parser_consume_token (parser);
- mask |= OACC_LOOP_CLAUSE_MASK;
-
tree block = c_begin_omp_parallel ();
tree clauses;
c_parser_oacc_loop (loc, parser, p_name, mask, &clauses, if_p);
@@ -35396,6 +35396,8 @@ static tree
cp_parser_oacc_loop (cp_parser *parser, cp_token *pragma_tok, char *p_name,
omp_clause_mask mask, tree *cclauses, bool *if_p)
{
+ bool is_parallel = ((mask >> PRAGMA_OACC_CLAUSE_REDUCTION) & 1) == 1;
+
strcat (p_name, " loop");
mask |= OACC_LOOP_CLAUSE_MASK;
@@ -35403,7 +35405,7 @@ cp_parser_oacc_loop (cp_parser *parser, cp_token *pragma_tok, char *p_name,
cclauses == NULL);
if (cclauses)
{
- clauses = c_oacc_split_loop_clauses (clauses, cclauses);
+ clauses = c_oacc_split_loop_clauses (clauses, cclauses, is_parallel);
if (*cclauses)
*cclauses = finish_omp_clauses (*cclauses, false);
if (clauses)
@@ -35496,8 +35498,6 @@ cp_parser_oacc_kernels_parallel (cp_parser *parser, cp_token *pragma_tok,
if (strcmp (p, "loop") == 0)
{
cp_lexer_consume_token (parser->lexer);
- mask |= OACC_LOOP_CLAUSE_MASK;
-
tree block = begin_omp_parallel ();
tree clauses;
cp_parser_oacc_loop (parser, pragma_tok, p_name, mask, &clauses,
@@ -3497,7 +3497,8 @@ gfc_trans_oacc_combined_directive (gfc_code *code)
construct_clauses.independent = false;
construct_clauses.tile_list = NULL;
construct_clauses.lists[OMP_LIST_PRIVATE] = NULL;
- construct_clauses.lists[OMP_LIST_REDUCTION] = NULL;
+ if (construct_code == OACC_KERNELS)
+ construct_clauses.lists[OMP_LIST_REDUCTION] = NULL;
oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses,
code->loc);
}
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenacc -fdump-tree-gimple" } */
+
+#include <assert.h>
+
+int
+main ()
+{
+ int i, v1 = 0, n = 100;
+
+#pragma acc parallel loop reduction(+:v1)
+ for (i = 0; i < n; i++)
+ v1++;
+
+ assert (v1 == n);
+
+#pragma acc kernels loop reduction(+:v1)
+ for (i = 0; i < n; i++)
+ v1++;
+
+ assert (v1 == n);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "omp target oacc_parallel reduction.+:v1. map.tofrom:v1" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "acc loop reduction.+:v1. private.i." 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "omp target oacc_kernels map.force_tofrom:n .len: 4.. map.force_tofrom:v1 .len: 4.." 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "acc loop reduction.+:v1. private.i." 1 "gimple" } } */
@@ -15,7 +15,7 @@ subroutine foo ()
!$acc end kernels loop
end subroutine
-! { dg-final { scan-tree-dump-times "target oacc_parallel firstprivate.a." 1 "gimple" } }
+! { dg-final { scan-tree-dump-times "target oacc_parallel reduction..:a. map.tofrom.a." 1 "gimple" } }
! { dg-final { scan-tree-dump-times "acc loop private.p. reduction..:a." 1 "gimple" } }
! { dg-final { scan-tree-dump-times "target oacc_kernels map.force_tofrom:a .len: 4.." 1 "gimple" } }
! { dg-final { scan-tree-dump-times "acc loop private.k. reduction..:a." 1 "gimple" } }
@@ -7,7 +7,7 @@ sum (T array[])
{
T s = 0;
-#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s, array[0:n])
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (array[0:n])
for (int i = 0; i < n; i++)
s += array[i];
@@ -25,7 +25,7 @@ sum ()
for (int i = 0; i < n; i++)
array[i] = i+1;
-#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s)
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s)
for (int i = 0; i < n; i++)
s += array[i];
@@ -43,7 +43,7 @@ async_sum (T array[])
for (int i = 0; i < n; i++)
array[i] = i+1;
-#pragma acc parallel loop num_gangs (10) gang reduction (+:s) present (array[0:n]) copy (s) async wait (1)
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) present (array[0:n]) async wait (1)
for (int i = 0; i < n; i++)
s += array[i];
@@ -59,7 +59,7 @@ async_sum (int c)
{
T s = 0;
-#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy(s) firstprivate (c) async wait (1)
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) firstprivate (c) async wait (1)
for (int i = 0; i < n; i++)
s += i+c;
new file mode 100644
@@ -0,0 +1,23 @@
+/* Test a combined acc parallel loop reduction. */
+
+/* { dg-do run } */
+
+#include <assert.h>
+
+int
+main ()
+{
+ int i, v1 = 0, v2 = 0, n = 100;
+
+#pragma acc parallel loop reduction(+:v1, v2)
+ for (i = 0; i < n; i++)
+ {
+ v1++;
+ v2++;
+ }
+
+ assert (v1 == n);
+ assert (v2 == n);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,19 @@
+! Test a combined acc parallel loop reduction.
+
+! { dg-do run }
+
+program test
+ implicit none
+ integer i, n, var
+
+ n = 100
+ var = 0
+
+ !$acc parallel loop reduction(+:var)
+ do i = 1, 100
+ var = var + 1
+ end do
+ !$acc end parallel loop
+
+ if (var .ne. n) call abort
+end program test