===================================================================
@@ -19,7 +19,7 @@
hs1 = 0
hs2 = 0
- !$acc parallel num_gangs (1000) copy(gs1, gs2)
+ !$acc parallel num_gangs (1000)
!$acc loop reduction(+:gs1, gs2) gang
do i = 1, n
gs1 = gs1 + 1
@@ -27,7 +27,7 @@
end do
!$acc end parallel
- !$acc parallel num_workers (4) vector_length (32) copy(ws1, ws2)
+ !$acc parallel num_workers (4) vector_length (32)
!$acc loop reduction(+:ws1, ws2) worker
do i = 1, n
ws1 = ws1 + 1
@@ -35,7 +35,7 @@
end do
!$acc end parallel
- !$acc parallel vector_length (32) copy(vs1, vs2)
+ !$acc parallel vector_length (32)
!$acc loop reduction(+:vs1, vs2) vector
do i = 1, n
vs1 = vs1 + 1
@@ -43,7 +43,7 @@
end do
!$acc end parallel
- !$acc parallel num_gangs(8) num_workers(4) vector_length(32) copy(cs1, cs2)
+ !$acc parallel num_gangs(8) num_workers(4) vector_length(32)
!$acc loop reduction(+:cs1, cs2) gang worker vector
do i = 1, n
cs1 = cs1 + 1
@@ -74,7 +74,7 @@
red = 0
vred = 0
- !$acc parallel num_gangs(10) vector_length(32) copy(red)
+ !$acc parallel num_gangs(10) vector_length(32)
!$acc loop reduction(+:red) gang
do i = 1, n/chunksize
!$acc loop reduction(+:red) vector
===================================================================
@@ -50,7 +50,7 @@
end subroutine redsub_private
-! Bogus reduction on an impliclitly firstprivate variable. The results do
+! Bogus reduction on a firstprivate variable. The results do
! survive the parallel region. The goal here is to ensure that gfortran
! doesn't ICE.
@@ -58,7 +58,7 @@
integer :: sum, n, arr(n)
integer :: i
- !$acc parallel
+ !$acc parallel firstprivate(sum)
!$acc loop gang worker vector reduction (+:sum)
do i = 1, n
sum = sum + 1
@@ -72,7 +72,7 @@
integer :: sum, n, arr(n)
integer :: i, j
- !$acc parallel copy (arr)
+ !$acc parallel copy (arr) firstprivate(sum)
!$acc loop gang
do i = 1, n
sum = i;
===================================================================
@@ -8,7 +8,7 @@
int i, j, k, l = 0, f = 0, x = 0;
int m1 = 4, m2 = -5, m3 = 17;
-#pragma acc parallel copy(l)
+#pragma acc parallel
#pragma acc loop seq collapse(3) reduction(+:l)
for (i = -2; i < m1; i++)
for (j = m2; j < -2; j++)
===================================================================
@@ -10,7 +10,7 @@
int ondev = 0;
int t = 0, h = 0;
-#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
{
#pragma acc loop worker vector reduction (+:t)
for (unsigned ix = 0; ix < N; ix++)
===================================================================
@@ -13,8 +13,7 @@
for (i = 0; i < 1024; i++)
arr[i] = i;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang reduction(+:res)
for (i = 0; i < 1024; i++)
@@ -28,8 +27,7 @@
res = hres = 1;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang reduction(*:res)
for (i = 0; i < 12; i++)
@@ -53,8 +51,7 @@
for (i = 0; i < 1024; i++)
arr[i] = i;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang vector reduction(+:res)
for (i = 0; i < 1024; i++)
@@ -78,8 +75,7 @@
for (i = 0; i < 1024; i++)
arr[i] = i;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang worker reduction(+:res)
for (i = 0; i < 1024; i++)
@@ -103,8 +99,7 @@
for (i = 0; i < 1024; i++)
arr[i] = i;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang worker vector reduction(+:res)
for (i = 0; i < 1024; i++)
@@ -128,8 +123,7 @@
for (i = 0; i < 32768; i++)
arr[i] = i;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang reduction(+:res)
for (j = 0; j < 32; j++)
@@ -161,7 +155,7 @@
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copyin(arr) copy(res)
+ copyin(arr)
{
#pragma acc loop gang reduction(+:res)
for (j = 0; j < 32; j++)
@@ -191,8 +185,7 @@
for (i = 0; i < 32768; i++)
arr[i] = i;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res, mres)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang reduction(+:res) reduction(max:mres)
for (j = 0; j < 32; j++)
===================================================================
@@ -12,7 +12,7 @@
int ondev = 0;
int t = 0, h = 0;
-#pragma acc parallel vector_length(32) copy(t) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ondev)
{
#pragma acc loop vector reduction (+:t)
for (unsigned ix = 0; ix < N; ix++)
===================================================================
@@ -11,7 +11,7 @@
memset (b, '\0', sizeof (b));
-#pragma acc parallel copy(b[0:3][0:3]) copy(l)
+#pragma acc parallel copy(b[0:3][0:3])
{
#pragma acc loop collapse(2) reduction(+:l)
for (i = 0; i < 2; i++)
===================================================================
@@ -19,7 +19,7 @@
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel vector_length(32) copyin(ary[0:N])
{
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
@@ -43,7 +43,7 @@
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_workers(32) copyin(ary[0:N])
{
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
@@ -67,7 +67,7 @@
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_gangs (32) copyin(ary[0:N])
{
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
===================================================================
@@ -11,7 +11,7 @@
int ondev = 0;
int t = 0, h = 0;
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(t) copy(ondev)
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ondev)
{
#pragma acc loop gang worker vector reduction(+:t)
for (unsigned ix = 0; ix < N; ix++)
===================================================================
@@ -22,7 +22,7 @@
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel vector_length(32) copyin(ary[0:N])
{
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
@@ -46,7 +46,7 @@
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_workers(32) copyin(ary[0:N])
{
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
@@ -70,7 +70,7 @@
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_gangs (32) copyin(ary[0:N])
{
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
===================================================================
@@ -19,7 +19,7 @@
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel vector_length(32) copyin(ary[0:N])
{
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
@@ -43,7 +43,7 @@
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_workers(32) copyin(ary[0:N])
{
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
@@ -67,7 +67,7 @@
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_gangs (32) copyin(ary[0:N])
{
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
===================================================================
@@ -11,7 +11,7 @@
int ondev = 0;
int t = 0, h = 0;
-#pragma acc parallel num_gangs(32) vector_length(32) copy(t) copy(ondev)
+#pragma acc parallel num_gangs(32) vector_length(32) copy(ondev)
{
#pragma acc loop gang reduction (+:t)
for (unsigned ix = 0; ix < N; ix++)
===================================================================
@@ -11,7 +11,7 @@
int ondev = 0;
int t = 0, h = 0;
-#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
{
#pragma acc loop worker reduction(+:t)
for (unsigned ix = 0; ix < N; ix++)
===================================================================
@@ -22,7 +22,7 @@
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel vector_length(32) copyin(ary[0:N])
{
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
@@ -46,7 +46,7 @@
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_workers(32) copyin(ary[0:N])
{
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
@@ -70,7 +70,7 @@
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_gangs (32) copyin(ary[0:N])
{
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)