diff mbox

[gomp4] OpenACC update if_present runtime support

Message ID fe6a8dbe-b0ac-9c5d-f7cc-00bfec2d77a8@codesourcery.com
State New
Headers show

Commit Message

Cesar Philippidis May 9, 2017, 3:06 p.m. UTC
This patch adds runtime support for the OpenACC update if_present
clause. It turned out to require significantly less work to implement
if_present in the runtime. Instead of creating a new API for
GOACC_updated, I exploited the fact that OpenACC no longer uses
GOMP_MAP_FORCE_* data mappings. This allowed me to encode the if_present
update data mappings as GOMP_MAP_{TO,FROM} for device, and host/self,
respectively, during gimplification. The actual runtime changes
themselves were minor; the runtime only needs to call acc_is_present
prior to actually updating the device/host data.

I've applied this patch to gomp-4_0-branch.

Cesar
diff mbox

Patch

2017-05-09  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/
	* gimplify.c (gimplify_omp_target_update): Relax OpenACC update data
	mappings to GOMP_MAP_{TO,FROM} when the user specifies if_present.

	gcc/testsuite/
	* c-c++-common/goacc/update-if_present-1.c: Update test case.

	libgomp/
	* oacc-parallel.c (GOACC_update): Handle GOMP_MAP_{TO,FROM} for the
	if_present data clauses.
	* testsuite/libgomp.oacc-c-c++-common/update-2.c: New test.
	* testsuite/libgomp.oacc-fortran/update-3.f90: New test.


diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index af908f4..47fe9ee 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -10034,6 +10034,25 @@  gimplify_omp_target_update (tree *expr_p, gimple_seq *pre_p)
 			     ort, TREE_CODE (expr));
   gimplify_adjust_omp_clauses (pre_p, NULL, &OMP_STANDALONE_CLAUSES (expr),
 			       TREE_CODE (expr));
+  if (TREE_CODE (expr) == OACC_UPDATE
+      && find_omp_clause (OMP_STANDALONE_CLAUSES (expr), OMP_CLAUSE_IF_PRESENT))
+    {
+      /* The runtime uses GOMP_MAP_{TO,FROM} to denote the if_present
+	 clause.  */
+      for (tree c = OMP_STANDALONE_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
+	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP)
+	  switch (OMP_CLAUSE_MAP_KIND (c))
+	    {
+	    case GOMP_MAP_FORCE_TO:
+	      OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_TO);
+	      break;
+	    case GOMP_MAP_FORCE_FROM:
+	      OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FROM);
+	      break;
+	    default:
+	      break;
+	    }
+    }
   stmt = gimple_build_omp_target (NULL, kind, OMP_STANDALONE_CLAUSES (expr));
 
   gimplify_seq_add_stmt (pre_p, stmt);
diff --git a/gcc/testsuite/c-c++-common/goacc/update-if_present-1.c b/gcc/testsuite/c-c++-common/goacc/update-if_present-1.c
index 519393cf..5a19ee1 100644
--- a/gcc/testsuite/c-c++-common/goacc/update-if_present-1.c
+++ b/gcc/testsuite/c-c++-common/goacc/update-if_present-1.c
@@ -12,6 +12,18 @@  t ()
 #pragma acc update device(b) async if_present
 #pragma acc update host(c[1:3]) wait(4) if_present
 #pragma acc update self(c) device(b) host (a) async(10) if (a == 5) if_present
+
+#pragma acc update self(a)
+#pragma acc update device(b) async
+#pragma acc update host(c[1:3]) wait(4)
+#pragma acc update self(c) device(b) host (a) async(10) if (a == 5)
 }
 
-/* { dg-final { scan-tree-dump-times "pragma omp target oacc_update if_present" 4 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "omp target oacc_update if_present map.from:a .len: 4.." 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "omp target oacc_update if_present async.-1. map.to:b .len: 4.." 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "omp target oacc_update if_present wait.4. map.from:c.1. .len: 12.." 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "omp target oacc_update if_present if.D...... async.10. map.from:a .len: 4.. map.to:b .len: 4.. map.from:c .len: 40.." 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "omp target oacc_update map.force_from:a .len: 4.." 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "omp target oacc_update async.-1. map.force_to:b .len: 4.." 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "omp target oacc_update wait.4. map.force_from:c.1. .len: 12.." 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "omp target oacc_update if.D...... async.10. map.force_from:a .len: 4.. map.force_to:b .len: 4.. map.force_from:c .len: 40.." 1 "omplower" } } */
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 66acdf6..de70ac0 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -683,14 +683,29 @@  GOACC_update (int device, size_t mapnum,
 
 	      /* Restore the host pointer.  */
 	      *(uintptr_t *) hostaddrs[i] = t;
+	      update_device = false;
 	    }
 	  break;
 
+	case GOMP_MAP_TO:
+	  if (!acc_is_present (hostaddrs[i], sizes[i]))
+	    {
+	      update_device = false;
+	      break;
+	    }
+	  /* Fallthru  */
 	case GOMP_MAP_FORCE_TO:
 	  update_device = true;
 	  acc_update_device (hostaddrs[i], sizes[i]);
 	  break;
 
+	case GOMP_MAP_FROM:
+	  if (!acc_is_present (hostaddrs[i], sizes[i]))
+	    {
+	      update_device = false;
+	      break;
+	    }
+	  /* Fallthru  */
 	case GOMP_MAP_FORCE_FROM:
 	  update_device = false;
 	  acc_update_self (hostaddrs[i], sizes[i]);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/update-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-2.c
new file mode 100644
index 0000000..95bc16d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-2.c
@@ -0,0 +1,100 @@ 
+/* Test OpenACC update if_present clause.  Note that part of this test
+   depends on a target that requires discrete memory.  See the early
+   exit at acc_get_device_type.  */
+
+/* { dg-compile } */
+
+#include <assert.h>
+#include <openacc.h>
+
+void
+t (int n, int a, int *c, int *d)
+{
+  int b, i;
+
+#pragma acc parallel loop present(c[0:n]) copyout(d[0:n])
+  for (i = 0; i < n; i++)
+    d[i] = c[i];
+
+  for (i = 0; i < n; i++)
+    {
+      assert (d[i] == a);
+      c[i] = i;
+    }
+
+  a = 0;
+
+#pragma acc update device(c[0:n], a) if_present
+
+#pragma acc parallel loop reduction(+:a) present(c[0:n])
+  for (i = 0; i < n; i++)
+    a += c[i];
+
+#pragma acc update host(a) if_present
+
+  b = 0;
+  for (i = 0; i < n; i++)
+    b += i;
+
+  assert (a == b);
+
+#pragma acc enter data copyin (b)
+#pragma acc parallel loop reduction (+:b)
+  for (i = 0; i < n; i++)
+    b += i;
+
+#pragma acc update self(b)
+  assert (2*a == b);
+#pragma acc exit data delete(b)
+
+#pragma acc parallel loop present(c[0:n])
+  for (i = 0; i < n; i++)
+    c[i] =  i;
+
+#pragma acc update host (c[0:n])
+
+#pragma acc parallel loop present(c[0:n])
+  for (i = 0; i < n; i++)
+    c[i] =  1000 + i;
+}
+
+int
+main ()
+{
+  int n = 100;
+  int a, i, x[n], y[n];
+
+#pragma acc update self(a) if_present
+#pragma acc update if_present device(x[20:10])
+
+  a = -1;
+  for (i = 0; i < n; i++)
+    x[i] = a;
+
+  if (acc_get_device_type () == acc_device_host)
+    return 0;
+
+#pragma acc enter data copyin(x)
+  t (n, a, x, y);
+
+#pragma acc update host(x[10:20])
+
+  for (i = 0; i < n; i++)
+    if (i < 10 || i >= 30)
+      assert (x[i] == i);
+    else
+      assert (x[i] == 1000 + i);
+
+#pragma acc exit data delete(x)
+
+  a = -1;
+  for (i = 0; i < n; i++)
+    x[i] = a;
+
+#pragma acc data copy (x)
+  {
+    t (n, a, x, y);
+  }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/update-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/update-3.f90
new file mode 100644
index 0000000..a39358c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/update-3.f90
@@ -0,0 +1,97 @@ 
+! Test OpenACC update if_present clause.  Note that part of this test
+! depends on a target that requires discrete memory.  See the early
+! exit at acc_get_device_type.
+
+! { dg-compile }
+
+program main
+  use openacc
+  integer, parameter ::  n = 100
+  integer ::  a, i, x(n), y(n)
+
+  !$acc update self(a) if_present
+  !$acc update if_present device(x(20:30))
+
+  a = -1
+  do i = 1, n
+     x(i) = a
+  end do
+
+  if (acc_get_device_type () == acc_device_host) call exit (0)
+
+  !$acc enter data copyin(x)
+  call t (n, a, x, y)
+
+  !$acc update host(x(10:30))
+
+  do i = 1, n
+     if ((i < 10 .or. i > 30) .and. (x(i) /= i)) call abort
+     if ((i >= 10 .and. i <= 30) .and. (x(i) /= 1000 + i)) call abort
+  end do
+
+  !$acc exit data delete(x)
+
+  a = -1;
+  do i = 1, n
+     x(i) = a
+  end do
+
+  !$acc data copy (x)
+  call t (n, a, x, y)
+  !$acc end data
+
+contains
+  subroutine t (n, a, c, d)
+    integer :: n, a, b, i, c(n), d(n)
+
+!$acc parallel loop present(c(1:n)) copyout(d(1:n))
+    do i = 1, n
+       d(i) = c(i)
+    end do
+
+    do i = 1, n
+       if (d(i) /= a) call abort
+       c(i) = i
+    end do
+
+    a = 0
+
+    !$acc update device(c(1:n), a) if_present
+
+    !$acc parallel loop reduction(+:a) present(c(1:n))
+    do i = 1, n
+       a = a + c(i)
+    end do
+
+    !$acc update host(a) if_present
+
+    b = 0
+    do i = 1, n
+       b = b + i
+    end do
+
+    if (a /= b) call abort
+
+    !$acc enter data copyin (b)
+    !$acc parallel loop reduction (+:b)
+    do i = 1, n
+       b = b + i;
+    end do
+
+    !$acc update self(b)
+    if (2*a /= b) call abort
+    !$acc exit data delete(b)
+
+    !$acc parallel loop present(c(1:n))
+    do i = 1, n
+       c(i) = i
+    end do
+
+    !$acc update host (c(1:n))
+
+    !$acc parallel loop present(c(1:n))
+    do i = 1, n
+       c(i) = 1000 + i
+    end do
+  end subroutine t
+end program main