[gomp4,06/14] omp-low: copy omp_data_o to shared memory on NVPTX

Message ID 20151126095046.GD5675@tucnak.redhat.com

Commit Message

Jakub Jelinek Nov. 26, 2015, 9:50 a.m. UTC
On Tue, Nov 10, 2015 at 11:39:36AM +0100, Jakub Jelinek wrote:
> On Tue, Nov 03, 2015 at 05:25:53PM +0300, Alexander Monakov wrote:
> > Here's an alternative patch that does not depend on exposure of shared-memory
> > address space, and does not try to use pass_late_lower_omp.  It's based on
> > Bernd's suggestion to transform
> 
> FYI, I've committed a new testcase to gomp-4_5-branch that covers various
> target data sharing/team sharing/privatization parallel
> sharing/privatization offloading cases.

And here is another testcase, this time using only OpenMP 4.0 features,
which tries to test the behavior of addressable variables in declare target
functions where it is not clear whether they are executed in teams,
distribute or parallel for contexts.
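
For reference, the core pattern under test boils down to something like
the following minimal sketch (not part of the patch; sum_into_local is a
made-up name for illustration): an addressable local in a declare target
function whose address is implicitly shared with the threads of an inner
parallel region.

  #pragma omp declare target
  __attribute__((noinline))
  int
  sum_into_local (void)
  {
    int a = 0, j;
    /* a is addressable and shared by the parallel region below, so the
       compiler must place it somewhere all threads can reach (e.g.
       shared memory on NVPTX).  */
    #pragma omp parallel for
    for (j = 0; j < 32; j++)
      {
        #pragma omp atomic
        a += j;
      }
    return a;  /* 0 + 1 + ... + 31 == 31 * 32 / 2 == 496 */
  }
  #pragma omp end declare target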

I wanted to look at what LLVM generates here (I tried llvm trunk), but it
is unable to parse #pragma omp distribute or #pragma omp declare target,
so it is hard to guess anything.

Tested with XeonPhi offloading as well as host fallback, committed to trunk.
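
(For anyone wanting to reproduce, a single libgomp test like this one can
be run from a GCC build tree with something along the lines of
make check-target-libgomp RUNTESTFLAGS="c.exp=target-35.c", though the
exact invocation depends on the build setup.)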

2015-11-26  Jakub Jelinek  <jakub@redhat.com>

	* testsuite/libgomp.c/target-35.c: New test.



	Jakub

Patch

--- libgomp/testsuite/libgomp.c/target-35.c	(revision 0)
+++ libgomp/testsuite/libgomp.c/target-35.c	(working copy)
@@ -0,0 +1,129 @@
+#include <omp.h>
+#include <stdlib.h>
+
+#pragma omp declare target
+__attribute__((noinline))
+void
+foo (int x, int y, int z, int *a, int *b)
+{
+  if (x == 0)
+    {
+      int i, j;
+      for (i = 0; i < 64; i++)
+	#pragma omp parallel for shared (a, b)
+	for (j = 0; j < 32; j++)
+	  foo (3, i, j, a, b);
+    }
+  else if (x == 1)
+    {
+      int i, j;
+      #pragma omp distribute dist_schedule (static, 1)
+      for (i = 0; i < 64; i++)
+	#pragma omp parallel for shared (a, b)
+	for (j = 0; j < 32; j++)
+	  foo (3, i, j, a, b);
+    }
+  else if (x == 2)
+    {
+      int j;
+      #pragma omp parallel for shared (a, b)
+      for (j = 0; j < 32; j++)
+	foo (3, y, j, a, b);
+    }
+  else
+    {
+      #pragma omp atomic
+      b[y] += z;
+      #pragma omp atomic
+      *a += 1;
+    }
+}
+
+__attribute__((noinline))
+int
+bar (int x, int y, int z)
+{
+  int a, b[64], i;
+  a = 8;
+  for (i = 0; i < 64; i++)
+    b[i] = i;
+  foo (x, y, z, &a, b);
+  if (x == 0)
+    {
+      if (a != 8 + 64 * 32)
+	return 1;
+      for (i = 0; i < 64; i++)
+	if (b[i] != i + 31 * 32 / 2)
+	  return 1;
+    }
+  else if (x == 1)
+    {
+      int c = omp_get_num_teams ();
+      int d = omp_get_team_num ();
+      int e = d;
+      int f = 0;
+      for (i = 0; i < 64; i++)
+	if (i == e)
+	  {
+	    if (b[i] != i + 31 * 32 / 2)
+	      return 1;
+	    f++;
+	    e = e + c;
+	  }
+	else if (b[i] != i)
+	  return 1;
+      if (a < 8 || a > 8 + f * 32)
+	return 1;
+    }
+  else if (x == 2)
+    {
+      if (a != 8 + 32)
+	return 1;
+      for (i = 0; i < 64; i++)
+	if (b[i] != i + (i == y ? 31 * 32 / 2 : 0))
+	  return 1;
+    }
+  else if (x == 3)
+    {
+      if (a != 8 + 1)
+	return 1;
+      for (i = 0; i < 64; i++)
+	if (b[i] != i + (i == y ? z : 0))
+	  return 1;
+    }
+  return 0;
+}
+#pragma omp end declare target
+
+int
+main ()
+{
+  int i, j, err = 0;
+  #pragma omp target map(tofrom:err)
+  #pragma omp teams reduction(+:err)
+  err += bar (0, 0, 0);
+  if (err)
+    abort ();
+  #pragma omp target map(tofrom:err)
+  #pragma omp teams reduction(+:err)
+  err += bar (1, 0, 0);
+  if (err)
+    abort ();
+  #pragma omp target map(tofrom:err)
+  #pragma omp teams reduction(+:err)
+  #pragma omp distribute
+  for (i = 0; i < 64; i++)
+    err += bar (2, i, 0);
+  if (err)
+    abort ();
+  #pragma omp target map(tofrom:err)
+  #pragma omp teams reduction(+:err)
+  #pragma omp distribute
+  for (i = 0; i < 64; i++)
+  #pragma omp parallel for reduction(+:err)
+    for (j = 0; j < 32; j++)
+      err += bar (3, i, j);
+  if (err)
+    abort ();
+  return 0;
+}
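
As an aside, the expected values checked in bar follow from the innermost
foo (3, y, z, a, b) calls, each of which atomically adds z to b[y] and 1
to *a: a full 64x32 sweep adds 0 + 1 + ... + 31 == 31 * 32 / 2 == 496 to
each b[i] and 64 * 32 to a.  The x == 1 checks are weaker because
dist_schedule (static, 1) hands each team only every
omp_get_num_teams ()-th iteration of the i loop, and each team works on
its own copies of a and b.  A host-only sanity check of that arithmetic
(a sketch, not part of the patch):

  #include <stdio.h>

  int
  main ()
  {
    int a = 8, b[64], i, j;
    for (i = 0; i < 64; i++)
      b[i] = i;
    /* Serially simulate one full 64x32 sweep of foo (3, i, j, a, b).  */
    for (i = 0; i < 64; i++)
      for (j = 0; j < 32; j++)
        {
          b[i] += j;  /* what "#pragma omp atomic" b[y] += z accumulates */
          a += 1;     /* what "#pragma omp atomic" *a += 1 accumulates */
        }
    printf ("a = %d (expected %d)\n", a, 8 + 64 * 32);
    printf ("b[0] = %d (expected %d)\n", b[0], 0 + 31 * 32 / 2);
    return 0;
  }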