commit b2bb572cef2b6b0984d65995e070dc424b03a525
Author: jbrown <jbrown@e7755896-6108-0410-9592-8049d3e74e28>
Date: Mon May 11 16:04:48 2015 +0000
Add vector-single/vector-partitioned tests.
new file mode 100644
@@ -0,0 +1,30 @@
+#include <assert.h>
+
+/* Test basic vector-partitioned mode transitions. */
+
+int
+main (int argc, char *argv[])
+{
+ int n = 0, arr[32], i;
+ /* Zero the array on the host so a single increment per element is visible. */
+ for (i = 0; i < 32; i++)
+ arr[i] = 0;
+ /* One gang, one worker, vector length 32; n and arr are in the copy clause. */
+ #pragma acc parallel copy(n, arr) num_gangs(1) num_workers(1) \
+ vector_length(32)
+ {
+ int j;
+ n++; /* Scalar update placed before the vector loop. */
+ #pragma acc loop vector
+ for (j = 0; j < 32; j++)
+ arr[j]++;
+ n++; /* Scalar update placed after the vector loop. */
+ }
+ /* The two scalar updates must each have been applied exactly once. */
+ assert (n == 2);
+ /* Every array element must have been incremented exactly once. */
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == 1);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,43 @@
+#include <assert.h>
+
+/* Test vector-partitioned, gang-partitioned mode. */
+
+int
+main (int argc, char *argv[])
+{
+ int n[32], arr[1024], i;
+ /* Host-side setup: clear both arrays so device updates are detectable. */
+ for (i = 0; i < 1024; i++)
+ arr[i] = 0;
+
+ for (i = 0; i < 32; i++)
+ n[i] = 0;
+ /* Request 32 gangs of one worker each, with a vector length of 32. */
+ #pragma acc parallel copy(n, arr) num_gangs(32) num_workers(1) \
+ vector_length(32)
+ {
+ int j, k;
+ /* Gang loop with no vector loop inside: first increment of each n[j]. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ /* Gang loop over 32-element rows; the inner loop is the vector loop. */
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k]++;
+ /* Same gang-only loop again, now after the vector loop. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ }
+ /* Each n[i] is incremented once by each of the two gang-only loops. */
+ for (i = 0; i < 32; i++)
+ assert (n[i] == 2);
+ /* Each arr element is incremented once by the nested gang/vector loop. */
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == 1);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,54 @@
+#include <assert.h>
+
+/* Test conditional vector-partitioned loops. */
+
+int
+main (int argc, char *argv[])
+{
+ int n[32], arr[1024], i;
+ /* Host-side setup: clear both arrays so device updates are detectable. */
+ for (i = 0; i < 1024; i++)
+ arr[i] = 0;
+
+ for (i = 0; i < 32; i++)
+ n[i] = 0;
+ /* Request 32 gangs of one worker each, with a vector length of 32. */
+ #pragma acc parallel copy(n, arr) num_gangs(32) num_workers(1) \
+ vector_length(32)
+ {
+ int j, k;
+ /* Gang-only loop before the conditional section: first bump of n[j]. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ /* Each j owns a 32-element row; the parity of j picks the vector loop. */
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ if ((j % 2) == 0)
+ { /* Even rows: increment every element. */
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k]++;
+ }
+ else
+ { /* Odd rows: decrement every element. */
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k]--;
+ }
+ }
+ /* Gang-only loop after the conditional section: second bump of n[j]. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ }
+ /* Each n[i] is incremented once by each of the two gang-only loops. */
+ for (i = 0; i < 32; i++)
+ assert (n[i] == 2);
+ /* Without the inner parentheses ?: would apply to the whole comparison. */
+ for (i = 0; i < 1024; i++) /* Even rows hold 1, odd rows hold -1. */
+ assert (arr[i] == ((i % 64) < 32 ? 1 : -1));
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,46 @@
+#include <assert.h>
+
+/* Test conditions inside vector-partitioned loops. */
+
+int
+main (int argc, char *argv[])
+{
+ int n[32], arr[1024], i;
+ /* Seed arr with its own indices so the parity of each value is known. */
+ for (i = 0; i < 1024; i++)
+ arr[i] = i;
+
+ for (i = 0; i < 32; i++)
+ n[i] = 0;
+ /* Request 32 gangs of one worker each, with a vector length of 32. */
+ #pragma acc parallel copy(n, arr) num_gangs(32) num_workers(1) \
+ vector_length(32)
+ {
+ int j, k;
+ /* Gang-only loop before the vector section: first bump of n[j]. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ /* The condition sits inside the vector loop, so it is tested per element. */
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ if ((arr[j * 32 + k] % 2) != 0) /* Only odd values are modified. */
+ arr[j * 32 + k] *= 2;
+ }
+ /* Gang-only loop after the vector section: second bump of n[j]. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ }
+ /* Each n[i] is incremented once by each of the two gang-only loops. */
+ for (i = 0; i < 32; i++)
+ assert (n[i] == 2);
+ /* Even values are left alone; odd values have been doubled. */
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == ((i % 2) == 0 ? i : i * 2));
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,42 @@
+#include <assert.h>
+
+/* Test conditions inside gang-partitioned/vector-partitioned loops. */
+
+int
+main (int argc, char *argv[])
+{
+ int n[32], arr[1024], i;
+ /* Seed arr with its own indices so the parity of each value is known. */
+ for (i = 0; i < 1024; i++)
+ arr[i] = i;
+
+ for (i = 0; i < 32; i++)
+ n[i] = 0;
+ /* Request 32 gangs of one worker each, with a vector length of 32. */
+ #pragma acc parallel copy(n, arr) num_gangs(32) num_workers(1) \
+ vector_length(32)
+ {
+ int j, k; /* k is not used in this test. */
+ /* Gang-only loop before the combined loop: first bump of n[j]. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ /* A single loop over all 1024 elements carrying both gang and vector. */
+ #pragma acc loop gang vector
+ for (j = 0; j < 1024; j++)
+ if ((arr[j] % 2) != 0) /* Only odd values are modified. */
+ arr[j] *= 2;
+ /* Gang-only loop after the combined loop: second bump of n[j]. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ }
+ /* Each n[i] is incremented once by each of the two gang-only loops. */
+ for (i = 0; i < 32; i++)
+ assert (n[i] == 2);
+ /* Even values are left alone; odd values have been doubled. */
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == ((i % 2) == 0 ? i : i * 2));
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,77 @@
+#include <assert.h>
+#include <stdlib.h>
+
+/* Test switch containing vector-partitioned loops inside gang-partitioned
+ loops. */
+
+int
+main (int argc, char *argv[])
+{
+ int n[32], arr[1024], i;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = 0;
+ /* n[i] starts in 0..4; the first device loop shifts it to 1..5. */
+ for (i = 0; i < 32; i++)
+ n[i] = i % 5;
+ /* Request 32 gangs of one worker each, with a vector length of 32. */
+ #pragma acc parallel copy(n, arr) num_gangs(32) num_workers(1) \
+ vector_length(32)
+ {
+ int j, k;
+ /* NOTE(review): presumably static:* keeps n[j] on the gang that reads it. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ /* n[j] is now i % 5 + 1 and selects which vector loop runs on row j. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ switch (n[j])
+ {
+ case 1:
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k] += 1;
+ break;
+
+ case 2:
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k] += 2;
+ break;
+
+ case 3:
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k] += 3;
+ break;
+
+ case 4:
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k] += 4;
+ break;
+
+ case 5:
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k] += 5;
+ break;
+
+ default:
+ abort (); /* Not expected: n[j] should be in 1..5 here. */
+ }
+ /* Second bump of n[j], after the switch section. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ }
+ /* The initial i % 5 plus the two increments. */
+ for (i = 0; i < 32; i++)
+ assert (n[i] == (i % 5) + 2);
+ /* i / 32 is the row index j; that row had (j % 5) + 1 added to each element. */
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == ((i / 32) % 5) + 1);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,15 @@
+#include <assert.h>
+
+/* Test trivial operation of vector-single mode. */
+
+int
+main (int argc, char *argv[])
+{
+ int n = 0;
+ #pragma acc parallel copy(n) num_gangs(1) num_workers(1) vector_length(32)
+ {
+ n++; /* No loop directive: a lone scalar update inside the region. */
+ }
+ assert (n == 1); /* Exactly one increment, although vector_length is 32. */
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,32 @@
+#include <assert.h>
+
+/* Test vector-single, gang-partitioned mode. */
+
+int
+main (int argc, char *argv[])
+{
+ int arr[1024];
+ int gangs;
+ /* Repeat the test for every power-of-two gang count from 1 to 1024. */
+ for (gangs = 1; gangs <= 1024; gangs <<= 1)
+ {
+ int i;
+ /* Reset the array on the host before each launch. */
+ for (i = 0; i < 1024; i++)
+ arr[i] = 0;
+
+ #pragma acc parallel copy(arr) num_gangs(gangs) num_workers(1) \
+ vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 1024; j++)
+ arr[j]++; /* Gang loop only; there is no vector loop in this test. */
+ }
+ /* Whatever the gang count, each element is incremented exactly once. */
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == 1);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,35 @@
+#include <assert.h>
+
+/* Test conditions in vector-single mode. */
+
+int
+main (int argc, char *argv[])
+{
+ int arr[1024];
+ int gangs;
+ /* Repeat the test for every power-of-two gang count from 1 to 1024. */
+ for (gangs = 1; gangs <= 1024; gangs <<= 1)
+ {
+ int i;
+ /* Reset the array on the host before each launch. */
+ for (i = 0; i < 1024; i++)
+ arr[i] = 0;
+
+ #pragma acc parallel copy(arr) num_gangs(gangs) num_workers(1) \
+ vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 1024; j++)
+ if ((j % 3) == 0)
+ arr[j]++; /* Multiples of three end up as 1. */
+ else
+ arr[j] += 2; /* Everything else ends up as 2. */
+ }
+ /* Without the inner parentheses ?: would apply to the whole comparison. */
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == ((i % 3) == 0 ? 1 : 2));
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,40 @@
+#include <assert.h>
+
+/* Test switch in vector-single mode. */
+
+int
+main (int argc, char *argv[])
+{
+ int arr[1024];
+ int gangs;
+ /* Repeat the test for every power-of-two gang count from 1 to 1024. */
+ for (gangs = 1; gangs <= 1024; gangs <<= 1)
+ {
+ int i;
+ /* Reset the array on the host before each launch. */
+ for (i = 0; i < 1024; i++)
+ arr[i] = 0;
+
+ #pragma acc parallel copy(arr) num_gangs(gangs) num_workers(1) \
+ vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 1024; j++)
+ switch (j % 5) /* Each case adds (j % 5) + 1. */
+ {
+ case 0: arr[j] += 1; break;
+ case 1: arr[j] += 2; break;
+ case 2: arr[j] += 3; break;
+ case 3: arr[j] += 4; break;
+ case 4: arr[j] += 5; break;
+ default: arr[j] += 99; /* Not expected; 99 would fail the check below. */
+ }
+ }
+ /* Each element started at 0 and received exactly one case's addend. */
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == (i % 5) + 1);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,43 @@
+#include <assert.h>
+
+/* Test switch in vector-single mode, initialise array on device. */
+
+int
+main (int argc, char *argv[])
+{
+ int arr[1024];
+ int i;
+ /* Fill with 99 on the host; the final check passes only if it is cleared. */
+ for (i = 0; i < 1024; i++)
+ arr[i] = 99;
+
+ #pragma acc parallel copy(arr) num_gangs(1024) num_workers(1) \
+ vector_length(32)
+ {
+ int j;
+
+ /* This loop and the one following must be distributed to available gangs
+ in the same way to ensure data dependencies are not violated (hence the
+ "static" clauses). */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 1024; j++)
+ arr[j] = 0;
+ /* Second pass: add (j % 5) + 1 to the element just cleared above. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 1024; j++)
+ switch (j % 5)
+ {
+ case 0: arr[j] += 1; break;
+ case 1: arr[j] += 2; break;
+ case 2: arr[j] += 3; break;
+ case 3: arr[j] += 4; break;
+ case 4: arr[j] += 5; break;
+ default: arr[j] += 99; /* Not expected; 99 would fail the check below. */
+ }
+ }
+ /* Holds only if the element was zeroed before its addend was applied. */
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == (i % 5) + 1);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,49 @@
+#include <assert.h>
+#include <stdbool.h>
+
+#define NUM_GANGS 4096
+
+/* Test multiple conditions in vector-single mode. */
+
+int
+main (int argc, char *argv[])
+{
+ bool fizz[NUM_GANGS], buzz[NUM_GANGS], fizzbuzz[NUM_GANGS];
+ int i;
+ /* The flag arrays are not set on the host; the first device loop clears them. */
+ #pragma acc parallel copyout(fizz, buzz, fizzbuzz) \
+ num_gangs(NUM_GANGS) num_workers(1) vector_length(32)
+ {
+ int j;
+
+ /* This loop and the one following must be distributed to available gangs
+ in the same way to ensure data dependencies are not violated (hence the
+ "static" clauses). */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < NUM_GANGS; j++)
+ fizz[j] = buzz[j] = fizzbuzz[j] = 0;
+ /* Set at most one of the three flags for each index j. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < NUM_GANGS; j++)
+ {
+ if ((j % 3) == 0 && (j % 5) == 0)
+ fizzbuzz[j] = 1; /* Divisible by both 3 and 5. */
+ else
+ {
+ if ((j % 3) == 0)
+ fizz[j] = 1; /* Divisible by 3 only. */
+ else if ((j % 5) == 0)
+ buzz[j] = 1; /* Divisible by 5 only. */
+ }
+ }
+ }
+ /* Recompute the three predicates on the host and compare flag by flag. */
+ for (i = 0; i < NUM_GANGS; i++)
+ {
+ assert (fizzbuzz[i] == ((i % 3) == 0 && (i % 5) == 0));
+ assert (fizz[i] == ((i % 3) == 0 && (i % 5) != 0));
+ assert (buzz[i] == ((i % 3) != 0 && (i % 5) == 0));
+ }
+
+ return 0;
+}