From patchwork Thu Nov 19 19:02:36 2015
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Nathan Sidwell <nathan@acm.org>
X-Patchwork-Id: 546625
Return-Path: 
 <gcc-patches-return-414696-incoming=patchwork.ozlabs.org@gcc.gnu.org>
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
Received: from sourceware.org (server1.sourceware.org [209.132.180.131])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256
	bits)) (No client certificate requested)
	by ozlabs.org (Postfix) with ESMTPS id 2AB5814141C
	for <incoming@patchwork.ozlabs.org>;
	Fri, 20 Nov 2015 06:02:52 +1100 (AEDT)
Authentication-Results: ozlabs.org; dkim=pass (1024-bit key;
	unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org
	header.b=b0kLJlka; dkim-atps=neutral
DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id
	:list-unsubscribe:list-archive:list-post:list-help:sender:to
	:from:subject:message-id:date:mime-version:content-type; q=dns;
	s=default; b=E1Hcw9s3m8ciuAp37Q74lkHc7k31k7K/uDlRR0C9vGt4UoYq/Z
	vokBroB1i2mRJLorMcRBm+IWViDRcEccoROO/W7Qa/WQhCUS1vk017BgXwPOMim3
	KtaB1T6Sj4GpHEW/7xTyGOy4ts/ZTmQ7Y61aXSBwXoRikaNneDzDvKYMs=
DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id
	:list-unsubscribe:list-archive:list-post:list-help:sender:to
	:from:subject:message-id:date:mime-version:content-type; s=
	default; bh=2oelbbHmGuB8Vep4VeXUePbu98U=; b=b0kLJlkapjDyZbFlgoKD
	stKDd7xIELWtf8h2yu/yNMY7qVaIbRWnq5tx4V0nMrp/7EKfC1On/I2cOn7cx6K2
	WqC++htIFU865NAZV3BDDKVcuE/KmgpjN9zp1PDQD6m7E8mTXd+K3OBoh9Xwhlct
	qI8eK+/4cysSjzLVgg3iowg=
Received: (qmail 54727 invoked by alias); 19 Nov 2015 19:02:43 -0000
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Unsubscribe: 
 <mailto:gcc-patches-unsubscribe-incoming=patchwork.ozlabs.org@gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
Delivered-To: mailing list gcc-patches@gcc.gnu.org
Received: (qmail 54711 invoked by uid 89); 19 Nov 2015 19:02:42 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-1.8 required=5.0 tests=BAYES_00,
	FREEMAIL_FROM, KAM_ASCII_DIVIDERS, RCVD_IN_DNSWL_LOW,
	SPF_PASS autolearn=no version=3.3.2
X-HELO: mail-yk0-f170.google.com
Received: from mail-yk0-f170.google.com (HELO mail-yk0-f170.google.com)
	(209.85.160.170) by sourceware.org
	(qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256
	encrypted) ESMTPS; Thu, 19 Nov 2015 19:02:40 +0000
Received: by ykdv3 with SMTP id v3so121737178ykd.0 for
	<gcc-patches@gcc.gnu.org>; Thu, 19 Nov 2015 11:02:38 -0800 (PST)
X-Received: by 10.129.32.213 with SMTP id g204mr8295879ywg.234.1447959758010;
	Thu, 19 Nov 2015 11:02:38 -0800 (PST)
Received: from ?IPv6:2601:181:c000:c497:a2a8:cdff:fe3e:b48?
	([2601:181:c000:c497:a2a8:cdff:fe3e:b48]) by
	smtp.googlemail.com with ESMTPSA id
	f129sm1476045ywd.10.2015.11.19.11.02.37 (version=TLSv1/SSLv3
	cipher=OTHER); Thu, 19 Nov 2015 11:02:37 -0800 (PST)
To: GCC Patches <gcc-patches@gcc.gnu.org>
From: Nathan Sidwell <nathan@acm.org>
Subject: Openacc reduction tests
Message-ID: <564E1CCC.9040609@acm.org>
Date: Thu, 19 Nov 2015 14:02:36 -0500
User-Agent: Mozilla/5.0 (X11; Linux x86_64;
	rv:38.0) Gecko/20100101 Thunderbird/38.3.0
MIME-Version: 1.0

While adding the complex double support I noticed that some existing tests had
commented-out sequences.  These were broken tests:

a) The multiplication reduction assumed that 0 * 99! was numerically stable in
the face of arbitrary re-association.  While it did guess that results might
vary, it used an absolute, rather than relative, epsilon value.

b) A min/max test assumed that min/max is well defined on complex numbers
-- it's not.

c) Another min/max test failed because the reduction was specified as '+'
rather than max/min.

Fixed thusly,

nathan

2015-11-19  Nathan Sidwell  <nathan@codesourcery.com>

	* libgomp.oacc-c-c++-common/reduction-dbl.c: New.
	* libgomp.oacc-c-c++-common/reduction-flt.c: New.
	* libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Use typedef.
	* libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Use typedef.
	* libgomp.oacc-c-c++-common/reduction-2.c: Uncomment broken tests
	and fix.
	* libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
	* libgomp.oacc-c-c++-common/reduction-4.c: Likewise.

Index: testsuite/libgomp.oacc-c-c++-common/reduction-2.c
===================================================================
--- testsuite/libgomp.oacc-c-c++-common/reduction-2.c	(revision 230605)
+++ testsuite/libgomp.oacc-c-c++-common/reduction-2.c	(working copy)
@@ -50,39 +50,37 @@ main(void)
 
   if (fabs(result - vresult) > .0001)
     abort ();
-//   result = 0;
-//   vresult = 0;
-// 
-//   /* 'max' reductions.  */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-//   for (i = 0; i < n; i++)
-//       result = result > array[i] ? result : array[i];
-// 
-//   /* Verify the reduction.  */
-//   for (i = 0; i < n; i++)
-//       vresult = vresult > array[i] ? vresult : array[i];
-// 
-//   printf("%d != %d\n", result, vresult);
-//   if (result != vresult)
-//     abort ();
-// 
-//   result = 0;
-//   vresult = 0;
-// 
-//   /* 'min' reductions.  */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-//   for (i = 0; i < n; i++)
-//       result = result < array[i] ? result : array[i];
-// 
-//   /* Verify the reduction.  */
-//   for (i = 0; i < n; i++)
-//       vresult = vresult < array[i] ? vresult : array[i];
-// 
-//   printf("%d != %d\n", result, vresult);
-//   if (result != vresult)
-//     abort ();
+  result = 0;
+  vresult = 0;
+
+  /* 'max' reductions.  */
+#pragma acc parallel vector_length (vl) copy(result)
+#pragma acc loop reduction (max:result)
+  for (i = 0; i < n; i++)
+    result = result > array[i] ? result : array[i];
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult = vresult > array[i] ? vresult : array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* 'min' reductions.  */
+#pragma acc parallel vector_length (vl) copy(result)
+#pragma acc loop reduction (min:result)
+  for (i = 0; i < n; i++)
+    result = result < array[i] ? result : array[i];
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult = vresult < array[i] ? vresult : array[i];
+
+  if (result != vresult)
+    abort ();
 
   result = 5;
   vresult = 5;
Index: testsuite/libgomp.oacc-c-c++-common/reduction-3.c
===================================================================
--- testsuite/libgomp.oacc-c-c++-common/reduction-3.c	(revision 230605)
+++ testsuite/libgomp.oacc-c-c++-common/reduction-3.c	(working copy)
@@ -22,15 +22,15 @@ main(void)
   result = 0;
   vresult = 0;
 
-  /* '+' reductions.  */
+  /* 'max' reductions.  */
 #pragma acc parallel vector_length (vl) copy(result)
-#pragma acc loop reduction (+:result)
+#pragma acc loop reduction (max:result)
   for (i = 0; i < n; i++)
-    result += array[i];
+    result = result > array[i] ? result : array[i];
 
   /* Verify the reduction.  */
   for (i = 0; i < n; i++)
-    vresult += array[i];
+    vresult = vresult > array[i] ? vresult : array[i];
 
   if (result != vresult)
     abort ();
@@ -38,51 +38,18 @@ main(void)
   result = 0;
   vresult = 0;
 
-  /* '*' reductions.  */
+  /* 'min' reductions.  */
 #pragma acc parallel vector_length (vl) copy(result)
-#pragma acc loop reduction (*:result)
+#pragma acc loop reduction (min:result)
   for (i = 0; i < n; i++)
-    result *= array[i];
+    result = result < array[i] ? result : array[i];
 
   /* Verify the reduction.  */
   for (i = 0; i < n; i++)
-    vresult *= array[i];
+    vresult = vresult < array[i] ? vresult : array[i];
 
-  if (fabs(result - vresult) > .0001)
+  if (result != vresult)
     abort ();
-//   result = 0;
-//   vresult = 0;
-// 
-//   /* 'max' reductions.  */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-//   for (i = 0; i < n; i++)
-//       result = result > array[i] ? result : array[i];
-// 
-//   /* Verify the reduction.  */
-//   for (i = 0; i < n; i++)
-//       vresult = vresult > array[i] ? vresult : array[i];
-// 
-//   printf("%d != %d\n", result, vresult);
-//   if (result != vresult)
-//     abort ();
-// 
-//   result = 0;
-//   vresult = 0;
-// 
-//   /* 'min' reductions.  */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-//   for (i = 0; i < n; i++)
-//       result = result < array[i] ? result : array[i];
-// 
-//   /* Verify the reduction.  */
-//   for (i = 0; i < n; i++)
-//       vresult = vresult < array[i] ? vresult : array[i];
-// 
-//   printf("%d != %d\n", result, vresult);
-//   if (result != vresult)
-//     abort ();
 
   result = 5;
   vresult = 5;
Index: testsuite/libgomp.oacc-c-c++-common/reduction-4.c
===================================================================
--- testsuite/libgomp.oacc-c-c++-common/reduction-4.c	(revision 230605)
+++ testsuite/libgomp.oacc-c-c++-common/reduction-4.c	(working copy)
@@ -23,76 +23,6 @@ main(void)
   result = 0;
   vresult = 0;
 
-  /* '+' reductions.  */
-#pragma acc parallel vector_length (vl) copy(result)
-#pragma acc loop reduction (+:result)
-  for (i = 0; i < n; i++)
-    result += array[i];
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    vresult += array[i];
-
-  if (result != vresult)
-    abort ();
-
-  result = 0;
-  vresult = 0;
-
-  /* Needs support for complex multiplication.  */
-
-//   /* '*' reductions.  */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (*:result)
-//   for (i = 0; i < n; i++)
-//     result *= array[i];
-// 
-//   /* Verify the reduction.  */
-//   for (i = 0; i < n; i++)
-//     vresult *= array[i];
-// 
-//   if (fabs(result - vresult) > .0001)
-//     abort ();
-//   result = 0;
-//   vresult = 0;
-
-//   /* 'max' reductions.  */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-//   for (i = 0; i < n; i++)
-//       result = result > array[i] ? result : array[i];
-// 
-//   /* Verify the reduction.  */
-//   for (i = 0; i < n; i++)
-//       vresult = vresult > array[i] ? vresult : array[i];
-// 
-//   printf("%d != %d\n", result, vresult);
-//   if (result != vresult)
-//     abort ();
-// 
-//   result = 0;
-//   vresult = 0;
-// 
-//   /* 'min' reductions.  */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-//   for (i = 0; i < n; i++)
-//       result = result < array[i] ? result : array[i];
-// 
-//   /* Verify the reduction.  */
-//   for (i = 0; i < n; i++)
-//       vresult = vresult < array[i] ? vresult : array[i];
-// 
-//   printf("%d != %d\n", result, vresult);
-//   if (result != vresult)
-//     abort ();
-
-  result = 5;
-  vresult = 5;
-
-  lresult = false;
-  lvresult = false;
-
   /* '&&' reductions.  */
 #pragma acc parallel vector_length (vl) copy(lresult)
 #pragma acc loop reduction (&&:lresult)
Index: testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c
===================================================================
--- testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c	(revision 230605)
+++ testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c	(working copy)
@@ -3,10 +3,11 @@
 
 /* Double float has 53 bits of fraction. */
 #define FRAC (1.0 / (1LL << 48))
+typedef double _Complex Type;
 
-int close_enough (double _Complex a, double _Complex b)
+int close_enough (Type a, Type b)
 {
-  double _Complex diff = a - b;
+  Type diff = a - b;
   double mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
   double mag2_diff = (__real__(diff) * __real__ (diff)
 		     + __imag__ (diff) * __imag__ (diff));
@@ -17,9 +18,9 @@ int close_enough (double _Complex a, dou
 #define N 100
 
 static int __attribute__ ((noinline))
-vector (double _Complex ary[N], double _Complex sum, double _Complex prod)
+vector (Type ary[N], Type sum, Type prod)
 {
-  double _Complex tsum = 0, tprod = 1;
+  Type tsum = 0, tprod = 1;
 
 #pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
   {
@@ -41,9 +42,9 @@ vector (double _Complex ary[N], double _
 }
 
 static int __attribute__ ((noinline))
-worker (double _Complex ary[N], double _Complex sum, double _Complex prod)
+worker (Type ary[N], Type sum, Type prod)
 {
-  double _Complex tsum = 0, tprod = 1;
+  Type tsum = 0, tprod = 1;
 
 #pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
   {
@@ -65,9 +66,9 @@ worker (double _Complex ary[N], double _
 }
 
 static int __attribute__ ((noinline))
-gang (double _Complex ary[N], double _Complex sum, double _Complex prod)
+gang (Type ary[N], Type sum, Type prod)
 {
-  double _Complex tsum = 0, tprod = 1;
+  Type tsum = 0, tprod = 1;
 
 #pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
   {
@@ -90,7 +91,7 @@ gang (double _Complex ary[N], double _Co
 
 int main (void)
 {
-  double _Complex ary[N], sum = 0, prod = 1;
+  Type ary[N], sum = 0, prod = 1;
 
   for (int ix = 0; ix < N;  ix++)
     {
Index: testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c
===================================================================
--- testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c	(revision 230605)
+++ testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c	(working copy)
@@ -3,10 +3,11 @@
 
 /* Single float has 23 bits of fraction. */
 #define FRAC (1.0f / (1 << 20))
+typedef float _Complex Type;
 
-int close_enough (float _Complex a, float _Complex b)
+int close_enough (Type a, Type b)
 {
-  float _Complex diff = a - b;
+  Type diff = a - b;
   float mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
   float mag2_diff = (__real__(diff) * __real__ (diff)
 		     + __imag__ (diff) * __imag__ (diff));
@@ -17,9 +18,9 @@ int close_enough (float _Complex a, floa
 #define N 100
 
 static int __attribute__ ((noinline))
-vector (float _Complex ary[N], float _Complex sum, float _Complex prod)
+vector (Type ary[N], Type sum, Type prod)
 {
-  float _Complex tsum = 0, tprod = 1;
+  Type tsum = 0, tprod = 1;
 
 #pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
   {
@@ -41,9 +42,9 @@ vector (float _Complex ary[N], float _Co
 }
 
 static int __attribute__ ((noinline))
-worker (float _Complex ary[N], float _Complex sum, float _Complex prod)
+worker (Type ary[N], Type sum, Type prod)
 {
-  float _Complex tsum = 0, tprod = 1;
+  Type tsum = 0, tprod = 1;
 
 #pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
   {
@@ -65,9 +66,9 @@ worker (float _Complex ary[N], float _Co
 }
 
 static int __attribute__ ((noinline))
-gang (float _Complex ary[N], float _Complex sum, float _Complex prod)
+gang (Type ary[N], Type sum, Type prod)
 {
-  float _Complex tsum = 0, tprod = 1;
+  Type tsum = 0, tprod = 1;
 
 #pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
   {
@@ -90,7 +91,7 @@ gang (float _Complex ary[N], float _Comp
 
 int main (void)
 {
-  float _Complex ary[N], sum = 0, prod = 1;
+  Type ary[N], sum = 0, prod = 1;
 
   for (int ix = 0; ix < N;  ix++)
     {
Index: testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c
===================================================================
--- testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c	(revision 0)
+++ testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c	(working copy)
@@ -0,0 +1,112 @@
+
+/* Double float has 52 bits of fraction (53 bits of precision). */
+#define FRAC (1.0 / (1LL << 48))
+typedef double Type;
+
+int close_enough (Type a, Type b)
+{
+  Type diff = a - b;
+  if (diff < 0)
+    diff = -diff;
+
+  return diff / a < FRAC;
+}
+
+#define N 100
+
+/* Check '+' and '*' vector-loop reductions of ARY; return 0 on a match.  */
+static int __attribute__ ((noinline))
+vector (Type ary[N], Type sum, Type prod)
+{
+  Type tsum = 0, tprod = 1;  /* Identities for '+' and '*'.  */
+#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
+  {
+#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
+    for (int ix = 0; ix < N; ix++)
+      {
+	tsum += ary[ix];
+	tprod *= ary[ix];
+      }
+  }
+
+  /* The reduced values must agree with the expected SUM and PROD.  */
+  if (!close_enough (sum, tsum))
+    return 1;
+  if (!close_enough (prod, tprod))
+    return 1;
+
+  return 0;
+}
+
+/* Check '+' and '*' worker-loop reductions of ARY; return 0 on a match.  */
+static int __attribute__ ((noinline))
+worker (Type ary[N], Type sum, Type prod)
+{
+  Type tsum = 0, tprod = 1;  /* Identities for '+' and '*'.  */
+#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
+  {
+#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
+    for (int ix = 0; ix < N; ix++)
+      {
+	tsum += ary[ix];
+	tprod *= ary[ix];
+      }
+  }
+
+  /* The reduced values must agree with the expected SUM and PROD.  */
+  if (!close_enough (sum, tsum))
+    return 1;
+  if (!close_enough (prod, tprod))
+    return 1;
+
+  return 0;
+}
+
+/* Check '+' and '*' gang-loop reductions of ARY; return 0 on a match.  */
+static int __attribute__ ((noinline))
+gang (Type ary[N], Type sum, Type prod)
+{
+  Type tsum = 0, tprod = 1;  /* Identities for '+' and '*'.  */
+#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
+  {
+#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
+    for (int ix = 0; ix < N; ix++)
+      {
+	tsum += ary[ix];
+	tprod *= ary[ix];
+      }
+  }
+
+  /* The reduced values must agree with the expected SUM and PROD.  */
+  if (!close_enough (sum, tsum))
+    return 1;
+  if (!close_enough (prod, tprod))
+    return 1;
+
+  return 0;
+}
+
+/* Fill ARY and accumulate the expected sum and product in a plain loop, then
+   run the vector, worker and gang checks; return 0 only if all pass.  */
+int main (void)
+{
+  Type ary[N], sum = 0, prod = 1;
+
+  for (int ix = 0; ix < N;  ix++)
+    {
+      /* 1 + ix/1024 is exactly representable in float for every ix < N.  */
+      float frac = ix * (1.0f / 1024) + 1.0f;
+      ary[ix] = frac;
+      sum += ary[ix];
+      prod *= ary[ix];
+    }
+
+  if (vector (ary, sum, prod))
+    return 1;
+  if (worker (ary, sum, prod))
+    return 1;
+  if (gang (ary, sum, prod))
+    return 1;
+
+  return 0;
+}
Index: testsuite/libgomp.oacc-c-c++-common/reduction-flt.c
===================================================================
--- testsuite/libgomp.oacc-c-c++-common/reduction-flt.c	(revision 0)
+++ testsuite/libgomp.oacc-c-c++-common/reduction-flt.c	(working copy)
@@ -0,0 +1,112 @@
+
+/* Single float has 23 bits of fraction. */
+#define FRAC (1.0f / (1 << 20))
+typedef float Type;
+
+int close_enough (Type a, Type b)
+{
+  Type diff = a - b;
+  if (diff < 0)
+    diff = -diff;
+
+  return diff / a < FRAC;
+}
+
+#define N 100
+
+/* Check '+' and '*' vector-loop reductions of ARY; return 0 on a match.  */
+static int __attribute__ ((noinline))
+vector (Type ary[N], Type sum, Type prod)
+{
+  Type tsum = 0, tprod = 1;  /* Identities for '+' and '*'.  */
+#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
+  {
+#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
+    for (int ix = 0; ix < N; ix++)
+      {
+	tsum += ary[ix];
+	tprod *= ary[ix];
+      }
+  }
+
+  /* The reduced values must agree with the expected SUM and PROD.  */
+  if (!close_enough (sum, tsum))
+    return 1;
+  if (!close_enough (prod, tprod))
+    return 1;
+
+  return 0;
+}
+
+/* Check '+' and '*' worker-loop reductions of ARY; return 0 on a match.  */
+static int __attribute__ ((noinline))
+worker (Type ary[N], Type sum, Type prod)
+{
+  Type tsum = 0, tprod = 1;  /* Identities for '+' and '*'.  */
+#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
+  {
+#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
+    for (int ix = 0; ix < N; ix++)
+      {
+	tsum += ary[ix];
+	tprod *= ary[ix];
+      }
+  }
+
+  /* The reduced values must agree with the expected SUM and PROD.  */
+  if (!close_enough (sum, tsum))
+    return 1;
+  if (!close_enough (prod, tprod))
+    return 1;
+
+  return 0;
+}
+
+/* Check '+' and '*' gang-loop reductions of ARY; return 0 on a match.  */
+static int __attribute__ ((noinline))
+gang (Type ary[N], Type sum, Type prod)
+{
+  Type tsum = 0, tprod = 1;  /* Identities for '+' and '*'.  */
+#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
+  {
+#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
+    for (int ix = 0; ix < N; ix++)
+      {
+	tsum += ary[ix];
+	tprod *= ary[ix];
+      }
+  }
+
+  /* The reduced values must agree with the expected SUM and PROD.  */
+  if (!close_enough (sum, tsum))
+    return 1;
+  if (!close_enough (prod, tprod))
+    return 1;
+
+  return 0;
+}
+
+/* Fill ARY and accumulate the expected sum and product in a plain loop, then
+   run the vector, worker and gang checks; return 0 only if all pass.  */
+int main (void)
+{
+  Type ary[N], sum = 0, prod = 1;
+
+  for (int ix = 0; ix < N;  ix++)
+    {
+      /* 1 + ix/1024 is exactly representable in float for every ix < N.  */
+      float frac = ix * (1.0f / 1024) + 1.0f;
+      ary[ix] = frac;
+      sum += ary[ix];
+      prod *= ary[ix];
+    }
+
+  if (vector (ary, sum, prod))
+    return 1;
+  if (worker (ary, sum, prod))
+    return 1;
+  if (gang (ary, sum, prod))
+    return 1;
+
+  return 0;
+}