diff mbox

[gomp4.5] Allow arbitrary low-bound on C/C++ array sections in reduction clauses

Message ID 20151016161432.GK478@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Oct. 16, 2015, 4:14 p.m. UTC
Hi!

The OpenMP 4.5 public draft required the low bound of array sections in
reduction clauses to be zero (although it was unclear whether that had to
hold at compile time or only at runtime).  After discussion, the restriction
was removed entirely, so the low bound can now be arbitrary.

The following patch implements that.  Committed to gomp-4_5-branch after
retesting on x86_64-linux.

2015-10-16  Jakub Jelinek  <jakub@redhat.com>

	* gimplify.c (gimplify_scan_omp_clauses): Gimplify variable
	low-bound for array reduction.  Look through POINTER_PLUS_EXPR
	when looking for ADDR_EXPR for array section reductions.
	* omp-low.c (scan_sharing_clauses): Look through POINTER_PLUS_EXPR
	for array section reductions.
	(lower_send_clauses): Likewise.
	(lower_rec_input_clauses): Handle non-zero low-bound on array
	section reductions.
	(lower_reduction_clauses): Likewise.
gcc/c/
	* c-typeck.c (handle_omp_array_sections_1): Allow non-zero low-bound
	on OMP_CLAUSE_REDUCTION array sections.
	(handle_omp_array_sections): Encode low-bound into the MEM_REF,
	either into the constant offset, or for variable low-bound
	using POINTER_PLUS_EXPR.
	(c_finish_omp_clauses): Look through POINTER_PLUS_EXPR
	for array section reductions.
gcc/cp/
	* semantics.c (handle_omp_array_sections_1): Allow non-zero low-bound
	on OMP_CLAUSE_REDUCTION array sections.
	(handle_omp_array_sections): Encode low-bound into the MEM_REF,
	either into the constant offset, or for variable low-bound
	using POINTER_PLUS_EXPR.
	(finish_omp_clauses): Look through POINTER_PLUS_EXPR
	for array section reductions.
gcc/testsuite/
	* c-c++-common/gomp/reduction-1.c (foo): Don't expect diagnostics
	on non-zero low-bound in reduction array sections.  Add further
	tests.
libgomp/
	* testsuite/libgomp.c/reduction-11.c: New test.
	* testsuite/libgomp.c/reduction-12.c: New test.
	* testsuite/libgomp.c/reduction-13.c: New test.
	* testsuite/libgomp.c/reduction-14.c: New test.
	* testsuite/libgomp.c/reduction-15.c: New test.
	* testsuite/libgomp.c++/reduction-11.C: New test.
	* testsuite/libgomp.c++/reduction-12.C: New test.


	Jakub
diff mbox

Patch

--- gcc/gimplify.c.jj	2015-10-14 10:25:43.000000000 +0200
+++ gcc/gimplify.c	2015-10-16 14:39:39.841597858 +0200
@@ -6326,6 +6326,23 @@  gimplify_scan_omp_clauses (tree *list_p,
 		  omp_notice_variable (ctx, v, true);
 		}
 	      decl = TREE_OPERAND (decl, 0);
+	      if (TREE_CODE (decl) == POINTER_PLUS_EXPR)
+		{
+		  if (gimplify_expr (&TREE_OPERAND (decl, 1), pre_p,
+				     NULL, is_gimple_val, fb_rvalue)
+		      == GS_ERROR)
+		    {
+		      remove = true;
+		      break;
+		    }
+		  v = TREE_OPERAND (decl, 1);
+		  if (DECL_P (v))
+		    {
+		      omp_firstprivatize_variable (ctx, v);
+		      omp_notice_variable (ctx, v, true);
+		    }
+		  decl = TREE_OPERAND (decl, 0);
+		}
 	      if (TREE_CODE (decl) == ADDR_EXPR
 		  || TREE_CODE (decl) == INDIRECT_REF)
 		decl = TREE_OPERAND (decl, 0);
@@ -6925,7 +6942,12 @@  gimplify_scan_omp_clauses (tree *list_p,
 		  || decl == OMP_CLAUSE_DECL (c)
 		  || (TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF
 		      && (TREE_CODE (TREE_OPERAND (OMP_CLAUSE_DECL (c), 0))
-			  == ADDR_EXPR)))
+			  == ADDR_EXPR
+			  || (TREE_CODE (TREE_OPERAND (OMP_CLAUSE_DECL (c), 0))
+			      == POINTER_PLUS_EXPR
+			      && (TREE_CODE (TREE_OPERAND (TREE_OPERAND
+						(OMP_CLAUSE_DECL (c), 0), 0))
+				  == ADDR_EXPR)))))
 	      && omp_check_private (ctx, decl, false))
 	    {
 	      error ("%s variable %qE is private in outer context",
--- gcc/omp-low.c.jj	2015-10-14 18:04:13.000000000 +0200
+++ gcc/omp-low.c	2015-10-16 16:35:43.162945500 +0200
@@ -1919,6 +1919,8 @@  scan_sharing_clauses (tree clauses, omp_
 	      && TREE_CODE (decl) == MEM_REF)
 	    {
 	      tree t = TREE_OPERAND (decl, 0);
+	      if (TREE_CODE (t) == POINTER_PLUS_EXPR)
+		t = TREE_OPERAND (t, 0);
 	      if (TREE_CODE (t) == INDIRECT_REF
 		  || TREE_CODE (t) == ADDR_EXPR)
 		t = TREE_OPERAND (t, 0);
@@ -4247,6 +4249,8 @@  lower_rec_input_clauses (tree clauses, g
 	  if (c_kind == OMP_CLAUSE_REDUCTION && TREE_CODE (var) == MEM_REF)
 	    {
 	      var = TREE_OPERAND (var, 0);
+	      if (TREE_CODE (var) == POINTER_PLUS_EXPR)
+		var = TREE_OPERAND (var, 0);
 	      if (TREE_CODE (var) == INDIRECT_REF
 		  || TREE_CODE (var) == ADDR_EXPR)
 		var = TREE_OPERAND (var, 0);
@@ -4275,7 +4279,28 @@  lower_rec_input_clauses (tree clauses, g
 	      if (pass == 0)
 		continue;
 
+	      tree bias = TREE_OPERAND (OMP_CLAUSE_DECL (c), 1);
 	      tree orig_var = TREE_OPERAND (OMP_CLAUSE_DECL (c), 0);
+	      if (TREE_CODE (orig_var) == POINTER_PLUS_EXPR)
+		{
+		  tree b = TREE_OPERAND (orig_var, 1);
+		  b = maybe_lookup_decl (b, ctx);
+		  if (b == NULL)
+		    {
+		      b = TREE_OPERAND (orig_var, 1);
+		      b = maybe_lookup_decl_in_outer_ctx (b, ctx);
+		    }
+		  if (integer_zerop (bias))
+		    bias = b;
+		  else
+		    {
+		      bias = fold_convert_loc (clause_loc,
+					       TREE_TYPE (b), bias);
+		      bias = fold_build2_loc (clause_loc, PLUS_EXPR,
+					      TREE_TYPE (b), b, bias);
+		    }
+		  orig_var = TREE_OPERAND (orig_var, 0);
+		}
 	      if (TREE_CODE (orig_var) == INDIRECT_REF
 		  || TREE_CODE (orig_var) == ADDR_EXPR)
 		orig_var = TREE_OPERAND (orig_var, 0);
@@ -4316,7 +4341,24 @@  lower_rec_input_clauses (tree clauses, g
 	      tree y = create_tmp_var (ptype, name);
 	      gimplify_assign (y, x, ilist);
 	      x = y;
-	      if (TREE_CODE (TREE_OPERAND (d, 0)) == ADDR_EXPR)
+	      tree yb = y;
+
+	      if (!integer_zerop (bias))
+		{
+		  bias = fold_convert_loc (clause_loc, sizetype, bias);
+		  bias = fold_build1_loc (clause_loc, NEGATE_EXPR,
+					  sizetype, bias);
+		  x = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
+				       TREE_TYPE (x), x, bias);
+		  yb = create_tmp_var (ptype, name);
+		  gimplify_assign (yb, x, ilist);
+		  x = yb;
+		}
+
+	      d = TREE_OPERAND (d, 0);
+	      if (TREE_CODE (d) == POINTER_PLUS_EXPR)
+		d = TREE_OPERAND (d, 0);
+	      if (TREE_CODE (d) == ADDR_EXPR)
 		{
 		  if (orig_var != var)
 		    {
@@ -4342,11 +4384,11 @@  lower_rec_input_clauses (tree clauses, g
 	      else
 		{
 		  gcc_assert (orig_var == var);
-		  if (TREE_CODE (TREE_OPERAND (d, 0)) == INDIRECT_REF)
+		  if (TREE_CODE (d) == INDIRECT_REF)
 		    {
 		      x = create_tmp_var (ptype, name);
 		      TREE_ADDRESSABLE (x) = 1;
-		      gimplify_assign (x, y, ilist);
+		      gimplify_assign (x, yb, ilist);
 		      x = build_fold_addr_expr_loc (clause_loc, x);
 		    }
 		  x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
@@ -4363,9 +4405,9 @@  lower_rec_input_clauses (tree clauses, g
 		  gimplify_assign (y2, y, ilist);
 		  tree ref = build_outer_var_ref (var, ctx);
 		  /* For ref build_outer_var_ref already performs this.  */
-		  if (TREE_CODE (TREE_OPERAND (d, 0)) == INDIRECT_REF)
+		  if (TREE_CODE (d) == INDIRECT_REF)
 		    gcc_assert (is_reference (var));
-		  else if (TREE_CODE (TREE_OPERAND (d, 0)) == ADDR_EXPR)
+		  else if (TREE_CODE (d) == ADDR_EXPR)
 		    ref = build_fold_addr_expr (ref);
 		  else if (is_reference (var))
 		    ref = build_fold_addr_expr (ref);
@@ -5338,6 +5380,8 @@  lower_reduction_clauses (tree clauses, g
       if (TREE_CODE (var) == MEM_REF)
 	{
 	  var = TREE_OPERAND (var, 0);
+	  if (TREE_CODE (var) == POINTER_PLUS_EXPR)
+	    var = TREE_OPERAND (var, 0);
 	  if (TREE_CODE (var) == INDIRECT_REF
 	      || TREE_CODE (var) == ADDR_EXPR)
 	    var = TREE_OPERAND (var, 0);
@@ -5386,14 +5430,35 @@  lower_reduction_clauses (tree clauses, g
 	  tree v = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
 	  tree i = create_tmp_var (TREE_TYPE (v), NULL);
 	  tree ptype = build_pointer_type (TREE_TYPE (type));
+	  tree bias = TREE_OPERAND (d, 1);
+	  d = TREE_OPERAND (d, 0);
+	  if (TREE_CODE (d) == POINTER_PLUS_EXPR)
+	    {
+	      tree b = TREE_OPERAND (d, 1);
+	      b = maybe_lookup_decl (b, ctx);
+	      if (b == NULL)
+		{
+		  b = TREE_OPERAND (d, 1);
+		  b = maybe_lookup_decl_in_outer_ctx (b, ctx);
+		}
+	      if (integer_zerop (bias))
+		bias = b;
+	      else
+		{
+		  bias = fold_convert_loc (clause_loc, TREE_TYPE (b), bias);
+		  bias = fold_build2_loc (clause_loc, PLUS_EXPR,
+					  TREE_TYPE (b), b, bias);
+		}
+	      d = TREE_OPERAND (d, 0);
+	    }
 	  /* For ref build_outer_var_ref already performs this, so
 	     only new_var needs a dereference.  */
-	  if (TREE_CODE (TREE_OPERAND (d, 0)) == INDIRECT_REF)
+	  if (TREE_CODE (d) == INDIRECT_REF)
 	    {
 	      new_var = build_simple_mem_ref_loc (clause_loc, new_var);
 	      gcc_assert (is_reference (var) && var == orig_var);
 	    }
-	  else if (TREE_CODE (TREE_OPERAND (d, 0)) == ADDR_EXPR)
+	  else if (TREE_CODE (d) == ADDR_EXPR)
 	    {
 	      if (orig_var == var)
 		{
@@ -5416,6 +5481,15 @@  lower_reduction_clauses (tree clauses, g
 		v = maybe_lookup_decl_in_outer_ctx (v, ctx);
 	      gimplify_expr (&v, stmt_seqp, NULL, is_gimple_val, fb_rvalue);
 	    }
+	  if (!integer_zerop (bias))
+	    {
+	      bias = fold_convert_loc (clause_loc, sizetype, bias);
+	      new_var = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
+					 TREE_TYPE (new_var), new_var,
+					 unshare_expr (bias));
+	      ref = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
+					 TREE_TYPE (ref), ref, bias);
+	    }
 	  new_var = fold_convert_loc (clause_loc, ptype, new_var);
 	  ref = fold_convert_loc (clause_loc, ptype, ref);
 	  tree m = create_tmp_var (ptype, NULL);
@@ -5608,6 +5682,8 @@  lower_send_clauses (tree clauses, gimple
 	  && TREE_CODE (val) == MEM_REF)
 	{
 	  val = TREE_OPERAND (val, 0);
+	  if (TREE_CODE (val) == POINTER_PLUS_EXPR)
+	    val = TREE_OPERAND (val, 0);
 	  if (TREE_CODE (val) == INDIRECT_REF
 	      || TREE_CODE (val) == ADDR_EXPR)
 	    val = TREE_OPERAND (val, 0);
--- gcc/c/c-typeck.c.jj	2015-10-15 18:17:41.000000000 +0200
+++ gcc/c/c-typeck.c	2015-10-16 09:38:33.935389555 +0200
@@ -11779,13 +11779,6 @@  handle_omp_array_sections_1 (tree c, tre
 	  && (TREE_CODE (length) != INTEGER_CST || integer_onep (length)))
 	first_non_one++;
     }
-  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
-      && !integer_zerop (low_bound))
-    {
-      error_at (OMP_CLAUSE_LOCATION (c),
-		"%<reduction%> array section has to be zero-based");
-      return error_mark_node;
-    }
   if (TREE_CODE (type) == ARRAY_TYPE)
     {
       if (length == NULL_TREE
@@ -12126,7 +12119,24 @@  handle_omp_array_sections (tree c, bool
 	  tree ptype = build_pointer_type (eltype);
 	  if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE)
 	    t = build_fold_addr_expr (t);
-	  t = build2 (MEM_REF, type, t, build_int_cst (ptype, 0));
+	  tree t2 = build_fold_addr_expr (first);
+	  t2 = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
+				 ptrdiff_type_node, t2);
+	  t2 = fold_build2_loc (OMP_CLAUSE_LOCATION (c), MINUS_EXPR,
+				ptrdiff_type_node, t2,
+				fold_convert_loc (OMP_CLAUSE_LOCATION (c),
+						  ptrdiff_type_node, t));
+	  t2 = c_fully_fold (t2, false, NULL);
+	  if (tree_fits_shwi_p (t2))
+	    t = build2 (MEM_REF, type, t,
+			build_int_cst (ptype, tree_to_shwi (t2)));
+	  else
+	    {
+	      t2 = fold_convert_loc (OMP_CLAUSE_LOCATION (c), sizetype, t2);
+	      t = build2_loc (OMP_CLAUSE_LOCATION (c), POINTER_PLUS_EXPR,
+			      TREE_TYPE (t), t, t2);
+	      t = build2 (MEM_REF, type, t, build_int_cst (ptype, 0));
+	    }
 	  OMP_CLAUSE_DECL (c) = t;
 	  return false;
 	}
@@ -12466,6 +12476,8 @@  c_finish_omp_clauses (tree clauses, bool
 		  break;
 		}
 	      t = TREE_OPERAND (t, 0);
+	      if (TREE_CODE (t) == POINTER_PLUS_EXPR)
+		t = TREE_OPERAND (t, 0);
 	      if (TREE_CODE (t) == ADDR_EXPR)
 		t = TREE_OPERAND (t, 0);
 	    }
--- gcc/cp/semantics.c.jj	2015-10-15 18:32:24.000000000 +0200
+++ gcc/cp/semantics.c	2015-10-16 17:08:35.278026456 +0200
@@ -4519,13 +4519,6 @@  handle_omp_array_sections_1 (tree c, tre
 	  && (TREE_CODE (length) != INTEGER_CST || integer_onep (length)))
 	first_non_one++;
     }
-  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
-      && !integer_zerop (low_bound))
-    {
-      error_at (OMP_CLAUSE_LOCATION (c),
-		"%<reduction%> array section has to be zero-based");
-      return error_mark_node;
-    }
   if (TREE_CODE (type) == ARRAY_TYPE)
     {
       if (length == NULL_TREE
@@ -4866,7 +4859,24 @@  handle_omp_array_sections (tree c, bool
 		t = convert_from_reference (t);
 	      else if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE)
 		t = build_fold_addr_expr (t);
-	      t = build2 (MEM_REF, type, t, build_int_cst (ptype, 0));
+	      tree t2 = build_fold_addr_expr (first);
+	      t2 = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
+				     ptrdiff_type_node, t2);
+	      t2 = fold_build2_loc (OMP_CLAUSE_LOCATION (c), MINUS_EXPR,
+				    ptrdiff_type_node, t2,
+				    fold_convert_loc (OMP_CLAUSE_LOCATION (c),
+						      ptrdiff_type_node, t));
+	      if (tree_fits_shwi_p (t2))
+		t = build2 (MEM_REF, type, t,
+			    build_int_cst (ptype, tree_to_shwi (t2)));
+	      else
+		{
+		  t2 = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
+					 sizetype, t2);
+		  t = build2_loc (OMP_CLAUSE_LOCATION (c), POINTER_PLUS_EXPR,
+				  TREE_TYPE (t), t, t2);
+		  t = build2 (MEM_REF, type, t, build_int_cst (ptype, 0));
+		}
 	      OMP_CLAUSE_DECL (c) = t;
 	      return false;
 	    }
@@ -5694,6 +5704,8 @@  finish_omp_clauses (tree clauses, bool a
 		{
 		  gcc_assert (TREE_CODE (t) == MEM_REF);
 		  t = TREE_OPERAND (t, 0);
+		  if (TREE_CODE (t) == POINTER_PLUS_EXPR)
+		    t = TREE_OPERAND (t, 0);
 		  if (TREE_CODE (t) == ADDR_EXPR
 		      || TREE_CODE (t) == INDIRECT_REF)
 		    t = TREE_OPERAND (t, 0);
--- gcc/testsuite/c-c++-common/gomp/reduction-1.c.jj	2015-10-14 10:25:30.000000000 +0200
+++ gcc/testsuite/c-c++-common/gomp/reduction-1.c	2015-10-16 14:51:06.709543427 +0200
@@ -26,9 +26,17 @@  foo (int a[10][10][10], int **b, int x)
     bar (a);
   #pragma omp parallel reduction(+: a[0:4])
     bar (a);
-  #pragma omp parallel reduction(+: a[2:4])		/* { dg-error "array section has to be zero-based" } */
+  #pragma omp parallel reduction(+: a[2:4])
     bar (a);
-  #pragma omp parallel reduction(+: e[2:4])		/* { dg-error "array section has to be zero-based" } */
+  #pragma omp parallel reduction(+: e[2:4])
+    bar (a);
+  #pragma omp parallel reduction(+: a[x:4])
+    bar (a);
+  #pragma omp parallel reduction(+: e[x:4])
+    bar (a);
+  #pragma omp parallel reduction(+: a[x:x])
+    bar (a);
+  #pragma omp parallel reduction(+: e[x:x])
     bar (a);
   #pragma omp parallel reduction(+: a[0.5:2])		/* { dg-error "low bound \[^\n\r]* of array section does not have integral type" } */
     bar (a);
--- libgomp/testsuite/libgomp.c/reduction-11.c.jj	2015-10-16 14:57:59.000502905 +0200
+++ libgomp/testsuite/libgomp.c/reduction-11.c	2015-10-16 16:37:35.162308552 +0200
@@ -0,0 +1,60 @@ 
+char z[10] = { 0 };
+
+__attribute__((noinline, noclone)) void
+foo (int (*x)[3][2], int *y, long w[1][2], int s, int t)
+{
+  unsigned long long a[9] = {};
+  short b[5] = {};
+  int i;
+  #pragma omp parallel for reduction(+:x[-1:2][:][0:2], z[t + 2:4]) \
+			   reduction(*:y[-s:3]) reduction(|:a[s + 3:4]) \
+			   reduction(&:w[s + 1:1][t:2]) reduction(max:b[2:])
+  for (i = 0; i < 128; i++)
+    {
+      x[i / 64 - 1][i % 3][(i / 4) & 1] += i;
+      if ((i & 15) == 1)
+	y[1] *= 3;
+      if ((i & 31) == 2)
+	y[2] *= 7;
+      if ((i & 63) == 3)
+	y[3] *= 17;
+      z[i / 32 + 2] += (i & 3);
+      if (i < 4)
+	z[i + 2] += i;
+      a[i / 32 + 2] |= 1ULL << (i & 30);
+      w[0][i & 1] &= ~(1L << (i / 17 * 3));
+      if ((i % 23) > b[2])
+	b[2] = i % 23;
+      if ((i % 85) > b[3])
+	b[3] = i % 85;
+      if ((i % 192) > b[4])
+	b[4] = i % 192;
+    }
+  for (i = 0; i < 9; i++)
+    if (a[i] != ((i < 6 && i >= 2) ? 0x55555555ULL : 0))
+      __builtin_abort ();
+  if (b[0] != 0 || b[1] != 0 || b[2] != 22 || b[3] != 84 || b[4] != 127)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  int a[4][3][2] = {};
+  static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+			    {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+			    {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+			    {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+  int y[5] = { 0, 1, 1, 1, 0 };
+  int y2[5] = { 0, 6561, 2401, 289, 0 };
+  char z2[10] = { 0, 0, 48, 49, 50, 51, 0, 0, 0, 0 };
+  long w[1][2] = { ~0L, ~0L };
+  foo (&a[2], y, w, -1, 0);
+  if (__builtin_memcmp (a, a2, sizeof (a))
+      || __builtin_memcmp (y, y2, sizeof (y))
+      || __builtin_memcmp (z, z2, sizeof (z))
+      || w[0][0] != ~0x249249L
+      || w[0][1] != ~0x249249L)
+    __builtin_abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/reduction-12.c.jj	2015-10-16 16:40:11.638021550 +0200
+++ libgomp/testsuite/libgomp.c/reduction-12.c	2015-10-16 16:43:30.244118783 +0200
@@ -0,0 +1,94 @@ 
+struct A { int t; };
+struct B { char t; };
+struct C { unsigned long long t; };
+struct D { long t; };
+void
+add (struct B *x, struct B *y)
+{
+  x->t += y->t;
+}
+void
+zero (struct B *x)
+{
+  x->t = 0;
+}
+void
+orit (struct C *x, struct C *y)
+{
+  y->t |= x->t;
+}
+#pragma omp declare reduction(+:struct A:omp_out.t += omp_in.t)
+#pragma omp declare reduction(+:struct B:add (&omp_out, &omp_in)) initializer(zero (&omp_priv))
+#pragma omp declare reduction(*:struct A:omp_out.t *= omp_in.t) initializer(omp_priv = { 1 })
+#pragma omp declare reduction(|:struct C:orit (&omp_in, &omp_out))
+#pragma omp declare reduction(&:struct D:omp_out.t = omp_out.t & omp_in.t) initializer(omp_priv = { ~0L })
+#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? omp_in : omp_out) initializer(omp_priv = -6)
+
+struct B z[10];
+
+__attribute__((noinline, noclone)) void
+foo (struct A (*x)[3][2], struct A *y, struct D w[1][2], int s, int t)
+{
+  struct C a[9] = {};
+  short b[5] = {};
+  int i;
+  #pragma omp parallel for reduction(+:x[-1:2][:][0:2], z[t + 2:4]) \
+			   reduction(*:y[-s:3]) reduction(|:a[s + 3:4]) \
+			   reduction(&:w[s + 1:1][t:2]) reduction(maxb:b[2:])
+  for (i = 0; i < 128; i++)
+    {
+      x[i / 64 - 1][i % 3][(i / 4) & 1].t += i;
+      if ((i & 15) == 1)
+	y[1].t *= 3;
+      if ((i & 31) == 2)
+	y[2].t *= 7;
+      if ((i & 63) == 3)
+	y[3].t *= 17;
+      z[i / 32 + 2].t += (i & 3);
+      if (i < 4)
+	z[i + 2].t += i;
+      a[i / 32 + 2].t |= 1ULL << (i & 30);
+      w[0][i & 1].t &= ~(1L << (i / 17 * 3));
+      if ((i % 23) > b[2])
+	b[2] = i % 23;
+      if ((i % 85) > b[3])
+	b[3] = i % 85;
+      if ((i % 192) > b[4])
+	b[4] = i % 192;
+    }
+  for (i = 0; i < 9; i++)
+    if (a[i].t != ((i < 6 && i >= 2) ? 0x55555555ULL : 0))
+      __builtin_abort ();
+  if (b[0] != 0 || b[1] != 0 || b[2] != 22 || b[3] != 84 || b[4] != 127)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  struct A a[4][3][2] = {};
+  static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+			    {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+			    {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+			    {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+  struct A y[5] = { { 0 }, { 1 }, { 1 }, { 1 }, { 0 } };
+  int y2[5] = { 0, 6561, 2401, 289, 0 };
+  char z2[10] = { 0, 0, 48, 49, 50, 51, 0, 0, 0, 0 };
+  struct D w[1][2] = { { { ~0L }, { ~0L } } };
+  foo (&a[2], y, w, -1, 0);
+  int i, j, k;
+  for (i = 0; i < 4; i++)
+    for (j = 0; j < 3; j++)
+      for (k = 0; k < 2; k++)
+	if (a[i][j][k].t != a2[i][j][k])
+	  __builtin_abort ();
+  for (i = 0; i < 5; i++)
+    if (y[i].t != y2[i])
+      __builtin_abort ();
+  for (i = 0; i < 10; i++)
+    if (z[i].t != z2[i])
+      __builtin_abort ();
+  if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L)
+    __builtin_abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/reduction-13.c.jj	2015-10-16 16:47:30.250609204 +0200
+++ libgomp/testsuite/libgomp.c/reduction-13.c	2015-10-16 16:56:20.801848488 +0200
@@ -0,0 +1,67 @@ 
+char z[10] = { 0 };
+
+__attribute__((noinline, noclone)) void
+foo (int (*x)[3][2], int *y, long w[1][2], int p1, long p2, long p3, int p4,
+     int p5, long p6, short p7, int s, int t)
+{
+  unsigned long long a[p7 + 4];
+  short b[p7];
+  int i;
+  for (i = 0; i < p7 + 4; i++)
+    {
+      if (i < p7)
+	b[i] = -6;
+      a[i] = 0;
+    }
+  #pragma omp parallel for reduction(+:x[-1:p1 + 1][:p2], z[t + 2:p3]) \
+			   reduction(*:y[-s:p4]) reduction(|:a[s + 3:p5]) \
+			   reduction(&:w[s + 1:p6 - 1][t:p6]) reduction(max:b[2:])
+  for (i = 0; i < 128; i++)
+    {
+      x[i / 64 - 1][i % 3][(i / 4) & 1] += i;
+      if ((i & 15) == 1)
+	y[1] *= 3;
+      if ((i & 31) == 2)
+	y[2] *= 7;
+      if ((i & 63) == 3)
+	y[3] *= 17;
+      z[i / 32 + 2] += (i & 3);
+      if (i < 4)
+	z[i + 2] += i;
+      a[i / 32 + 2] |= 1ULL << (i & 30);
+      w[0][i & 1] &= ~(1L << (i / 17 * 3));
+      if ((i % 23) > b[2])
+	b[2] = i % 23;
+      if ((i % 85) > b[3])
+	b[3] = i % 85;
+      if ((i % 192) > b[4])
+	b[4] = i % 192;
+    }
+  for (i = 0; i < 9; i++)
+    if (a[i] != ((i < 6 && i >= 2) ? 0x55555555ULL : 0))
+      __builtin_abort ();
+  if (b[0] != -6 || b[1] != -6 || b[2] != 22 || b[3] != 84 || b[4] != 127)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  int a[4][3][2] = {};
+  static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+			    {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+			    {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+			    {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+  int y[5] = { 0, 1, 1, 1, 0 };
+  int y2[5] = { 0, 6561, 2401, 289, 0 };
+  char z2[10] = { 0, 0, 48, 49, 50, 51, 0, 0, 0, 0 };
+  long w[1][2] = { ~0L, ~0L };
+  foo (&a[2], y, w, 1, 3L, 4L, 3, 4, 2L, 5, -1, 0);
+  if (__builtin_memcmp (a, a2, sizeof (a))
+      || __builtin_memcmp (y, y2, sizeof (y))
+      || __builtin_memcmp (z, z2, sizeof (z))
+      || w[0][0] != ~0x249249L
+      || w[0][1] != ~0x249249L)
+    __builtin_abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/reduction-14.c.jj	2015-10-16 16:51:45.162880438 +0200
+++ libgomp/testsuite/libgomp.c/reduction-14.c	2015-10-16 16:56:38.547588910 +0200
@@ -0,0 +1,101 @@ 
+struct A { int t; };
+struct B { char t; };
+struct C { unsigned long long t; };
+struct D { long t; };
+void
+add (struct B *x, struct B *y)
+{
+  x->t += y->t;
+}
+void
+zero (struct B *x)
+{
+  x->t = 0;
+}
+void
+orit (struct C *x, struct C *y)
+{
+  y->t |= x->t;
+}
+#pragma omp declare reduction(+:struct A:omp_out.t += omp_in.t)
+#pragma omp declare reduction(+:struct B:add (&omp_out, &omp_in)) initializer(zero (&omp_priv))
+#pragma omp declare reduction(*:struct A:omp_out.t *= omp_in.t) initializer(omp_priv = { 1 })
+#pragma omp declare reduction(|:struct C:orit (&omp_in, &omp_out))
+#pragma omp declare reduction(&:struct D:omp_out.t = omp_out.t & omp_in.t) initializer(omp_priv = { ~0L })
+#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? omp_in : omp_out) initializer(omp_priv = -6)
+
+struct B z[10];
+
+__attribute__((noinline, noclone)) void
+foo (struct A (*x)[3][2], struct A *y, struct D w[1][2], int p1, long p2, long p3, int p4,
+     int p5, long p6, short p7, int s, int t)
+{
+  struct C a[p7 + 4];
+  short b[p7];
+  int i;
+  for (i = 0; i < p7 + 4; i++)
+    {
+      if (i < p7)
+	b[i] = -6;
+      a[i].t = 0;
+    }
+  #pragma omp parallel for reduction(+:x[-1:p1 + 1][:p2], z[t + 2:p3]) \
+			   reduction(*:y[-s:p4]) reduction(|:a[s + 3:p5]) \
+			   reduction(&:w[s + 1:p6 - 1][t:p6]) reduction(maxb:b[2:])
+  for (i = 0; i < 128; i++)
+    {
+      x[i / 64 - 1][i % 3][(i / 4) & 1].t += i;
+      if ((i & 15) == 1)
+	y[1].t *= 3;
+      if ((i & 31) == 2)
+	y[2].t *= 7;
+      if ((i & 63) == 3)
+	y[3].t *= 17;
+      z[i / 32 + 2].t += (i & 3);
+      if (i < 4)
+	z[i + 2].t += i;
+      a[i / 32 + 2].t |= 1ULL << (i & 30);
+      w[0][i & 1].t &= ~(1L << (i / 17 * 3));
+      if ((i % 23) > b[2])
+	b[2] = i % 23;
+      if ((i % 85) > b[3])
+	b[3] = i % 85;
+      if ((i % 192) > b[4])
+	b[4] = i % 192;
+    }
+  for (i = 0; i < 9; i++)
+    if (a[i].t != ((i < 6 && i >= 2) ? 0x55555555ULL : 0))
+      __builtin_abort ();
+  if (b[0] != -6 || b[1] != -6 || b[2] != 22 || b[3] != 84 || b[4] != 127)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  struct A a[4][3][2] = {};
+  static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+			    {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+			    {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+			    {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+  struct A y[5] = { { 0 }, { 1 }, { 1 }, { 1 }, { 0 } };
+  int y2[5] = { 0, 6561, 2401, 289, 0 };
+  char z2[10] = { 0, 0, 48, 49, 50, 51, 0, 0, 0, 0 };
+  struct D w[1][2] = { { { ~0L }, { ~0L } } };
+  foo (&a[2], y, w, 1, 3L, 4L, 3, 4, 2L, 5, -1, 0);
+  int i, j, k;
+  for (i = 0; i < 4; i++)
+    for (j = 0; j < 3; j++)
+      for (k = 0; k < 2; k++)
+	if (a[i][j][k].t != a2[i][j][k])
+	  __builtin_abort ();
+  for (i = 0; i < 5; i++)
+    if (y[i].t != y2[i])
+      __builtin_abort ();
+  for (i = 0; i < 10; i++)
+    if (z[i].t != z2[i])
+      __builtin_abort ();
+  if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L)
+    __builtin_abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/reduction-15.c.jj	2015-10-16 17:40:17.546234529 +0200
+++ libgomp/testsuite/libgomp.c/reduction-15.c	2015-10-16 17:47:41.357746092 +0200
@@ -0,0 +1,56 @@ 
+extern void abort (void);
+int a[16], b[16], c[16], d[5][2];
+
+__attribute__((noinline, noclone)) void
+foo (int x, int y)
+{
+  int i;
+  #pragma omp for schedule (static, 1) reduction (+:a[:3])
+  for (i = 0; i < 64; i++)
+    {
+      a[0] += i;
+      a[1] += 2 * i;
+      a[2] += 3 * i;
+    }
+  #pragma omp for schedule (guided) reduction (+:b[4:3])
+  for (i = 0; i < 64; i++)
+    {
+      b[4] += i;
+      b[5] += 2 * i;
+      b[6] += 3 * i;
+    }
+  #pragma omp for schedule (static) reduction (+:c[x:4])
+  for (i = 0; i < 64; i++)
+    {
+      c[9] += i;
+      c[10] += 2 * i;
+      c[11] += 3 * i;
+      c[12] += 4 * i;
+    }
+  #pragma omp for reduction (+:d[x - 8:2][y:])
+  for (i = 0; i < 64; i++)
+    {
+      d[1][0] += i;
+      d[1][1] += 2 * i;
+      d[2][0] += 3 * i;
+      d[2][1] += 4 * i;
+    }
+}
+
+int
+main ()
+{
+  int i;
+  #pragma omp parallel
+  foo (9, 0);
+  for (i = 0; i < 16; i++)
+    if (a[i] != (i < 3 ? 64 * 63 / 2 * (i + 1) : 0)
+	|| b[i] != ((i >= 4 && i < 7) ? 64 * 63 / 2 * (i - 3) : 0)
+	|| c[i] != ((i >= 9 && i < 13) ? 64 * 63 / 2 * (i - 8) : 0))
+      abort ();
+  for (i = 0; i < 5; i++)
+    if (d[i][0] != ((i && i <= 2) ? 64 * 63 / 2 * (2 * i - 1) : 0)
+	|| d[i][1] != ((i && i <= 2) ? 64 * 63 / 2 * (2 * i) : 0))
+      abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/reduction-11.C.jj	2015-10-16 17:12:48.767317531 +0200
+++ libgomp/testsuite/libgomp.c++/reduction-11.C	2015-10-16 17:30:41.880612286 +0200
@@ -0,0 +1,119 @@ 
+char z[10] = { 0 };
+
+__attribute__((noinline, noclone)) void
+foo (int (*&x)[3][2], int *y, long (&w)[1][2], int s, int t)
+{
+  unsigned long long a[9] = {};
+  short b[5] = {};
+  #pragma omp parallel for reduction(+:x[-1:2][:][0:2], z[t + 2:4]) \
+			   reduction(*:y[-s:3]) reduction(|:a[s + 3:4]) \
+			   reduction(&:w[s + 1:][t:2]) reduction(max:b[2:])
+  for (int i = 0; i < 128; i++)
+    {
+      x[i / 64 - 1][i % 3][(i / 4) & 1] += i;
+      if ((i & 15) == 1)
+	y[1] *= 3;
+      if ((i & 31) == 2)
+	y[2] *= 7;
+      if ((i & 63) == 3)
+	y[3] *= 17;
+      z[i / 32 + 2] += (i & 3);
+      if (i < 4)
+	z[i + 2] += i;
+      a[i / 32 + 2] |= 1ULL << (i & 30);
+      w[0][i & 1] &= ~(1L << (i / 17 * 3));
+      if ((i % 23) > b[2])
+	b[2] = i % 23;
+      if ((i % 85) > b[3])
+	b[3] = i % 85;
+      if ((i % 192) > b[4])
+	b[4] = i % 192;
+    }
+  for (int i = 0; i < 9; i++)
+    if (a[i] != ((i < 6 && i >= 2) ? 0x55555555ULL : 0))
+      __builtin_abort ();
+  if (b[0] != 0 || b[1] != 0 || b[2] != 22 || b[3] != 84 || b[4] != 127)
+    __builtin_abort ();
+}
+
+int a3[4][3][2];
+int (*p3)[3][2] = &a3[2];
+int y3[5] = { 0, 1, 1, 1, 0 };
+long w3[1][2] = { ~0L, ~0L };
+short bb[5];
+
+struct S
+{
+  int (*&x)[3][2];
+  int *y;
+  long (&w)[1][2];
+  char z[10];
+  short (&b)[5];
+  unsigned long long a[9];
+  S() : x(p3), y(y3), w(w3), z(), a(), b(bb) {}
+  __attribute__((noinline, noclone)) void foo (int s, int t);
+};
+
+void
+S::foo (int s, int t)
+{
+  #pragma omp parallel for reduction(+:x[-1:2][:][0:2], z[t + 2:4]) \
+			   reduction(*:y[-s:3]) reduction(|:a[s + 3:4]) \
+			   reduction(&:w[s + 1:][t:2]) reduction(max:b[2:])
+  for (int i = 0; i < 128; i++)
+    {
+      x[i / 64 - 1][i % 3][(i / 4) & 1] += i;
+      if ((i & 15) == 1)
+	y[1] *= 3;
+      if ((i & 31) == 2)
+	y[2] *= 7;
+      if ((i & 63) == 3)
+	y[3] *= 17;
+      z[i / 32 + 2] += (i & 3);
+      if (i < 4)
+	z[i + 2] += i;
+      a[i / 32 + 2] |= 1ULL << (i & 30);
+      w[0][i & 1] &= ~(1L << (i / 17 * 3));
+      if ((i % 23) > b[2])
+	b[2] = i % 23;
+      if ((i % 85) > b[3])
+	b[3] = i % 85;
+      if ((i % 192) > b[4])
+	b[4] = i % 192;
+    }
+}
+
+int
+main ()
+{
+  int a[4][3][2] = {};
+  static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+			    {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+			    {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+			    {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+  int (*p)[3][2] = &a[2];
+  int y[5] = { 0, 1, 1, 1, 0 };
+  int y2[5] = { 0, 6561, 2401, 289, 0 };
+  char z2[10] = { 0, 0, 48, 49, 50, 51, 0, 0, 0, 0 };
+  long w[1][2] = { ~0L, ~0L };
+  foo (p, y, w, -1, 0);
+  if (__builtin_memcmp (a, a2, sizeof (a))
+      || __builtin_memcmp (y, y2, sizeof (y))
+      || __builtin_memcmp (z, z2, sizeof (z))
+      || w[0][0] != ~0x249249L
+      || w[0][1] != ~0x249249L)
+    __builtin_abort ();
+  S s;
+  s.foo (-1, 0);
+  for (int i = 0; i < 9; i++)
+    if (s.a[i] != ((i < 6 && i >= 2) ? 0x55555555ULL : 0))
+      __builtin_abort ();
+  if (__builtin_memcmp (a3, a2, sizeof (a3))
+      || __builtin_memcmp (y3, y2, sizeof (y3))
+      || __builtin_memcmp (s.z, z2, sizeof (s.z))
+      || w3[0][0] != ~0x249249L
+      || w3[0][1] != ~0x249249L)
+    __builtin_abort ();
+  if (bb[0] != 0 || bb[1] != 0 || bb[2] != 22 || bb[3] != 84 || bb[4] != 127)
+    __builtin_abort ();
+}
--- libgomp/testsuite/libgomp.c++/reduction-12.C.jj	2015-10-16 17:19:13.717685135 +0200
+++ libgomp/testsuite/libgomp.c++/reduction-12.C	2015-10-16 17:30:52.359458902 +0200
@@ -0,0 +1,193 @@ 
+template <typename T>
+struct A
+{
+  A () { t = 0; }
+  A (T x) { t = x; }
+  A (const A &x) { t = x.t; }
+  ~A () {}
+  T t;
+};
+template <typename T>
+struct M
+{
+  M () { t = 1; }
+  M (T x) { t = x; }
+  M (const M &x) { t = x.t; }
+  ~M () {}
+  T t;
+};
+template <typename T>
+struct B
+{
+  B () { t = ~(T) 0; }
+  B (T x) { t = x; }
+  B (const B &x) { t = x.t; }
+  ~B () {}
+  T t;
+};
+template <typename T>
+void
+add (T &x, T &y)
+{
+  x.t += y.t;
+}
+template <typename T>
+void
+zero (T &x)
+{
+  x.t = 0;
+}
+template <typename T>
+void
+orit (T *x, T *y)
+{
+  y->t |= x->t;
+}
+B<long> bb;
+#pragma omp declare reduction(+:A<int>:omp_out.t += omp_in.t)
+#pragma omp declare reduction(+:A<char>:add (omp_out, omp_in)) initializer(zero (omp_priv))
+#pragma omp declare reduction(*:M<int>:omp_out.t *= omp_in.t) initializer(omp_priv = 1)
+#pragma omp declare reduction(|:A<unsigned long long>:orit (&omp_in, &omp_out))
+#pragma omp declare reduction(&:B<long>:omp_out.t = omp_out.t & omp_in.t) initializer(orit (&omp_priv, &omp_orig))
+#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? omp_in : omp_out) initializer(omp_priv = -6)
+
+A<char> z[10];
+
+template <int N>
+__attribute__((noinline, noclone)) void
+foo (A<int> (*&x)[3][N], M<int> *y, B<long> (&w)[1][N], int p1, long p2, long p3, int p4,
+     int p5, long p6, short p7, int s, int t)
+{
+  A<unsigned long long> a[p7 + 4];
+  short bb[p7];
+  short (&b)[p7] = bb;
+  for (int i = 0; i < p7; i++)
+    bb[i] = -6;
+  #pragma omp parallel for reduction(+:x[-1:p1 + 1][:p2 + N - 2], z[t + N:p3]) \
+			   reduction(*:y[-s:p4]) reduction(|:a[s + 3:p5 - N + 2]) \
+			   reduction(&:w[s + 1:p6 - 3 + N][t:p6]) reduction(maxb:b[N:])
+  for (int i = 0; i < 128; i++)
+    {
+      x[i / 64 - 1][i % 3][(i / 4) & 1].t += i;
+      if ((i & 15) == 1)
+	y[1].t *= 3;
+      if ((i & 31) == N)
+	y[2].t *= 7;
+      if ((i & 63) == 3)
+	y[N + 1].t *= 17;
+      z[i / 32 + 2].t += (i & 3);
+      if (i < 4)
+	z[i + N].t += i;
+      a[i / 32 + 2].t |= 1ULL << (i & 30);
+      w[0][i & 1].t &= ~(1L << (i / 17 * 3));
+      if ((i % 23) > b[N])
+	b[N] = i % 23;
+      if ((i % 85) > b[3])
+	b[3] = i % 85;
+      if ((i % 192) > b[4])
+	b[4] = i % 192;
+    }
+  for (int i = 0; i < 9; i++)
+    if (a[i].t != ((i < 6 && i >= 2) ? 0x55555555ULL : 0))
+      __builtin_abort ();
+  if (bb[0] != -6 || bb[1] != -6 || bb[N] != 22 || bb[3] != 84 || bb[4] != 127)
+    __builtin_abort ();
+}
+
+A<int> a3[4][3][2];
+A<int> (*p3)[3][2] = &a3[2];
+M<int> y3[5] = { 0, 1, 1, 1, 0 };
+B<long> w3[1][2];
+
+template <int N>
+struct S
+{
+  A<int> (*&x)[3][N];
+  M<int> *y;
+  B<long> (&w)[1][N];
+  A<char> z[10];
+  short b[5];
+  A<unsigned long long> a[9];
+  S() : x(p3), y(y3), w(w3), z(), a(), b() {}
+  __attribute__((noinline, noclone)) void foo (int, long, long, int, int, long, short, int, int);
+};
+
+template <int N>
+void
+S<N>::foo (int p1, long p2, long p3, int p4, int p5, long p6, short p7, int s, int t)
+{
+  #pragma omp parallel for reduction(+:x[-1:p1 + 1][:p2][0:N], z[t + N:p3 + N - 2]) \
+			   reduction(*:y[-s:p4]) reduction(|:a[s + 3:p5]) \
+			   reduction(&:w[s + 1:p6 - 3 + N][t:p6]) reduction(maxb:b[N:])
+  for (int i = 0; i < 128; i++)
+    {
+      x[i / 64 - 1][i % 3][(i / 4) & 1].t += i;
+      if ((i & 15) == 1)
+	y[1].t *= 3;
+      if ((i & 31) == N)
+	y[2].t *= 7;
+      if ((i & 63) == 3)
+	y[N + 1].t *= 17;
+      z[i / 32 + 2].t += (i & 3);
+      if (i < 4)
+	z[i + N].t += i;
+      a[i / 32 + 2].t |= 1ULL << (i & 30);
+      w[0][i & 1].t &= ~(1L << (i / 17 * 3));
+      if ((i % 23) > b[N])
+	b[N] = i % 23;
+      if ((i % 85) > b[3])
+	b[3] = i % 85;
+      if ((i % 192) > b[4])
+	b[4] = i % 192;
+    }
+}
+
+int
+main ()
+{
+  A<int> a[4][3][2];
+  static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }},
+			    {{ 312, 381 }, { 295, 356 }, { 337, 335 }},
+			    {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }},
+			    {{ 0, 0 }, { 0, 0 }, { 0, 0 }}};
+  A<int> (*p)[3][2] = &a[2];
+  M<int> y[5] = { 0, 1, 1, 1, 0 };
+  int y2[5] = { 0, 6561, 2401, 289, 0 };
+  char z2[10] = { 0, 0, 48, 49, 50, 51, 0, 0, 0, 0 };
+  B<long> w[1][2];
+  foo<2> (p, y, w, 1, 3L, 4L, 3, 4, 2L, 5, -1, 0);
+  for (int i = 0; i < 4; i++)
+    for (int j = 0; j < 3; j++)
+      for (int k = 0; k < 2; k++)
+	if (a[i][j][k].t != a2[i][j][k])
+	  __builtin_abort ();
+  for (int i = 0; i < 5; i++)
+    if (y[i].t != y2[i])
+      __builtin_abort ();
+  for (int i = 0; i < 10; i++)
+    if (z[i].t != z2[i])
+      __builtin_abort ();
+  if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L)
+    __builtin_abort ();
+  S<2> s;
+  s.foo (1, 3L, 4L, 3, 4, 2L, 5, -1, 0);
+  for (int i = 0; i < 9; i++)
+    if (s.a[i].t != ((i < 6 && i >= 2) ? 0x55555555ULL : 0))
+      __builtin_abort ();
+  for (int i = 0; i < 4; i++)
+    for (int j = 0; j < 3; j++)
+      for (int k = 0; k < 2; k++)
+	if (a3[i][j][k].t != a2[i][j][k])
+	  __builtin_abort ();
+  for (int i = 0; i < 5; i++)
+    if (y3[i].t != y2[i])
+      __builtin_abort ();
+  for (int i = 0; i < 10; i++)
+    if (s.z[i].t != z2[i])
+      __builtin_abort ();
+  if (w3[0][0].t != ~0x249249L || w3[0][1].t != ~0x249249L)
+    __builtin_abort ();
+  if (s.b[0] != 0 || s.b[1] != 0 || s.b[2] != 22
+      || s.b[3] != 84 || s.b[4] != 127)
+    __builtin_abort ();
+}