[committed] Fix OpenMP class iterators in distribute parallel for (PR c++/86443)

Message ID 20180710071818.GB7166@tucnak
State New
Headers show
Series
  • [committed] Fix OpenMP class iterators in distribute parallel for (PR c++/86443)
Related show

Commit Message

Jakub Jelinek July 10, 2018, 7:18 a.m.
Hi!

While working on OpenMP 5.0 range-for support, I've discovered that even for
normal class iterators distribute parallel for gimplification ICEs in
several ways (other composite loop constructs work only because class
iterators are not allowed on them).  The problem is that the FEs emit the
code that needs to be done before computing number of the iterations around
the innermost construct, which we then wrap into OMP_PARALLEL and
OMP_DISTRIBUTE and then we want to compute number of iterations on the
OMP_DISTRIBUTE.  The following patch fixes it by detecting these cases and
moving the outer composite constructs right around the innermost one.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2018-07-10  Jakub Jelinek  <jakub@redhat.com>

	PR c++/86443
	* gimplify.c (find_combined_omp_for): Add DATA argument, in addition
	to finding the inner OMP_FOR/OMP_SIMD stmt find non-trivial wrappers,
	BLOCKs with BLOCK_VARs, OMP_PARALLEL in between, OMP_FOR in between.
	(gimplify_omp_for): For composite loops, move outer
	OMP_{DISTRIBUTE,TASKLOOP,FOR,PARALLEL} right around innermost
	OMP_FOR/OMP_SIMD if there are any non-trivial wrappers.  For class
	iterators add any needed clauses.  Allow OMP_FOR_ORIG_DECLS to contain
	TREE_LIST for both the original class iterator and the "last" helper
	var.  Gimplify OMP_FOR_PRE_BODY before the outermost composite
	loop, remember has_decl_expr from outer composite loops for the
	innermost OMP_SIMD in TREE_PRIVATE bit on OMP_FOR_INIT.
gcc/c-family/
	* c-omp.c (c_omp_check_loop_iv_r, c_omp_check_loop_iv): Allow declv
	to contain TREE_LIST for both the original class iterator and the
	"last" helper var.
gcc/cp/
	* semantics.c (handle_omp_for_class_iterator): Remove lastp argument,
	instead of setting *lastp turn orig_declv elt into a TREE_LIST.
	(finish_omp_for): Adjust handle_omp_for_class_iterator caller.
	* pt.c (tsubst_omp_for_iterator): Allow OMP_FOR_ORIG_DECLS to contain
	TREE_LIST for both the original class iterator and the "last" helper
	var.
libgomp/
	* testsuite/libgomp.c++/for-15.C: New test.


	Jakub

Comments

Jakub Jelinek July 11, 2018, 1:18 p.m. | #1
On Tue, Jul 10, 2018 at 09:18:18AM +0200, Jakub Jelinek wrote:
> Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

I found two small issues and one big issue (results being declare target)
which break the test if using non-shared memory offloading.

This should fix it, tested on x86_64-linux, committed to trunk.

2018-07-11  Jakub Jelinek  <jakub@redhat.com>

	PR c++/86443
	* testsuite/libgomp.c++/for-15.C (a): Remove unused variable.
	(results): Make sure the variable is not inside declare target region.
	(qux): Remove unused function.

--- libgomp/testsuite/libgomp.c++/for-15.C	(revision 262551)
+++ libgomp/testsuite/libgomp.c++/for-15.C	(working copy)
@@ -88,10 +88,11 @@ private:
 
 template <typename T> const I<T> &J<T>::begin () { return b; }
 template <typename T> const I<T> &J<T>::end () { return e; }
+#pragma omp end declare target
 
-int a[2000];
 int results[2000];
 
+#pragma omp declare target
 template <typename T>
 void
 baz (I<T> &i)
@@ -110,13 +111,6 @@ baz (int i)
 }
 
 void
-qux (I<int> &i)
-{
-  if (*i != 1931)
-    abort ();
-}
-
-void
 f1 (J<int> j)
 {
 #pragma omp distribute parallel for default(none)


	Jakub

Patch

--- gcc/gimplify.c.jj	2018-07-07 09:45:42.133890332 +0200
+++ gcc/gimplify.c	2018-07-09 15:47:14.587400243 +0200
@@ -9532,24 +9532,53 @@  gimplify_omp_task (tree *expr_p, gimple_
 }
 
 /* Helper function of gimplify_omp_for, find OMP_FOR resp. OMP_SIMD
-   with non-NULL OMP_FOR_INIT.  */
+   with non-NULL OMP_FOR_INIT.  Also, fill in pdata array,
+   pdata[0] non-NULL if there is anything non-trivial in between, pdata[1]
+   is address of OMP_PARALLEL in between if any, pdata[2] is address of
+   OMP_FOR in between if any and pdata[3] is address of the inner
+   OMP_FOR/OMP_SIMD.  */
 
 static tree
-find_combined_omp_for (tree *tp, int *walk_subtrees, void *)
+find_combined_omp_for (tree *tp, int *walk_subtrees, void *data)
 {
+  tree **pdata = (tree **) data;
   *walk_subtrees = 0;
   switch (TREE_CODE (*tp))
     {
     case OMP_FOR:
+      if (OMP_FOR_INIT (*tp) != NULL_TREE)
+	{
+	  pdata[3] = tp;
+	  return *tp;
+	}
+      pdata[2] = tp;
       *walk_subtrees = 1;
-      /* FALLTHRU */
+      break;
     case OMP_SIMD:
       if (OMP_FOR_INIT (*tp) != NULL_TREE)
-	return *tp;
+	{
+	  pdata[3] = tp;
+	  return *tp;
+	}
       break;
     case BIND_EXPR:
+      if (BIND_EXPR_VARS (*tp)
+	  || (BIND_EXPR_BLOCK (*tp)
+	      && BLOCK_VARS (BIND_EXPR_BLOCK (*tp))))
+	pdata[0] = tp;
+      *walk_subtrees = 1;
+      break;
     case STATEMENT_LIST:
+      if (!tsi_one_before_end_p (tsi_start (*tp)))
+	pdata[0] = tp;
+      *walk_subtrees = 1;
+      break;
+    case TRY_FINALLY_EXPR:
+      pdata[0] = tp;
+      *walk_subtrees = 1;
+      break;
     case OMP_PARALLEL:
+      pdata[1] = tp;
       *walk_subtrees = 1;
       break;
     default:
@@ -9574,6 +9603,115 @@  gimplify_omp_for (tree *expr_p, gimple_s
 
   orig_for_stmt = for_stmt = *expr_p;
 
+  if (OMP_FOR_INIT (for_stmt) == NULL_TREE)
+    {
+      tree *data[4] = { NULL, NULL, NULL, NULL };
+      gcc_assert (TREE_CODE (for_stmt) != OACC_LOOP);
+      inner_for_stmt = walk_tree (&OMP_FOR_BODY (for_stmt),
+				  find_combined_omp_for, data, NULL);
+      if (inner_for_stmt == NULL_TREE)
+	{
+	  gcc_assert (seen_error ());
+	  *expr_p = NULL_TREE;
+	  return GS_ERROR;
+	}
+      if (data[2] && OMP_FOR_PRE_BODY (*data[2]))
+	{
+	  append_to_statement_list_force (OMP_FOR_PRE_BODY (*data[2]),
+					  &OMP_FOR_PRE_BODY (for_stmt));
+	  OMP_FOR_PRE_BODY (*data[2]) = NULL_TREE;
+	}
+      if (OMP_FOR_PRE_BODY (inner_for_stmt))
+	{
+	  append_to_statement_list_force (OMP_FOR_PRE_BODY (inner_for_stmt),
+					  &OMP_FOR_PRE_BODY (for_stmt));
+	  OMP_FOR_PRE_BODY (inner_for_stmt) = NULL_TREE;
+	}
+
+      if (data[0])
+	{
+	  /* We have some statements or variable declarations in between
+	     the composite construct directives.  Move them around the
+	     inner_for_stmt.  */
+	  data[0] = expr_p;
+	  for (i = 0; i < 3; i++)
+	    if (data[i])
+	      {
+		tree t = *data[i];
+		if (i < 2 && data[i + 1] == &OMP_BODY (t))
+		  data[i + 1] = data[i];
+		*data[i] = OMP_BODY (t);
+		tree body = build3 (BIND_EXPR, void_type_node, NULL_TREE,
+				    NULL_TREE, make_node (BLOCK));
+		OMP_BODY (t) = body;
+		append_to_statement_list_force (inner_for_stmt,
+						&BIND_EXPR_BODY (body));
+		*data[3] = t;
+		data[3] = tsi_stmt_ptr (tsi_start (BIND_EXPR_BODY (body)));
+		gcc_assert (*data[3] == inner_for_stmt);
+	      }
+	  return GS_OK;
+	}
+
+      for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (inner_for_stmt)); i++)
+	if (OMP_FOR_ORIG_DECLS (inner_for_stmt)
+	    && TREE_CODE (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (inner_for_stmt),
+					i)) == TREE_LIST)
+	  {
+	    tree orig = TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (inner_for_stmt), i);
+	    /* Class iterators aren't allowed on OMP_SIMD, so the only
+	       case we need to solve is distribute parallel for.  */
+	    gcc_assert (TREE_CODE (inner_for_stmt) == OMP_FOR
+			&& TREE_CODE (for_stmt) == OMP_DISTRIBUTE
+			&& data[1]);
+	    tree orig_decl = TREE_PURPOSE (orig);
+	    tree last = TREE_VALUE (orig);
+	    tree *pc;
+	    for (pc = &OMP_FOR_CLAUSES (inner_for_stmt);
+		 *pc; pc = &OMP_CLAUSE_CHAIN (*pc))
+	      if ((OMP_CLAUSE_CODE (*pc) == OMP_CLAUSE_PRIVATE
+		   || OMP_CLAUSE_CODE (*pc) == OMP_CLAUSE_LASTPRIVATE)
+		  && OMP_CLAUSE_DECL (*pc) == orig_decl)
+		break;
+	    if (*pc == NULL_TREE)
+	      ;
+	    else if (OMP_CLAUSE_CODE (*pc) == OMP_CLAUSE_PRIVATE)
+	      {
+		/* private clause will appear only on inner_for_stmt.
+		   Change it into firstprivate, and add private clause
+		   on for_stmt.  */
+		tree c = copy_node (*pc);
+		OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (for_stmt);
+		OMP_FOR_CLAUSES (for_stmt) = c;
+		OMP_CLAUSE_CODE (*pc) = OMP_CLAUSE_FIRSTPRIVATE;
+		lang_hooks.decls.omp_finish_clause (*pc, pre_p);
+	      }
+	    else
+	      {
+		/* lastprivate clause will appear on both inner_for_stmt
+		   and for_stmt.  Add firstprivate clause to
+		   inner_for_stmt.  */
+		tree c = build_omp_clause (OMP_CLAUSE_LOCATION (*pc),
+					   OMP_CLAUSE_FIRSTPRIVATE);
+		OMP_CLAUSE_DECL (c) = OMP_CLAUSE_DECL (*pc);
+		OMP_CLAUSE_CHAIN (c) = *pc;
+		*pc = c;
+		lang_hooks.decls.omp_finish_clause (*pc, pre_p);
+	      }
+	    tree c = build_omp_clause (UNKNOWN_LOCATION,
+				       OMP_CLAUSE_FIRSTPRIVATE);
+	    OMP_CLAUSE_DECL (c) = last;
+	    OMP_CLAUSE_CHAIN (c) = OMP_PARALLEL_CLAUSES (*data[1]);
+	    OMP_PARALLEL_CLAUSES (*data[1]) = c;
+	    c = build_omp_clause (UNKNOWN_LOCATION,
+				  *pc ? OMP_CLAUSE_SHARED
+				      : OMP_CLAUSE_FIRSTPRIVATE);
+	    OMP_CLAUSE_DECL (c) = orig_decl;
+	    OMP_CLAUSE_CHAIN (c) = OMP_PARALLEL_CLAUSES (*data[1]);
+	    OMP_PARALLEL_CLAUSES (*data[1]) = c;
+	  }
+    }
+
   switch (TREE_CODE (for_stmt))
     {
     case OMP_FOR:
@@ -9611,19 +9749,6 @@  gimplify_omp_for (tree *expr_p, gimple_s
 	  }
     }
 
-  if (OMP_FOR_INIT (for_stmt) == NULL_TREE)
-    {
-      gcc_assert (TREE_CODE (for_stmt) != OACC_LOOP);
-      inner_for_stmt = walk_tree (&OMP_FOR_BODY (for_stmt),
-				  find_combined_omp_for, NULL, NULL);
-      if (inner_for_stmt == NULL_TREE)
-	{
-	  gcc_assert (seen_error ());
-	  *expr_p = NULL_TREE;
-	  return GS_ERROR;
-	}
-    }
-
   if (TREE_CODE (for_stmt) != OMP_TASKLOOP)
     gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p, ort,
 			       TREE_CODE (for_stmt));
@@ -9633,7 +9758,9 @@  gimplify_omp_for (tree *expr_p, gimple_s
 
   /* Handle OMP_FOR_INIT.  */
   for_pre_body = NULL;
-  if (ort == ORT_SIMD && OMP_FOR_PRE_BODY (for_stmt))
+  if ((ort == ORT_SIMD
+       || (inner_for_stmt && TREE_CODE (inner_for_stmt) == OMP_SIMD))
+      && OMP_FOR_PRE_BODY (for_stmt))
     {
       has_decl_expr = BITMAP_ALLOC (NULL);
       if (TREE_CODE (OMP_FOR_PRE_BODY (for_stmt)) == DECL_EXPR
@@ -9774,8 +9901,12 @@  gimplify_omp_for (tree *expr_p, gimple_s
       if (is_doacross)
 	{
 	  if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
-	    gimplify_omp_ctxp->loop_iter_var.quick_push
-	      (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
+	    {
+	      tree orig_decl = TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i);
+	      if (TREE_CODE (orig_decl) == TREE_LIST)
+		orig_decl = TREE_PURPOSE (orig_decl);
+	      gimplify_omp_ctxp->loop_iter_var.quick_push (orig_decl);
+	    }
 	  else
 	    gimplify_omp_ctxp->loop_iter_var.quick_push (decl);
 	  gimplify_omp_ctxp->loop_iter_var.quick_push (decl);
@@ -9785,7 +9916,12 @@  gimplify_omp_for (tree *expr_p, gimple_s
       tree c = NULL_TREE;
       tree c2 = NULL_TREE;
       if (orig_for_stmt != for_stmt)
-	/* Do this only on innermost construct for combined ones.  */;
+	{
+	  /* Preserve this information until we gimplify the inner simd.  */
+	  if (has_decl_expr
+	      && bitmap_bit_p (has_decl_expr, DECL_UID (decl)))
+	    TREE_PRIVATE (t) = 1;
+	}
       else if (ort == ORT_SIMD)
 	{
 	  splay_tree_node n = splay_tree_lookup (gimplify_omp_ctxp->variables,
@@ -9800,8 +9936,9 @@  gimplify_omp_for (tree *expr_p, gimple_s
 	      c = build_omp_clause (input_location, OMP_CLAUSE_LINEAR);
 	      OMP_CLAUSE_LINEAR_NO_COPYIN (c) = 1;
 	      unsigned int flags = GOVD_LINEAR | GOVD_EXPLICIT | GOVD_SEEN;
-	      if (has_decl_expr
-		  && bitmap_bit_p (has_decl_expr, DECL_UID (decl)))
+	      if ((has_decl_expr
+		   && bitmap_bit_p (has_decl_expr, DECL_UID (decl)))
+		  || TREE_PRIVATE (t))
 		{
 		  OMP_CLAUSE_LINEAR_NO_COPYOUT (c) = 1;
 		  flags |= GOVD_LINEAR_LASTPRIVATE_NO_OUTER;
@@ -9923,6 +10060,8 @@  gimplify_omp_for (tree *expr_p, gimple_s
 	      bool lastprivate
 		= (!has_decl_expr
 		   || !bitmap_bit_p (has_decl_expr, DECL_UID (decl)));
+	      if (TREE_PRIVATE (t))
+		lastprivate = false;
 	      struct gimplify_omp_ctx *outer
 		= gimplify_omp_ctxp->outer_context;
 	      if (outer && lastprivate)
--- gcc/c-family/c-omp.c.jj	2018-07-05 11:41:51.994718940 +0200
+++ gcc/c-family/c-omp.c	2018-07-09 13:29:59.032898924 +0200
@@ -827,7 +827,9 @@  c_omp_check_loop_iv_r (tree *tp, int *wa
     {
       int i;
       for (i = 0; i < TREE_VEC_LENGTH (d->declv); i++)
-	if (*tp == TREE_VEC_ELT (d->declv, i))
+	if (*tp == TREE_VEC_ELT (d->declv, i)
+	    || (TREE_CODE (TREE_VEC_ELT (d->declv, i)) == TREE_LIST
+		&& *tp == TREE_PURPOSE (TREE_VEC_ELT (d->declv, i))))
 	  {
 	    location_t loc = d->expr_loc;
 	    if (loc == UNKNOWN_LOCATION)
@@ -894,7 +896,9 @@  c_omp_check_loop_iv (tree stmt, tree dec
 	 expression then involves the subtraction and always refers
 	 to the original value.  The C++ FE needs to warn on those
 	 earlier.  */
-      if (decl == TREE_VEC_ELT (declv, i))
+      if (decl == TREE_VEC_ELT (declv, i)
+	  || (TREE_CODE (TREE_VEC_ELT (declv, i)) == TREE_LIST
+	      && decl == TREE_PURPOSE (TREE_VEC_ELT (declv, i))))
 	{
 	  data.expr_loc = EXPR_LOCATION (cond);
 	  data.kind = 1;
--- gcc/cp/semantics.c.jj	2018-07-05 11:41:51.798718714 +0200
+++ gcc/cp/semantics.c	2018-07-09 13:29:59.030898921 +0200
@@ -7679,7 +7679,7 @@  static bool
 handle_omp_for_class_iterator (int i, location_t locus, enum tree_code code,
 			       tree declv, tree orig_declv, tree initv,
 			       tree condv, tree incrv, tree *body,
-			       tree *pre_body, tree &clauses, tree *lastp,
+			       tree *pre_body, tree &clauses,
 			       int collapse, int ordered)
 {
   tree diff, iter_init, iter_incr = NULL, last;
@@ -7983,7 +7983,8 @@  handle_omp_for_class_iterator (int i, lo
   TREE_VEC_ELT (initv, i) = init;
   TREE_VEC_ELT (condv, i) = cond;
   TREE_VEC_ELT (incrv, i) = incr;
-  *lastp = last;
+  TREE_VEC_ELT (orig_declv, i)
+    = tree_cons (TREE_VEC_ELT (orig_declv, i), last, NULL_TREE);
 
   return false;
 }
@@ -8002,7 +8003,6 @@  finish_omp_for (location_t locus, enum t
 {
   tree omp_for = NULL, orig_incr = NULL;
   tree decl = NULL, init, cond, incr;
-  tree last = NULL_TREE;
   location_t elocus;
   int i;
   int collapse = 1;
@@ -8169,7 +8169,7 @@  finish_omp_for (location_t locus, enum t
 	    }
 	  if (handle_omp_for_class_iterator (i, locus, code, declv, orig_declv,
 					     initv, condv, incrv, &body,
-					     &pre_body, clauses, &last,
+					     &pre_body, clauses,
 					     collapse, ordered))
 	    return NULL;
 	  continue;
--- gcc/cp/pt.c.jj	2018-07-05 11:41:51.750718660 +0200
+++ gcc/cp/pt.c	2018-07-09 13:29:59.028898919 +0200
@@ -16267,7 +16267,12 @@  tsubst_omp_for_iterator (tree t, int i,
   if (orig_declv && OMP_FOR_ORIG_DECLS (t))
     {
       tree o = TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (t), i);
-      TREE_VEC_ELT (orig_declv, i) = RECUR (o);
+      if (TREE_CODE (o) == TREE_LIST)
+	TREE_VEC_ELT (orig_declv, i)
+	  = tree_cons (RECUR (TREE_PURPOSE (o)),
+		       RECUR (TREE_VALUE (o)), NULL_TREE);
+      else
+	TREE_VEC_ELT (orig_declv, i) = RECUR (o);
     }
 
   decl = TREE_OPERAND (init, 0);
--- libgomp/testsuite/libgomp.c++/for-15.C.jj	2018-07-09 17:19:46.206382978 +0200
+++ libgomp/testsuite/libgomp.c++/for-15.C	2018-07-09 17:19:57.937398899 +0200
@@ -0,0 +1,232 @@ 
+// PR c++/86443
+// { dg-do run }
+// { dg-additional-options "-std=c++17" }
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+extern "C" void abort ();
+
+#pragma omp declare target
+template <typename T>
+class I
+{
+public:
+  typedef ptrdiff_t difference_type;
+  I ();
+  ~I ();
+  I (T *);
+  I (const I &);
+  T &operator * ();
+  T *operator -> ();
+  T &operator [] (const difference_type &) const;
+  I &operator = (const I &);
+  I &operator ++ ();
+  I operator ++ (int);
+  I &operator -- ();
+  I operator -- (int);
+  I &operator += (const difference_type &);
+  I &operator -= (const difference_type &);
+  I operator + (const difference_type &) const;
+  I operator - (const difference_type &) const;
+  template <typename S> friend bool operator == (I<S> &, I<S> &);
+  template <typename S> friend bool operator == (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator < (I<S> &, I<S> &);
+  template <typename S> friend bool operator < (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator <= (I<S> &, I<S> &);
+  template <typename S> friend bool operator <= (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator > (I<S> &, I<S> &);
+  template <typename S> friend bool operator > (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator >= (I<S> &, I<S> &);
+  template <typename S> friend bool operator >= (const I<S> &, const I<S> &);
+  template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &);
+  template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &);
+  template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &);
+private:
+  T *p;
+};
+template <typename T> I<T>::I () : p (0) {}
+template <typename T> I<T>::~I () {}
+template <typename T> I<T>::I (T *x) : p (x) {}
+template <typename T> I<T>::I (const I &x) : p (x.p) {}
+template <typename T> T &I<T>::operator * () { return *p; }
+template <typename T> T *I<T>::operator -> () { return p; }
+template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; }
+template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; }
+template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; }
+template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); }
+template <typename T> I<T> &I<T>::operator -- () { --p; return *this; }
+template <typename T> I<T> I<T>::operator -- (int) { return I (p--); }
+template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; }
+template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; }
+template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); }
+template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); }
+template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); }
+template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); }
+template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; }
+template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; }
+template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; }
+template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; }
+template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); }
+
+template <typename T>
+class J
+{
+public:
+  J(const I<T> &x, const I<T> &y) : b (x), e (y) {}
+  const I<T> &begin ();
+  const I<T> &end ();
+private:
+  I<T> b, e;
+};
+
+template <typename T> const I<T> &J<T>::begin () { return b; }
+template <typename T> const I<T> &J<T>::end () { return e; }
+
+int a[2000];
+int results[2000];
+
+template <typename T>
+void
+baz (I<T> &i)
+{
+  if (*i < 0 || *i >= 2000)
+    abort ();
+  results[*i]++;
+}
+
+void
+baz (int i)
+{
+  if (i < 0 || i >= 2000)
+    abort ();
+  results[i]++;
+}
+
+void
+qux (I<int> &i)
+{
+  if (*i != 1931)
+    abort ();
+}
+
+void
+f1 (J<int> j)
+{
+#pragma omp distribute parallel for default(none)
+  for (I<int> i = j.begin (); i < j.end (); i += 3)
+    baz (*i);
+}
+
+void
+f2 (J<int> j)
+{
+  I<int> i;
+#pragma omp distribute parallel for default(none)
+  for (i = j.begin (); i < j.end (); ++i)
+    baz (*i);
+}
+
+template <int N>
+void
+f3 (J<int> j)
+{
+#pragma omp distribute parallel for default(none)
+  for (I<int> i = j.begin (); i < j.end (); i += 6)
+    baz (*i);
+}
+
+template <int N>
+void
+f4 (J<int> j)
+{
+  I<int> i;
+#pragma omp distribute parallel for default(none)
+  for (i = j.begin (); i < j.end (); i += 9)
+    baz (*i);
+}
+
+template <typename T>
+void
+f5 (J<T> j)
+{
+#pragma omp distribute parallel for default(none)
+  for (I<T> i = j.begin (); i < j.end (); i += 4)
+    baz (*i);
+}
+
+template <typename T>
+void
+f6 (J<T> j)
+{
+  I<T> i;
+#pragma omp distribute parallel for default(none)
+  for (i = j.begin (); i < j.end (); i += 7)
+    baz (*i);
+}
+
+#pragma omp end declare target
+
+#define check(expr) \
+  for (int i = 0; i < 2000; i++)			\
+    if (expr)						\
+      {							\
+	if (results[i] != 1)				\
+	  abort ();					\
+	results[i] = 0;					\
+      }							\
+    else if (results[i])				\
+      abort ()
+
+int
+main ()
+{
+  int a[2000];
+  for (int i = 0; i < 2000; i++)
+    a[i] = i;
+  #pragma omp target data map (to: a)
+  {
+    #pragma omp target teams map (tofrom: results)
+    {
+      J<int> j (&a[75], &a[1945]);
+      f1 (j);
+    }
+    check (i >= 75 && i < 1945 && (i - 75) % 3 == 0);
+    #pragma omp target teams map (tofrom: results)
+    {
+      J<int> j (&a[63], &a[1949]);
+      f2 (j);
+    }
+    check (i >= 63 && i < 1949);
+    #pragma omp target teams map (tofrom: results)
+    {
+      J<int> j (&a[58], &a[1979]);
+      f3 <2> (j);
+    }
+    check (i >= 58 && i < 1979 && (i - 58) % 6 == 0);
+    #pragma omp target teams map (tofrom: results)
+    {
+      J<int> j (&a[59], &a[1981]);
+      f4 <9> (j);
+    }
+    check (i >= 59 && i < 1981 && (i - 59) % 9 == 0);
+    #pragma omp target teams map (tofrom: results)
+    {
+      J<int> j (&a[52], &a[1972]);
+      f5 (j);
+    }
+    check (i >= 52 && i < 1972 && (i - 52) % 4 == 0);
+    #pragma omp target teams map (tofrom: results)
+    {
+      J<int> j (&a[31], &a[1827]);
+      f6 (j);
+    }
+    check (i >= 31 && i < 1827 && (i - 31) % 7 == 0);
+  }
+}