diff mbox series

[V2,3/5] OpenMP: C++ support for imperfectly-nested loops

Message ID 20230723221521.3739463-4-sandra@codesourcery.com
State New
Headers show
Series OpenMP: support for imperfectly-nested loops | expand

Commit Message

Sandra Loosemore July 23, 2023, 10:15 p.m. UTC
OpenMP 5.0 removed the restriction that multiple collapsed loops must
be perfectly nested, allowing "intervening code" (including nested
BLOCKs) before or after each nested loop.  In GCC this code is moved
into the inner loop body by the respective front ends.

This patch changes the C++ front end to use recursive descent parsing
on nested loops within an "omp for" construct, rather than an
iterative approach, in order to preserve proper nesting of compound
statements.  Preserving cleanups (destructors) for class objects
declared in intervening code and loop initializers complicates moving
the former into the body of the loop; this is handled by parsing the
entire construct before reassembling any of it.

gcc/cp/ChangeLog
	* cp-tree.h (cp_convert_omp_range_for): Adjust declaration.
	* parser.cc (struct omp_for_parse_data): New.
	(cp_parser_postfix_expression): Diagnose calls to OpenMP runtime
	in intervening code.
	(check_omp_intervening_code): New.
	(cp_parser_statement_seq_opt): Special-case nested loops, blocks,
	and other constructs for OpenMP loops.
	(cp_parser_iteration_statement): Reject loops in intervening code.
	(cp_parser_omp_for_loop_init): Expand comments and tweak the
	interface slightly to better distinguish input/output parameters.
	(cp_convert_omp_range_for): Likewise.
	(cp_parser_omp_loop_nest): New, split from cp_parser_omp_for_loop
	and largely rewritten.  Add more comments.
	(insert_structured_blocks): New.
	(find_structured_blocks): New.
	(struct sit_data, substitute_in_tree_walker, substitute_in_tree):
	New.
	(fixup_blocks_walker): New.
	(cp_parser_omp_for_loop): Rewrite to use recursive descent instead
	of a loop.  Add logic to reshuffle the bits of code collected
	during parsing so intervening code gets moved to the loop body.
	(cp_parser_omp_loop): Remove call to finish_omp_for_block, which
	is now redundant.
	(cp_parser_omp_simd): Likewise.
	(cp_parser_omp_for): Likewise.
	(cp_parser_omp_distribute): Likewise.
	(cp_parser_oacc_loop): Likewise.
	(cp_parser_omp_taskloop): Likewise.
	(cp_parser_pragma): Reject OpenMP pragmas in intervening code.
	* parser.h (struct cp_parser): Add omp_for_parse_state field.
	* pt.cc (tsubst_omp_for_iterator): Adjust call to
	cp_convert_omp_range_for.
	* semantics.cc (finish_omp_for): Try harder to preserve location
	of loop variable init expression for use in diagnostics.
	(struct fofb_data, finish_omp_for_block_walker): New.
	(finish_omp_for_block): Allow variables to be bound in a BIND_EXPR
	nested inside BIND instead of directly in BIND itself.

gcc/testsuite/ChangeLog
	* c-c++-common/goacc/tile-2.c: Adjust expected error patterns.
	* g++.dg/gomp/attrs-imperfect1.C: New test.
	* g++.dg/gomp/attrs-imperfect2.C: New test.
	* g++.dg/gomp/attrs-imperfect3.C: New test.
	* g++.dg/gomp/attrs-imperfect4.C: New test.
	* g++.dg/gomp/attrs-imperfect5.C: New test.
	* g++.dg/gomp/pr41967.C: Adjust expected error patterns.
	* g++.dg/gomp/tpl-imperfect-gotos.C: New test.
	* g++.dg/gomp/tpl-imperfect-invalid-scope.C: New test.

libgomp/ChangeLog
	* testsuite/libgomp.c++/attrs-imperfect1.C: New test.
	* testsuite/libgomp.c++/attrs-imperfect2.C: New test.
	* testsuite/libgomp.c++/attrs-imperfect3.C: New test.
	* testsuite/libgomp.c++/attrs-imperfect4.C: New test.
	* testsuite/libgomp.c++/attrs-imperfect5.C: New test.
	* testsuite/libgomp.c++/attrs-imperfect6.C: New test.
	* testsuite/libgomp.c++/imperfect-class-1.C: New test.
	* testsuite/libgomp.c++/imperfect-class-2.C: New test.
	* testsuite/libgomp.c++/imperfect-class-3.C: New test.
	* testsuite/libgomp.c++/imperfect-destructor.C: New test.
	* testsuite/libgomp.c++/imperfect-template-1.C: New test.
	* testsuite/libgomp.c++/imperfect-template-2.C: New test.
	* testsuite/libgomp.c++/imperfect-template-3.C: New test.
---
 gcc/cp/cp-tree.h                              |    2 +-
 gcc/cp/parser.cc                              | 1315 ++++++++++++-----
 gcc/cp/parser.h                               |    3 +
 gcc/cp/pt.cc                                  |    3 +-
 gcc/cp/semantics.cc                           |  117 +-
 gcc/testsuite/c-c++-common/goacc/tile-2.c     |    4 +-
 gcc/testsuite/g++.dg/gomp/attrs-imperfect1.C  |   38 +
 gcc/testsuite/g++.dg/gomp/attrs-imperfect2.C  |   34 +
 gcc/testsuite/g++.dg/gomp/attrs-imperfect3.C  |   33 +
 gcc/testsuite/g++.dg/gomp/attrs-imperfect4.C  |   33 +
 gcc/testsuite/g++.dg/gomp/attrs-imperfect5.C  |   57 +
 gcc/testsuite/g++.dg/gomp/pr41967.C           |    2 +-
 .../g++.dg/gomp/tpl-imperfect-gotos.C         |  161 ++
 .../g++.dg/gomp/tpl-imperfect-invalid-scope.C |   94 ++
 .../testsuite/libgomp.c++/attrs-imperfect1.C  |   76 +
 .../testsuite/libgomp.c++/attrs-imperfect2.C  |  114 ++
 .../testsuite/libgomp.c++/attrs-imperfect3.C  |  119 ++
 .../testsuite/libgomp.c++/attrs-imperfect4.C  |  117 ++
 .../testsuite/libgomp.c++/attrs-imperfect5.C  |   49 +
 .../testsuite/libgomp.c++/attrs-imperfect6.C  |  115 ++
 .../testsuite/libgomp.c++/imperfect-class-1.C |  169 +++
 .../testsuite/libgomp.c++/imperfect-class-2.C |  167 +++
 .../testsuite/libgomp.c++/imperfect-class-3.C |  167 +++
 .../libgomp.c++/imperfect-destructor.C        |  135 ++
 .../libgomp.c++/imperfect-template-1.C        |  172 +++
 .../libgomp.c++/imperfect-template-2.C        |  170 +++
 .../libgomp.c++/imperfect-template-3.C        |  170 +++
 27 files changed, 3228 insertions(+), 408 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/gomp/attrs-imperfect1.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/attrs-imperfect2.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/attrs-imperfect3.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/attrs-imperfect4.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/attrs-imperfect5.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/tpl-imperfect-gotos.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/tpl-imperfect-invalid-scope.C
 create mode 100644 libgomp/testsuite/libgomp.c++/attrs-imperfect1.C
 create mode 100644 libgomp/testsuite/libgomp.c++/attrs-imperfect2.C
 create mode 100644 libgomp/testsuite/libgomp.c++/attrs-imperfect3.C
 create mode 100644 libgomp/testsuite/libgomp.c++/attrs-imperfect4.C
 create mode 100644 libgomp/testsuite/libgomp.c++/attrs-imperfect5.C
 create mode 100644 libgomp/testsuite/libgomp.c++/attrs-imperfect6.C
 create mode 100644 libgomp/testsuite/libgomp.c++/imperfect-class-1.C
 create mode 100644 libgomp/testsuite/libgomp.c++/imperfect-class-2.C
 create mode 100644 libgomp/testsuite/libgomp.c++/imperfect-class-3.C
 create mode 100644 libgomp/testsuite/libgomp.c++/imperfect-destructor.C
 create mode 100644 libgomp/testsuite/libgomp.c++/imperfect-template-1.C
 create mode 100644 libgomp/testsuite/libgomp.c++/imperfect-template-2.C
 create mode 100644 libgomp/testsuite/libgomp.c++/imperfect-template-3.C

Comments

Jakub Jelinek Aug. 22, 2023, 1:31 p.m. UTC | #1
On Sun, Jul 23, 2023 at 04:15:19PM -0600, Sandra Loosemore wrote:
> OpenMP 5.0 removed the restriction that multiple collapsed loops must
> be perfectly nested, allowing "intervening code" (including nested
> BLOCKs) before or after each nested loop.  In GCC this code is moved
> into the inner loop body by the respective front ends.
> 
> This patch changes the C++ front end to use recursive descent parsing
> on nested loops within an "omp for" construct, rather than an
> iterative approach, in order to preserve proper nesting of compound
> statements.  Preserving cleanups (destructors) for class objects
> declared in intervening code and loop initializers complicates moving
> the former into the body of the loop; this is handled by parsing the
> entire construct before reassembling any of it.
> 
> gcc/cp/ChangeLog
> 	* cp-tree.h (cp_convert_omp_range_for): Adjust declaration.
> 	* parser.cc (struct omp_for_parse_data): New.
> 	(cp_parser_postfix_expression): Diagnose calls to OpenMP runtime
> 	in intervening code.
> 	(check_omp_intervening_code): New.
> 	(cp_parser_statement_seq_opt): Special-case nested loops, blocks,
> 	and other constructs for OpenMP loops.
> 	(cp_parser_iteration_statement): Reject loops in intervening code.
> 	(cp_parser_omp_for_loop_init): Expand comments and tweak the
> 	interface slightly to better distinguish input/output parameters.
> 	(cp_convert_omp_range_for): Likewise.
> 	(cp_parser_omp_loop_nest): New, split from cp_parser_omp_for_loop
> 	and largely rewritten.  Add more comments.
> 	(insert_structured_blocks): New.
> 	(find_structured_blocks): New.
> 	(struct sit_data, substitute_in_tree_walker, substitute_in_tree):
> 	New.
> 	(fixup_blocks_walker): New.
> 	(cp_parser_omp_for_loop): Rewrite to use recursive descent instead
> 	of a loop.  Add logic to reshuffle the bits of code collected
> 	during parsing so intervening code gets moved to the loop body.
> 	(cp_parser_omp_loop): Remove call to finish_omp_for_block, which
> 	is now redundant.
> 	(cp_parser_omp_simd): Likewise.
> 	(cp_parser_omp_for): Likewise.
> 	(cp_parser_omp_distribute): Likewise.
> 	(cp_parser_oacc_loop): Likewise.
> 	(cp_parser_omp_taskloop): Likewise.
> 	(cp_parser_pragma): Reject OpenMP pragmas in intervening code.
> 	* parser.h (struct cp_parser): Add omp_for_parse_state field.
> 	* pt.cc (tsubst_omp_for_iterator): Adjust call to
> 	cp_convert_omp_range_for.
> 	* semantics.cc (finish_omp_for): Try harder to preserve location
> 	of loop variable init expression for use in diagnostics.
> 	(struct fofb_data, finish_omp_for_block_walker): New.
> 	(finish_omp_for_block): Allow variables to be bound in a BIND_EXPR
> 	nested inside BIND instead of directly in BIND itself.
> 
> gcc/testsuite/ChangeLog
> 	* c-c++-common/goacc/tile-2.c: Adjust expected error patterns.
> 	* g++.dg/gomp/attrs-imperfect1.C: New test.
> 	* g++.dg/gomp/attrs-imperfect2.C: New test.
> 	* g++.dg/gomp/attrs-imperfect3.C: New test.
> 	* g++.dg/gomp/attrs-imperfect4.C: New test.
> 	* g++.dg/gomp/attrs-imperfect5.C: New test.
> 	* g++.dg/gomp/pr41967.C: Adjust expected error patterns.
> 	* g++.dg/gomp/tpl-imperfect-gotos.C: New test.
> 	* g++.dg/gomp/tpl-imperfect-invalid-scope.C: New test.
> 
> libgomp/ChangeLog
> 	* testsuite/libgomp.c++/attrs-imperfect1.C: New test.
> 	* testsuite/libgomp.c++/attrs-imperfect2.C: New test.
> 	* testsuite/libgomp.c++/attrs-imperfect3.C: New test.
> 	* testsuite/libgomp.c++/attrs-imperfect4.C: New test.
> 	* testsuite/libgomp.c++/attrs-imperfect5.C: New test.
> 	* testsuite/libgomp.c++/attrs-imperfect6.C: New test.
> 	* testsuite/libgomp.c++/imperfect-class-1.C: New test.
> 	* testsuite/libgomp.c++/imperfect-class-2.C: New test.
> 	* testsuite/libgomp.c++/imperfect-class-3.C: New test.
> 	* testsuite/libgomp.c++/imperfect-destructor.C: New test.
> 	* testsuite/libgomp.c++/imperfect-template-1.C: New test.
> 	* testsuite/libgomp.c++/imperfect-template-2.C: New test.
> 	* testsuite/libgomp.c++/imperfect-template-3.C: New test.

Ok (though, if the c-c++-common tests are tweaked in the C patch,
this patch needs to undo that).

	Jakub
diff mbox series

Patch

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 3de0e154c12..b0d5b885692 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7305,7 +7305,7 @@  extern bool maybe_clone_body			(tree);
 /* In parser.cc */
 extern tree cp_convert_range_for (tree, tree, tree, tree, unsigned int, bool,
 				  unsigned short);
-extern void cp_convert_omp_range_for (tree &, vec<tree, va_gc> *, tree &,
+extern void cp_convert_omp_range_for (tree &, tree &, tree &,
 				      tree &, tree &, tree &, tree &, tree &);
 extern void cp_finish_omp_range_for (tree, tree);
 extern bool parsing_nsdmi (void);
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 5e2b5cba57e..fc5e827a2e9 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -261,6 +261,10 @@  static bool cp_parser_omp_declare_reduction_exprs
 static void cp_finalize_oacc_routine
   (cp_parser *, tree, bool);
 
+static void check_omp_intervening_code
+  (cp_parser *);
+
+
 /* Manifest constants.  */
 #define CP_LEXER_BUFFER_SIZE ((256 * 1024) / sizeof (cp_token))
 #define CP_SAVED_TOKEN_STACK 5
@@ -2091,6 +2095,52 @@  struct cp_parser_expression_stack_entry
 typedef struct cp_parser_expression_stack_entry
   cp_parser_expression_stack[NUM_PREC_VALUES];
 
+/* Used for parsing OMP for loops.
+
+   Some notes on flags used for context:
+   parser->omp_for_parse_state is non-null anywhere inside the OMP FOR
+   construct, except for the final-loop-body.
+   The want_nested_loop flag is true if inside a {} sequence where
+   a loop-nest (or another {} sequence containing a loop-nest) is expected,
+   but has not yet been seen.  It's false when parsing intervening code
+   statements or their substatements that cannot contain a loop-nest.
+   The in_intervening_code flag is true when parsing any intervening code,
+   including substatements, and whether or not want_nested_loop is true.
+
+   And, about error handling:
+   The saw_intervening_code flag is set if the loop is not perfectly
+   nested, even in the usual case where this is not an error.
+   perfect_nesting_fail is set if an error has been diagnosed because an
+   imperfectly-nested loop was found where a perfectly-nested one is
+   required (we diagnose this only once).
+   fail is set if any kind of structural error in the loop nest
+   has been found and diagnosed.
+  */
+struct omp_for_parse_data {
+  enum tree_code code;	/* Loop construct code, e.g. OACC_LOOP.  */
+  tree declv, condv, incrv, initv;
+  tree pre_body;
+  tree orig_declv;
+  auto_vec<tree, 4> orig_inits;
+  int count;	/* Expected nesting depth.  */
+  int depth;	/* Current nesting depth.  */
+  location_t for_loc;	/* Location used for nesting diagnostics.  */
+  releasing_vec init_blockv;
+  releasing_vec body_blockv;
+  releasing_vec init_placeholderv;
+  releasing_vec body_placeholderv;
+  bool ordered : 1;	/* If set, intervening code is an error.  */
+  bool inscan : 1;	/* Likewise.  */
+  bool want_nested_loop : 1;	/* Loop-nest expected, not yet seen.  */
+  bool in_intervening_code : 1;	/* Parsing intervening code.  */
+  bool saw_intervening_code : 1;	/* Loop is not perfectly nested.  */
+  bool perfect_nesting_fail : 1;	/* Nesting error was diagnosed.  */
+  bool fail : 1;	/* A structural error was diagnosed.  */
+  tree clauses;
+  tree *cclauses;
+  tree ordered_cl;
+};
+
 /* Prototypes.  */
 
 /* Constructors and destructors.  */
@@ -2912,6 +2962,7 @@  static bool cp_parser_skip_up_to_closing_square_bracket
 static bool cp_parser_skip_to_closing_square_bracket
   (cp_parser *);
 static size_t cp_parser_skip_balanced_tokens (cp_parser *, size_t);
+static tree cp_parser_omp_loop_nest (cp_parser *, bool *);
 
 // -------------------------------------------------------------------------- //
 // Unevaluated Operand Guard
@@ -7999,12 +8050,22 @@  cp_parser_postfix_expression (cp_parser *parser, bool address_p, bool cast_p,
 				    complain);
 	    else
 	      /* All other function calls.  */
-	      postfix_expression
-		= finish_call_expr (postfix_expression, &args,
-				    /*disallow_virtual=*/false,
-				    koenig_p,
-				    complain);
-
+	      {
+		if (DECL_P (postfix_expression)
+		    && parser->omp_for_parse_state
+		    && parser->omp_for_parse_state->in_intervening_code
+		    && omp_runtime_api_call (postfix_expression))
+		  {
+		    error_at (loc, "calls to the OpenMP runtime API are "
+				   "not permitted in intervening code");
+		    parser->omp_for_parse_state->fail = true;
+		  }
+		postfix_expression
+		  = finish_call_expr (postfix_expression, &args,
+				      /*disallow_virtual=*/false,
+				      koenig_p,
+				      complain);
+	      }
 	    if (close_paren_loc != UNKNOWN_LOCATION)
 	      postfix_expression.set_location (combined_loc);
 
@@ -12522,9 +12583,15 @@  cp_parser_statement (cp_parser* parser, tree in_statement_expr,
 	 return so that we can check for a close brace.  Otherwise we
 	 require a real statement and must go back and read one.  */
       if (in_compound_for_pragma)
-	cp_parser_pragma (parser, pragma_compound, if_p);
+	{
+	  if (cp_parser_pragma (parser, pragma_compound, if_p)
+	      && parser->omp_for_parse_state)
+	    check_omp_intervening_code (parser);
+	}
       else if (!cp_parser_pragma (parser, pragma_stmt, if_p))
 	do_restart = true;
+      else if (parser->omp_for_parse_state)
+	check_omp_intervening_code (parser);
       if (parser->lexer != lexer
 	  && lexer->in_omp_attribute_pragma
 	  && (!in_omp_attribute_pragma || lexer->orphan_p))
@@ -12960,6 +13027,55 @@  cp_parser_compound_statement (cp_parser *parser, tree in_statement_expr,
   return compound_stmt;
 }
 
+/* Diagnose errors related to imperfectly nested loops in an OMP
+   loop construct.  This function is called when such code is seen.
+   Only issue one such diagnostic no matter how much invalid
+   intervening code there is in the loop.
+   FIXME: maybe the location associated with the diagnostic should
+   be the current parser token instead of the location of the outer loop
+   nest.  */
+
+static void
+check_omp_intervening_code (cp_parser *parser)
+{
+  struct omp_for_parse_data *omp_for_parse_state
+    = parser->omp_for_parse_state;
+  gcc_assert (omp_for_parse_state);
+  /* Nothing to record unless we are currently parsing intervening code.  */
+  if (!omp_for_parse_state->in_intervening_code)
+    return;
+  omp_for_parse_state->saw_intervening_code = true;
+
+  /* Only diagnose errors related to perfect nesting once.  */
+  if (!omp_for_parse_state->perfect_nesting_fail)
+    {
+      if (omp_for_parse_state->code == OACC_LOOP)
+	{
+	  error_at (omp_for_parse_state->for_loc,
+		    "inner loops must be perfectly nested in "
+		    "%<#pragma acc loop%>");
+	  omp_for_parse_state->perfect_nesting_fail = true;
+	}
+      else if (omp_for_parse_state->ordered)
+	{
+	  error_at (omp_for_parse_state->for_loc,
+		    "inner loops must be perfectly nested with "
+		    "%<ordered%> clause");
+	  omp_for_parse_state->perfect_nesting_fail = true;
+	}
+      else if (omp_for_parse_state->inscan)
+	{
+	  error_at (omp_for_parse_state->for_loc,
+		    "inner loops must be perfectly nested with "
+		    "%<reduction%> %<inscan%> clause");
+	  omp_for_parse_state->perfect_nesting_fail = true;
+	}
+      /* TODO: Also reject loops with TILE directive.  */
+      if (omp_for_parse_state->perfect_nesting_fail)
+	omp_for_parse_state->fail = true;	/* Also a structural error.  */
+    }
+}
+
 /* Parse an (optional) statement-seq.
 
    statement-seq:
@@ -12969,6 +13085,11 @@  cp_parser_compound_statement (cp_parser *parser, tree in_statement_expr,
 static void
 cp_parser_statement_seq_opt (cp_parser* parser, tree in_statement_expr)
 {
+  struct omp_for_parse_data *omp_for_parse_state
+    = parser->omp_for_parse_state;
+  bool in_omp_loop_block
+    = omp_for_parse_state ? omp_for_parse_state->want_nested_loop : false;
+
   /* Scan statements until there aren't any more.  */
   while (true)
     {
@@ -12996,6 +13117,50 @@  cp_parser_statement_seq_opt (cp_parser* parser, tree in_statement_expr)
 	    }
 	}
 
+      /* Handle special cases for OMP FOR canonical loop syntax.  */
+      else if (in_omp_loop_block)
+	{
+	  bool want_nested_loop = omp_for_parse_state->want_nested_loop;
+	  if (want_nested_loop
+	      && token->type == CPP_KEYWORD && token->keyword == RID_FOR)
+	    {
+	      /* Found the nested loop.  */
+	      omp_for_parse_state->depth++;
+	      add_stmt (cp_parser_omp_loop_nest (parser, NULL));
+	      omp_for_parse_state->depth--;
+	    }
+	  else if (token->type == CPP_SEMICOLON)
+	    {
+	      /* Prior to implementing the OpenMP 5.1 syntax for canonical
+		 loop form, GCC used to accept an empty statements as not
+		 being intervening code.  Continue to do that, as an
+		 extension.  */
+	      /* FIXME:  Maybe issue a warning or something here?  */
+	      cp_lexer_consume_token (parser->lexer);
+	    }
+	  else if (want_nested_loop && token->type == CPP_OPEN_BRACE)
+	    /* The nested compound statement may contain the next loop, or
+	       it might just be intervening code.  */
+	    {
+	      cp_parser_statement (parser, in_statement_expr, true, NULL);
+	      if (omp_for_parse_state->want_nested_loop)
+		check_omp_intervening_code (parser);
+	    }
+	  else
+	    {
+	      /* This must be intervening code.  */
+	      omp_for_parse_state->want_nested_loop = false;
+	      /* Defer calling check_omp_intervening_code on pragmas until
+		 cp_parser_statement, because we can't know until we parse
+		 it whether or not the pragma is a statement.  */
+	      if (token->type != CPP_PRAGMA)
+		check_omp_intervening_code (parser);
+	      cp_parser_statement (parser, in_statement_expr, true, NULL);
+	      omp_for_parse_state->want_nested_loop = want_nested_loop;
+	    }
+	  continue;
+	}
+
       /* Parse the statement.  */
       cp_parser_statement (parser, in_statement_expr, true, NULL);
     }
@@ -14194,6 +14359,15 @@  cp_parser_iteration_statement (cp_parser* parser, bool *if_p, bool ivdep,
      statement.  */
   in_statement = parser->in_statement;
 
+  /* Special case for OMP loop intervening code.  Parsing of permitted
+     collapsed loop nests is handled elsewhere.  */
+  if (parser->omp_for_parse_state)
+    {
+      error_at (token->location,
+		"loop not permitted in intervening code in OpenMP loop body");
+      parser->omp_for_parse_state->fail = true;
+    }
+
   /* See what kind of keyword it is.  */
   keyword = token->keyword;
   switch (keyword)
@@ -43053,7 +43227,19 @@  cp_parser_omp_for_incr (cp_parser *parser, tree decl)
   return build2 (MODIFY_EXPR, TREE_TYPE (decl), decl, rhs);
 }
 
-/* Parse the initialization statement of an OpenMP for loop.
+/* Parse the initialization statement of an OpenMP for loop.  Range-for
+   is handled separately in cp_convert_omp_range_for.
+
+   On entry SL is the current statement list.  Parsing of some forms
+   of initialization pops this list and stores its contents in either INIT
+   or THIS_PRE_BODY, and sets SL to null.  Initialization for class
+   iterators is added directly to SL and it is not popped until later.
+
+   On return, DECL is set if the initialization is by binding the
+   iteration variable.  If the initialization is by assignment, REAL_DECL
+   is set to point to a variable in an outer scope.  ORIG_INIT is set
+   if the iteration variable is of class type; this is a copy saved for
+   error checking in finish_omp_for.
 
    Return true if the resulting construct should have an
    OMP_CLAUSE_PRIVATE added to it.  */
@@ -43061,7 +43247,7 @@  cp_parser_omp_for_incr (cp_parser *parser, tree decl)
 static tree
 cp_parser_omp_for_loop_init (cp_parser *parser,
 			     tree &this_pre_body,
-			     releasing_vec &for_block,
+			     tree &sl,
 			     tree &init,
 			     tree &orig_init,
 			     tree &decl,
@@ -43159,18 +43345,22 @@  cp_parser_omp_for_loop_init (cp_parser *parser,
 			      asm_specification,
 			      LOOKUP_ONLYCONVERTING);
 	      orig_init = init;
+
+	      /* In the case of a class iterator, do not pop sl here.
+		 Both class initialization and finalization must happen in
+		 the enclosing init block scope.  For now set the init
+		 expression to null; it'll be filled in properly in
+		 finish_omp_for before stuffing it in the OMP_FOR.  */
 	      if (CLASS_TYPE_P (TREE_TYPE (decl)))
+		init = NULL_TREE;
+	      else  /* It is a parameterized type.  */
 		{
-		  vec_safe_push (for_block, this_pre_body);
-		  init = NULL_TREE;
-		}
-	      else
-		{
-		  init = pop_stmt_list (this_pre_body);
+		  init = pop_stmt_list (sl);
+		  sl = NULL_TREE;
 		  if (init && TREE_CODE (init) == STATEMENT_LIST)
 		    {
 		      tree_stmt_iterator i = tsi_start (init);
-		      /* Move lambda DECL_EXPRs to FOR_BLOCK.  */
+		      /* Move lambda DECL_EXPRs to the enclosing block.  */
 		      while (!tsi_end_p (i))
 			{
 			  tree t = tsi_stmt (i);
@@ -43178,7 +43368,7 @@  cp_parser_omp_for_loop_init (cp_parser *parser,
 			      && TREE_CODE (DECL_EXPR_DECL (t)) == TYPE_DECL)
 			    {
 			      tsi_delink (&i);
-			      vec_safe_push (for_block, t);
+			      add_stmt (t);
 			      continue;
 			    }
 			  break;
@@ -43192,9 +43382,10 @@  cp_parser_omp_for_loop_init (cp_parser *parser,
 			}
 		    }
 		}
-	      this_pre_body = NULL_TREE;
 	    }
 	  else
+	    /* This is an initialized declaration of non-class,
+	       non-parameterized type iteration variable.  */
 	    {
 	      /* Consume '='.  */
 	      cp_lexer_consume_token (parser->lexer);
@@ -43208,6 +43399,8 @@  cp_parser_omp_for_loop_init (cp_parser *parser,
 				/*init_const_expr_p=*/false,
 				asm_specification,
 				LOOKUP_ONLYCONVERTING);
+	      this_pre_body = pop_stmt_list (sl);
+	      sl = NULL_TREE;
 	    }
 
 	  if (pushed_scope)
@@ -43279,14 +43472,21 @@  cp_parser_omp_for_loop_init (cp_parser *parser,
 		real_decl = TREE_OPERAND (init, 0);
 	    }
 	}
+      this_pre_body = pop_stmt_list (sl);
+      sl = NULL_TREE;
     }
   return add_private_clause;
 }
 
-/* Helper for cp_parser_omp_for_loop, handle one range-for loop.  */
+/* Helper for cp_parser_omp_loop_nest, handle one range-for loop
+   including introducing new temporaries for the range start and end,
+   doing auto deduction, and processing decomposition variables.
 
+   This function is also called from pt.cc during template instantiation.
+   In that case SL is NULL_TREE, otherwise it is the current statement
+   list.  */
 void
-cp_convert_omp_range_for (tree &this_pre_body, vec<tree, va_gc> *for_block,
+cp_convert_omp_range_for (tree &this_pre_body, tree &sl,
 			  tree &decl, tree &orig_decl, tree &init,
 			  tree &orig_init, tree &cond, tree &incr)
 {
@@ -43322,8 +43522,11 @@  cp_convert_omp_range_for (tree &this_pre_body, vec<tree, va_gc> *for_block,
       cond = global_namespace;
       incr = NULL_TREE;
       orig_init = init;
-      if (this_pre_body)
-	this_pre_body = pop_stmt_list (this_pre_body);
+      if (sl)
+	{
+	  this_pre_body = pop_stmt_list (sl);
+	  sl = NULL_TREE;
+	}
       return;
     }
 
@@ -43403,11 +43606,7 @@  cp_convert_omp_range_for (tree &this_pre_body, vec<tree, va_gc> *for_block,
 
   orig_decl = decl;
   decl = begin;
-  if (for_block)
-    {
-      vec_safe_push (for_block, this_pre_body);
-      this_pre_body = NULL_TREE;
-    }
+  /* Defer popping sl here.  */
 
   tree decomp_first_name = NULL_TREE;
   unsigned decomp_cnt = 0;
@@ -43445,6 +43644,15 @@  cp_convert_omp_range_for (tree &this_pre_body, vec<tree, va_gc> *for_block,
 	}
     }
 
+  /* The output ORIG_DECL is not a decl.  Instead, it is a tree structure
+     that holds decls for variables implementing the iterator, represented
+     as a TREE_LIST whose TREE_CHAIN is a vector.  The first two elements
+     of the vector are decls of scratch variables for the range start and
+     end that will eventually be bound in the implicit scope surrounding
+     the whole loop nest.  The remaining elements are decls of derived
+     decomposition variables that are bound inside the loop body.  This
+     structure is further mangled by finish_omp_for into the form required
+     for the OMP_FOR_ORIG_DECLS field of the OMP_FOR tree node.  */
   tree v = make_tree_vec (decomp_cnt + 3);
   TREE_VEC_ELT (v, 0) = range_temp_decl;
   TREE_VEC_ELT (v, 1) = end;
@@ -43699,404 +43907,762 @@  cp_parser_omp_scan_loop_body (cp_parser *parser)
   braces.require_close (parser);
 }
 
-/* Parse the restricted form of the for statement allowed by OpenMP.  */
 
-static tree
-cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
-			tree *cclauses, bool *if_p)
-{
-  tree init, orig_init, cond, incr, body, decl, pre_body = NULL_TREE, ret;
-  tree orig_decl;
-  tree real_decl, initv, condv, incrv, declv, orig_declv;
-  tree this_pre_body, cl, ordered_cl = NULL_TREE;
-  location_t loc_first;
-  bool collapse_err = false;
-  int i, collapse = 1, ordered = 0, count, nbraces = 0;
-  releasing_vec for_block;
-  auto_vec<tree, 4> orig_inits;
-  bool tiling = false;
-  bool inscan = false;
+/* This function parses a single level of a loop nest, invoking itself
+   recursively if necessary.
 
-  for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl))
-    if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE)
-      collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl));
-    else if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_TILE)
-      {
-	tiling = true;
-	collapse = list_length (OMP_CLAUSE_TILE_LIST (cl));
-      }
-    else if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_ORDERED
-	     && OMP_CLAUSE_ORDERED_EXPR (cl))
-      {
-	ordered_cl = cl;
-	ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (cl));
-      }
-    else if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_REDUCTION
-	     && OMP_CLAUSE_REDUCTION_INSCAN (cl)
-	     && (code == OMP_SIMD || code == OMP_FOR))
-      inscan = true;
+   loop-nest :: for (...) loop-body
+   loop-body :: loop-nest
+	     |  { [intervening-code] loop-body [intervening-code] }
+	     |  final-loop-body
+   intervening-code :: structured-block-sequence
+   final-loop-body :: structured-block
 
-  if (ordered && ordered < collapse)
-    {
-      error_at (OMP_CLAUSE_LOCATION (ordered_cl),
-		"%<ordered%> clause parameter is less than %<collapse%>");
-      OMP_CLAUSE_ORDERED_EXPR (ordered_cl)
-	= build_int_cst (NULL_TREE, collapse);
-      ordered = collapse;
-    }
+   For a collapsed loop nest, only a single OMP_FOR is built, pulling out
+   all the iterator information from the inner loops into vectors in the
+   parser->omp_for_parse_state structure.
 
-  gcc_assert (tiling || (collapse >= 1 && ordered >= 0));
-  count = ordered ? ordered : collapse;
+   In the "range for" case, it is transformed into a regular "for" iterator
+   by introducing some temporary variables for the begin/end,
+   as well as bindings of the actual iteration variables which are
+   injected into the body of the loop.
 
-  declv = make_tree_vec (count);
-  initv = make_tree_vec (count);
-  condv = make_tree_vec (count);
-  incrv = make_tree_vec (count);
-  orig_declv = NULL_TREE;
+   Initialization code for iterator variables may end up either in the
+   init vector (simple assignments), in omp_for_parse_state->pre_body
+   (decl_exprs for iterators bound in the for statement), or in the
+   scope surrounding this level of loop initialization.
 
-  loc_first = cp_lexer_peek_token (parser->lexer)->location;
+   The scopes of class iterator variables and their finalizers need to
+   be adjusted after parsing so that all of the initialization happens
+   in a scope surrounding all of the intervening and body code.  For
+   this reason we separately store the initialization and body blocks
+   for each level of loops in the omp_for_parse_state structure and
+   reassemble/reorder them in cp_parser_omp_for_loop.  See additional
+   comments there about the use of placeholders, etc.  */
 
-  for (i = 0; i < count; i++)
-    {
-      int bracecount = 0;
-      tree add_private_clause = NULL_TREE;
-      location_t loc;
+static tree
+cp_parser_omp_loop_nest (cp_parser *parser, bool *if_p)
+{
+  tree decl, cond, incr, init;
+  tree orig_init, real_decl, orig_decl;
+  tree init_block, body_block;
+  tree init_placeholder, body_placeholder;
+  tree init_scope;
+  tree this_pre_body = NULL_TREE;
+  bool moreloops;
+  unsigned char save_in_statement;
+  tree add_private_clause = NULL_TREE;
+  location_t loc;
+  bool is_range_for = false;
+  tree sl = NULL_TREE;
+  struct omp_for_parse_data *omp_for_parse_state
+    = parser->omp_for_parse_state;
+  gcc_assert (omp_for_parse_state);
+  int depth = omp_for_parse_state->depth;
+
+  /* We have already matched the FOR token but not consumed it yet.  */
+  gcc_assert (cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR));
+  loc = cp_lexer_consume_token (parser->lexer)->location;
+
+  /* Forbid break/continue in the loop initializer, condition, and
+     increment expressions.  */
+  save_in_statement = parser->in_statement;
+  parser->in_statement = IN_OMP_BLOCK;
+
+  /* We are not in intervening code now.  */
+  omp_for_parse_state->in_intervening_code = false;
+
+  /* Don't create location wrapper nodes within an OpenMP "for"
+     statement.  */
+  auto_suppress_location_wrappers sentinel;
 
-      if (!cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR))
-	{
-	  if (!collapse_err)
-	    cp_parser_error (parser, "for statement expected");
-	  return NULL;
-	}
-      loc = cp_lexer_consume_token (parser->lexer)->location;
+  matching_parens parens;
+  if (!parens.require_open (parser))
+    return NULL;
 
-      /* Don't create location wrapper nodes within an OpenMP "for"
-	 statement.  */
-      auto_suppress_location_wrappers sentinel;
+  init = orig_init = decl = real_decl = orig_decl = NULL_TREE;
 
-      matching_parens parens;
-      if (!parens.require_open (parser))
-	return NULL;
+  init_placeholder = build_stmt (input_location, EXPR_STMT,
+				 integer_zero_node);
+  vec_safe_push (omp_for_parse_state->init_placeholderv, init_placeholder);
 
-      init = orig_init = decl = real_decl = orig_decl = NULL_TREE;
-      this_pre_body = push_stmt_list ();
+  /* The init_block acts as a container for this level of loop goo.  */
+  init_block = push_stmt_list ();
+  vec_safe_push (omp_for_parse_state->init_blockv, init_block);
 
-      if (code != OACC_LOOP && cxx_dialect >= cxx11)
-	{
-	  /* Save tokens so that we can put them back.  */
-	  cp_lexer_save_tokens (parser->lexer);
+  /* Wrap a scope around this entire level of loop to hold bindings
+     of loop iteration variables.  We can't insert them directly
+     in the containing scope because that would cause their visibility to
+     be incorrect with respect to intervening code after this loop.
+     We will combine the nested init_scopes in postprocessing after the
+     entire loop is parsed.  */
+  init_scope = begin_compound_stmt (0);
 
-	  /* Look for ':' that is not nested in () or {}.  */
-	  bool is_range_for
-	    = (cp_parser_skip_to_closing_parenthesis_1 (parser,
-							/*recovering=*/false,
-							CPP_COLON,
-							/*consume_paren=*/
-							false) == -1);
+  /* Now we need another level of statement list container to capture the
+     initialization (and possible finalization) bits.  In some cases this
+     container may be popped off during initializer parsing to store code in
+     INIT or THIS_PRE_BODY, depending on the form of initialization.  If
+     we have a class iterator we will pop it at the end of parsing this
+     level, so the cleanups are handled correctly.  */
+  sl = push_stmt_list ();
 
-	  /* Roll back the tokens we skipped.  */
-	  cp_lexer_rollback_tokens (parser->lexer);
+  if (omp_for_parse_state->code != OACC_LOOP && cxx_dialect >= cxx11)
+    {
+      /* Save tokens so that we can put them back.  */
+      cp_lexer_save_tokens (parser->lexer);
 
-	  if (is_range_for)
-	    {
-	      bool saved_colon_corrects_to_scope_p
-		= parser->colon_corrects_to_scope_p;
+      /* Look for ':' that is not nested in () or {}.  */
+      is_range_for
+	= (cp_parser_skip_to_closing_parenthesis_1 (parser,
+						    /*recovering=*/false,
+						    CPP_COLON,
+						    /*consume_paren=*/
+						    false) == -1);
 
-	      /* A colon is used in range-based for.  */
-	      parser->colon_corrects_to_scope_p = false;
+      /* Roll back the tokens we skipped.  */
+      cp_lexer_rollback_tokens (parser->lexer);
 
-	      /* Parse the declaration.  */
-	      cp_parser_simple_declaration (parser,
-					    /*function_definition_allowed_p=*/
-					    false, &decl);
-	      parser->colon_corrects_to_scope_p
-		= saved_colon_corrects_to_scope_p;
+      if (is_range_for)
+	{
+	  bool saved_colon_corrects_to_scope_p
+	    = parser->colon_corrects_to_scope_p;
 
-	      cp_parser_require (parser, CPP_COLON, RT_COLON);
+	  /* A colon is used in range-based for.  */
+	  parser->colon_corrects_to_scope_p = false;
 
-	      init = cp_parser_range_for (parser, NULL_TREE, NULL_TREE, decl,
-					  false, 0, true);
+	  /* Parse the declaration.  */
+	  cp_parser_simple_declaration (parser,
+					/*function_definition_allowed_p=*/
+					false, &decl);
+	  parser->colon_corrects_to_scope_p
+	    = saved_colon_corrects_to_scope_p;
 
-	      cp_convert_omp_range_for (this_pre_body, for_block, decl,
-					orig_decl, init, orig_init,
-					cond, incr);
-	      if (this_pre_body)
-		{
-		  if (pre_body)
-		    {
-		      tree t = pre_body;
-		      pre_body = push_stmt_list ();
-		      add_stmt (t);
-		      add_stmt (this_pre_body);
-		      pre_body = pop_stmt_list (pre_body);
-		    }
-		  else
-		    pre_body = this_pre_body;
-		}
+	  cp_parser_require (parser, CPP_COLON, RT_COLON);
 
-	      if (ordered_cl)
-		error_at (OMP_CLAUSE_LOCATION (ordered_cl),
-			  "%<ordered%> clause with parameter on "
-			  "range-based %<for%> loop");
+	  init = cp_parser_range_for (parser, NULL_TREE, NULL_TREE, decl,
+				      false, 0, true);
 
-	      goto parse_close_paren;
-	    }
-	}
+	  cp_convert_omp_range_for (this_pre_body, sl, decl,
+				    orig_decl, init, orig_init,
+				    cond, incr);
 
-      add_private_clause
-	= cp_parser_omp_for_loop_init (parser, this_pre_body, for_block,
-				       init, orig_init, decl, real_decl);
+	  if (omp_for_parse_state->ordered_cl)
+	    error_at (OMP_CLAUSE_LOCATION (omp_for_parse_state->ordered_cl),
+		      "%<ordered%> clause with parameter on "
+		      "range-based %<for%> loop");
 
-      cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
-      if (this_pre_body)
-	{
-	  this_pre_body = pop_stmt_list (this_pre_body);
-	  if (pre_body)
-	    {
-	      tree t = pre_body;
-	      pre_body = push_stmt_list ();
-	      add_stmt (t);
-	      add_stmt (this_pre_body);
-	      pre_body = pop_stmt_list (pre_body);
-	    }
-	  else
-	    pre_body = this_pre_body;
+	  goto parse_close_paren;
 	}
+    }
 
-      if (decl)
-	real_decl = decl;
-      if (cclauses != NULL
-	  && cclauses[C_OMP_CLAUSE_SPLIT_PARALLEL] != NULL
-	  && real_decl != NULL_TREE
-	  && code != OMP_LOOP)
-	{
-	  tree *c;
-	  for (c = &cclauses[C_OMP_CLAUSE_SPLIT_PARALLEL]; *c ; )
-	    if (OMP_CLAUSE_CODE (*c) == OMP_CLAUSE_FIRSTPRIVATE
-		&& OMP_CLAUSE_DECL (*c) == real_decl)
-	      {
-		error_at (loc, "iteration variable %qD"
-			  " should not be firstprivate", real_decl);
-		*c = OMP_CLAUSE_CHAIN (*c);
-	      }
-	    else if (OMP_CLAUSE_CODE (*c) == OMP_CLAUSE_LASTPRIVATE
-		     && OMP_CLAUSE_DECL (*c) == real_decl)
+  add_private_clause
+    = cp_parser_omp_for_loop_init (parser, this_pre_body, sl,
+				   init, orig_init, decl, real_decl);
+
+  cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
+
+  /* If the iteration variable was introduced via a declaration in the
+     for statement, DECL points at it.  Otherwise DECL is null and
+     REAL_DECL is a variable previously declared in an outer scope.
+     Make REAL_DECL point at the iteration variable no matter where it
+     was introduced.  */
+  if (decl)
+    real_decl = decl;
+
+  /* Some clauses treat iterator variables specially.  */
+  if (omp_for_parse_state->cclauses != NULL
+      && omp_for_parse_state->cclauses[C_OMP_CLAUSE_SPLIT_PARALLEL] != NULL
+      && real_decl != NULL_TREE
+      && omp_for_parse_state->code != OMP_LOOP)
+    {
+      tree *c;
+      for (c = &(omp_for_parse_state->cclauses[C_OMP_CLAUSE_SPLIT_PARALLEL]);
+	   *c ; )
+	if (OMP_CLAUSE_CODE (*c) == OMP_CLAUSE_FIRSTPRIVATE
+	    && OMP_CLAUSE_DECL (*c) == real_decl)
+	  {
+	    error_at (loc, "iteration variable %qD"
+		      " should not be firstprivate", real_decl);
+	    *c = OMP_CLAUSE_CHAIN (*c);
+	  }
+	else if (OMP_CLAUSE_CODE (*c) == OMP_CLAUSE_LASTPRIVATE
+		 && OMP_CLAUSE_DECL (*c) == real_decl)
+	  {
+	    /* Move lastprivate (decl) clause to OMP_FOR_CLAUSES.  */
+	    tree l = *c;
+	    *c = OMP_CLAUSE_CHAIN (*c);
+	    if (omp_for_parse_state->code == OMP_SIMD)
 	      {
-		/* Move lastprivate (decl) clause to OMP_FOR_CLAUSES.  */
-		tree l = *c;
-		*c = OMP_CLAUSE_CHAIN (*c);
-		if (code == OMP_SIMD)
-		  {
-		    OMP_CLAUSE_CHAIN (l) = cclauses[C_OMP_CLAUSE_SPLIT_FOR];
-		    cclauses[C_OMP_CLAUSE_SPLIT_FOR] = l;
-		  }
-		else
-		  {
-		    OMP_CLAUSE_CHAIN (l) = clauses;
-		    clauses = l;
-		  }
-		add_private_clause = NULL_TREE;
+		OMP_CLAUSE_CHAIN (l)
+		  = omp_for_parse_state->cclauses[C_OMP_CLAUSE_SPLIT_FOR];
+		omp_for_parse_state->cclauses[C_OMP_CLAUSE_SPLIT_FOR] = l;
 	      }
 	    else
 	      {
-		if (OMP_CLAUSE_CODE (*c) == OMP_CLAUSE_PRIVATE
-		    && OMP_CLAUSE_DECL (*c) == real_decl)
-		  add_private_clause = NULL_TREE;
-		c = &OMP_CLAUSE_CHAIN (*c);
+		OMP_CLAUSE_CHAIN (l) = omp_for_parse_state->clauses;
+		omp_for_parse_state->clauses = l;
 	      }
-	}
+	    add_private_clause = NULL_TREE;
+	  }
+	else
+	  {
+	    if (OMP_CLAUSE_CODE (*c) == OMP_CLAUSE_PRIVATE
+		&& OMP_CLAUSE_DECL (*c) == real_decl)
+	      add_private_clause = NULL_TREE;
+	    c = &OMP_CLAUSE_CHAIN (*c);
+	  }
+    }
 
-      if (add_private_clause)
+  if (add_private_clause)
+    {
+      tree c;
+      for (c = omp_for_parse_state->clauses; c ; c = OMP_CLAUSE_CHAIN (c))
 	{
-	  tree c;
-	  for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
+	  if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
+	       || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
+	      && OMP_CLAUSE_DECL (c) == decl)
+	    break;
+	  else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE
+		   && OMP_CLAUSE_DECL (c) == decl)
+	    error_at (loc, "iteration variable %qD "
+		      "should not be firstprivate",
+		      decl);
+	  else if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+		    || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IN_REDUCTION)
+		   && OMP_CLAUSE_DECL (c) == decl)
+	    error_at (loc, "iteration variable %qD should not be reduction",
+		      decl);
+	}
+      if (c == NULL)
+	{
+	  if ((omp_for_parse_state->code == OMP_SIMD
+	       && omp_for_parse_state->count != 1)
+	      || omp_for_parse_state->code == OMP_LOOP)
+	    c = build_omp_clause (loc, OMP_CLAUSE_LASTPRIVATE);
+	  else if (omp_for_parse_state->code != OMP_SIMD)
+	    c = build_omp_clause (loc, OMP_CLAUSE_PRIVATE);
+	  else
+	    c = build_omp_clause (loc, OMP_CLAUSE_LINEAR);
+	  OMP_CLAUSE_DECL (c) = add_private_clause;
+	  c = finish_omp_clauses (c, C_ORT_OMP);
+	  if (c)
 	    {
-	      if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
-		   || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
-		  && OMP_CLAUSE_DECL (c) == decl)
-		break;
-	      else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE
-		       && OMP_CLAUSE_DECL (c) == decl)
-		error_at (loc, "iteration variable %qD "
-			  "should not be firstprivate",
-			  decl);
-	      else if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
-			|| OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IN_REDUCTION)
-		       && OMP_CLAUSE_DECL (c) == decl)
-		error_at (loc, "iteration variable %qD should not be reduction",
-			  decl);
-	    }
-	  if (c == NULL)
-	    {
-	      if ((code == OMP_SIMD && collapse != 1) || code == OMP_LOOP)
-		c = build_omp_clause (loc, OMP_CLAUSE_LASTPRIVATE);
-	      else if (code != OMP_SIMD)
-		c = build_omp_clause (loc, OMP_CLAUSE_PRIVATE);
-	      else
-		c = build_omp_clause (loc, OMP_CLAUSE_LINEAR);
-	      OMP_CLAUSE_DECL (c) = add_private_clause;
-	      c = finish_omp_clauses (c, C_ORT_OMP);
-	      if (c)
-		{
-		  OMP_CLAUSE_CHAIN (c) = clauses;
-		  clauses = c;
-		  /* For linear, signal that we need to fill up
-		     the so far unknown linear step.  */
-		  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR)
-		    OMP_CLAUSE_LINEAR_STEP (c) = NULL_TREE;
-		}
+	      OMP_CLAUSE_CHAIN (c) = omp_for_parse_state->clauses;
+	      omp_for_parse_state->clauses = c;
+	      /* For linear, signal that we need to fill up
+		 the so far unknown linear step.  */
+	      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR)
+		OMP_CLAUSE_LINEAR_STEP (c) = NULL_TREE;
 	    }
 	}
+    }
 
-      cond = NULL;
-      if (cp_lexer_next_token_is_not (parser->lexer, CPP_SEMICOLON))
-	cond = cp_parser_omp_for_cond (parser, decl, code);
-      cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
+  cond = NULL;
+  if (cp_lexer_next_token_is_not (parser->lexer, CPP_SEMICOLON))
+    cond = cp_parser_omp_for_cond (parser, decl, omp_for_parse_state->code);
+  cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
 
-      incr = NULL;
-      if (cp_lexer_next_token_is_not (parser->lexer, CPP_CLOSE_PAREN))
-	{
-	  /* If decl is an iterator, preserve the operator on decl
-	     until finish_omp_for.  */
-	  if (real_decl
-	      && ((processing_template_decl
-		   && (TREE_TYPE (real_decl) == NULL_TREE
-		       || !INDIRECT_TYPE_P (TREE_TYPE (real_decl))))
-		  || CLASS_TYPE_P (TREE_TYPE (real_decl))))
-	    incr = cp_parser_omp_for_incr (parser, real_decl);
-	  else
-	    incr = cp_parser_expression (parser);
-	  protected_set_expr_location_if_unset (incr, input_location);
-	}
+  incr = NULL;
+  if (cp_lexer_next_token_is_not (parser->lexer, CPP_CLOSE_PAREN))
+    {
+      /* If decl is an iterator, preserve the operator on decl
+	 until finish_omp_for.  */
+      if (real_decl
+	  && ((processing_template_decl
+	       && (TREE_TYPE (real_decl) == NULL_TREE
+		   || !INDIRECT_TYPE_P (TREE_TYPE (real_decl))))
+	      || CLASS_TYPE_P (TREE_TYPE (real_decl))))
+	incr = cp_parser_omp_for_incr (parser, real_decl);
+      else
+	incr = cp_parser_expression (parser);
+      protected_set_expr_location_if_unset (incr, input_location);
+    }
 
-    parse_close_paren:
-      if (!parens.require_close (parser))
-	cp_parser_skip_to_closing_parenthesis (parser, /*recovering=*/true,
-					       /*or_comma=*/false,
-					       /*consume_paren=*/true);
+ parse_close_paren:
+  if (!parens.require_close (parser))
+    cp_parser_skip_to_closing_parenthesis (parser, /*recovering=*/true,
+					   /*or_comma=*/false,
+					   /*consume_paren=*/true);
 
-      TREE_VEC_ELT (declv, i) = decl;
-      TREE_VEC_ELT (initv, i) = init;
-      TREE_VEC_ELT (condv, i) = cond;
-      TREE_VEC_ELT (incrv, i) = incr;
-      if (orig_init)
-	{
-	  orig_inits.safe_grow_cleared (i + 1, true);
-	  orig_inits[i] = orig_init;
-	}
-      if (orig_decl)
+  /* We've parsed all the for (...) stuff now.  Store the bits.  */
+  TREE_VEC_ELT (omp_for_parse_state->declv, depth) = decl;
+  TREE_VEC_ELT (omp_for_parse_state->initv, depth) = init;
+  TREE_VEC_ELT (omp_for_parse_state->condv, depth) = cond;
+  TREE_VEC_ELT (omp_for_parse_state->incrv, depth) = incr;
+  if (orig_init)
+    {
+      omp_for_parse_state->orig_inits.safe_grow_cleared (depth + 1, true);
+      omp_for_parse_state->orig_inits[depth] = orig_init;
+    }
+  if (orig_decl)
+    {
+      if (!omp_for_parse_state->orig_declv)
+	omp_for_parse_state->orig_declv
+	  = copy_node (omp_for_parse_state->declv);
+      TREE_VEC_ELT (omp_for_parse_state->orig_declv, depth) = orig_decl;
+    }
+  else if (omp_for_parse_state->orig_declv)
+    TREE_VEC_ELT (omp_for_parse_state->orig_declv, depth) = decl;
+  if (this_pre_body)
+    append_to_statement_list_force (this_pre_body,
+				    &(omp_for_parse_state->pre_body));
+
+  /* Start a nested block for the loop body.  */
+  body_placeholder = build_stmt (input_location, EXPR_STMT,
+				 integer_zero_node);
+  vec_safe_push (omp_for_parse_state->body_placeholderv, body_placeholder);
+  body_block = push_stmt_list ();
+  vec_safe_push (omp_for_parse_state->body_blockv, body_block);
+
+  moreloops = depth < omp_for_parse_state->count - 1;
+  omp_for_parse_state->want_nested_loop = moreloops;
+  if (moreloops && cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR))
+    {
+      omp_for_parse_state->depth++;
+      add_stmt (cp_parser_omp_loop_nest (parser, if_p));
+      omp_for_parse_state->depth--;
+    }
+  else if (moreloops
+	   && cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
+    {
+      /* This is the open brace in the loop-body grammar production.  Rather
+	 than trying to special-case braces, just parse it as a compound
+	 statement and handle the nested loop-body case there.  Note that
+	 when we see a further open brace inside the compound statement
+	 loop-body, we don't know whether it is the start of intervening
+	 code that is a compound statement, or a level of braces
+	 surrounding a nested loop-body.  Use the WANT_NESTED_LOOP state
+	 bit to ensure we have only one nested loop at each level.  */
+
+      omp_for_parse_state->in_intervening_code = true;
+      cp_parser_compound_statement (parser, NULL, BCS_NORMAL, false);
+      omp_for_parse_state->in_intervening_code = false;
+
+      if (omp_for_parse_state->want_nested_loop)
 	{
-	  if (!orig_declv)
-	    orig_declv = copy_node (declv);
-	  TREE_VEC_ELT (orig_declv, i) = orig_decl;
+	  /* We have already parsed the whole loop body and not found a
+	     nested loop.  */
+	  error_at (omp_for_parse_state->for_loc,
+		    "not enough nested loops");
+	  omp_for_parse_state->fail = true;
 	}
-      else if (orig_declv)
-	TREE_VEC_ELT (orig_declv, i) = decl;
+      if_p = NULL;
+    }
+  else
+    {
+      /* This is the final-loop-body case in the grammar: we have something
+	 that is not a FOR and not an open brace.  */
+      if (moreloops)
+	{
+	  /* If we were expecting a nested loop, give an error and mark
+	     that parsing has failed, and try to recover by parsing the
+	     body as regular code without further collapsing.  */
+	  error_at (omp_for_parse_state->for_loc,
+		    "not enough nested loops");
+	  omp_for_parse_state->fail = true;
+	}
+      parser->in_statement = IN_OMP_FOR;
+
+      /* Generate the parts of range for that belong in the loop body,
+	 to be executed on every iteration.  This includes setting the
+	 user-declared decomposition variables from the compiler-generated
+	 temporaries that are the real iteration variables for OMP_FOR.
+	 FIXME:  Not sure if this is correct with respect to visibility
+	 of the variables from intervening code.  However, putting this
+	 code in each level of loop instead of all around the innermost
+	 body also makes the decomposition variables visible to the
+	 inner for init/bound/step expressions, which is not supposed to
+	 happen and causes test failures.  */
+      if (omp_for_parse_state->orig_declv)
+	for (int i = 0; i < omp_for_parse_state->count; i++)
+	  {
+	    tree o = TREE_VEC_ELT (omp_for_parse_state->orig_declv, i);
+	    tree d = TREE_VEC_ELT (omp_for_parse_state->declv, i);
+	    if (o != d)
+	      cp_finish_omp_range_for (o, d);
+	  }
 
-      if (i == count - 1)
-	break;
+      /* Now parse the final-loop-body for the innermost loop.  */
+      parser->omp_for_parse_state = NULL;
+      if (omp_for_parse_state->inscan)
+	cp_parser_omp_scan_loop_body (parser);
+      else
+	cp_parser_statement (parser, NULL_TREE, false, if_p);
+      parser->omp_for_parse_state = omp_for_parse_state;
+    }
+  parser->in_statement = save_in_statement;
+  omp_for_parse_state->want_nested_loop = false;
+  omp_for_parse_state->in_intervening_code = true;
+
+  /* Pop and remember the body block.  Add the body placeholder
+     to the surrounding statement list instead.  This is just a unique
+     token that will be replaced when we reassemble the generated
+     code for the entire omp for statement.  */
+  body_block = pop_stmt_list (body_block);
+  omp_for_parse_state->body_blockv[depth] = body_block;
+  add_stmt (body_placeholder);
+
+  /* Pop and remember the init block.  */
+  if (sl)
+    add_stmt (pop_stmt_list (sl));
+  finish_compound_stmt (init_scope);
+  init_block = pop_stmt_list (init_block);
+  omp_for_parse_state->init_blockv[depth] = init_block;
+
+  /* Return the init placeholder rather than the remembered init block.
+     Again, this is just a unique cookie that will be used to reassemble
+     code pieces when the entire omp for statement has been parsed.  */
+  return init_placeholder;
+}
+
+/* Worker for find_structured_blocks.  *TP points to a STATEMENT_LIST
+   and ITER is the element that is or contains a nested loop.  This
+   function moves the statements before and after ITER into
+   OMP_STRUCTURED_BLOCKs and modifies *TP.  */
+static void
+insert_structured_blocks (tree *tp, tree_stmt_iterator iter)
+{
+  tree sl = push_stmt_list ();
+  for (tree_stmt_iterator i = tsi_start (*tp); !tsi_end_p (i); )
+    if (i == iter)
+      {
+	sl = pop_stmt_list (sl);
+	if (TREE_CODE (sl) != STATEMENT_LIST || !tsi_end_p (tsi_start (sl)))
+	  tsi_link_before (&i,
+			   build1 (OMP_STRUCTURED_BLOCK, void_type_node, sl),
+			   TSI_SAME_STMT);
+	i++;
+	sl = push_stmt_list ();
+      }
+    else
+      {
+	tree s = tsi_stmt (i);
+	tsi_delink (&i);  /* Advances i to next statement.  */
+	add_stmt (s);
+      }
+  sl = pop_stmt_list (sl);
+  if (TREE_CODE (sl) != STATEMENT_LIST || !tsi_end_p (tsi_start (sl)))
+    tsi_link_after (&iter,
+		    build1 (OMP_STRUCTURED_BLOCK, void_type_node, sl),
+		    TSI_SAME_STMT);
+}
 
-      /* FIXME: OpenMP 3.0 draft isn't very clear on what exactly is allowed
-	 in between the collapsed for loops to be still considered perfectly
-	 nested.  Hopefully the final version clarifies this.
-	 For now handle (multiple) {'s and empty statements.  */
-      cp_parser_parse_tentatively (parser);
-      for (;;)
+/* Helper to find and mark structured blocks in intervening code for a
+   single loop level with markers for later error checking.  *TP is the
+   piece of code to be marked and INNER is the inner loop placeholder.
+   Returns true if INNER was found (recursively) in *TP.  */
+static bool
+find_structured_blocks (tree *tp, tree inner)
+{
+  if (*tp == inner)
+    return true;
+  else if (TREE_CODE (*tp) == BIND_EXPR)
+    return find_structured_blocks (&(BIND_EXPR_BODY (*tp)), inner);
+  else if (TREE_CODE (*tp) == STATEMENT_LIST)
+    {
+      for (tree_stmt_iterator i = tsi_start (*tp); !tsi_end_p (i); ++i)
 	{
-	  if (cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR))
-	    break;
-	  else if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
+	  tree *p = tsi_stmt_ptr (i);
+	  /* The normal case is that there is no intervening code and we
+	     do not have to insert any OMP_STRUCTURED_BLOCK markers.  */
+	  if (find_structured_blocks (p, inner))
 	    {
-	      cp_lexer_consume_token (parser->lexer);
-	      bracecount++;
-	    }
-	  else if (bracecount
-		   && cp_lexer_next_token_is (parser->lexer, CPP_SEMICOLON))
-	    cp_lexer_consume_token (parser->lexer);
-	  else
-	    {
-	      loc = cp_lexer_peek_token (parser->lexer)->location;
-	      error_at (loc, "not enough for loops to collapse");
-	      collapse_err = true;
-	      cp_parser_abort_tentative_parse (parser);
-	      declv = NULL_TREE;
-	      break;
+	      if (!(i == tsi_start (*tp) && i == tsi_last (*tp)))
+		insert_structured_blocks (tp, i);
+	      return true;
 	    }
 	}
+      return false;
+    }
+  else if (TREE_CODE (*tp) == TRY_FINALLY_EXPR)
+    return find_structured_blocks (&(TREE_OPERAND (*tp, 0)), inner);
+  else if (TREE_CODE (*tp) == CLEANUP_STMT)
+    return find_structured_blocks (&(CLEANUP_BODY (*tp)), inner);
+  else
+    return false;
+}
+
+/* Helpers used for relinking tree structures: In tree rooted at
+   CONTEXT, replace ORIG with REPLACEMENT.  If FLATTEN is true, try to combine
+   nested BIND_EXPRs.  Gives an assertion if it fails to find ORIG.  */
+
+struct sit_data {
+  tree orig;
+  tree repl;
+  bool flatten;
+};
+
+static tree
+substitute_in_tree_walker (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED,
+			   void *dp)
+{
+  struct sit_data *sit = (struct sit_data *)dp;
+  if (*tp == sit->orig)
+    {
+      *tp = sit->repl;
+      return *tp;
+    }
+  /* Remove redundant BIND_EXPRs with no bindings even when not specifically
+     trying to flatten.  */
+  else if (TREE_CODE (*tp) == BIND_EXPR
+	   && BIND_EXPR_BODY (*tp) == sit->orig
+	   && !BIND_EXPR_VARS (*tp)
+	   && (sit->flatten || TREE_CODE (sit->repl) == BIND_EXPR))
+    {
+      *tp = sit->repl;
+      return *tp;
+    }
+  else if (sit->flatten
+	   && TREE_CODE (*tp) == BIND_EXPR
+	   && TREE_CODE (sit->repl) == BIND_EXPR)
+    {
+      if (BIND_EXPR_BODY (*tp) == sit->orig)
+	{
+	  /* Merge binding lists for two directly nested BIND_EXPRs,
+	     keeping the outer one.  */
+	  BIND_EXPR_VARS (*tp) = chainon (BIND_EXPR_VARS (*tp),
+					  BIND_EXPR_VARS (sit->repl));
+	  BIND_EXPR_BODY (*tp) = BIND_EXPR_BODY (sit->repl);
+	  return *tp;
+	}
+      else if (TREE_CODE (BIND_EXPR_BODY (*tp)) == STATEMENT_LIST)
+	/* There might be a statement list containing cleanup_points
+	   etc between the two levels of BIND_EXPR.  We can still merge
+	   them, again keeping the outer BIND_EXPR.  */
+	for (tree_stmt_iterator i = tsi_start (BIND_EXPR_BODY (*tp));
+	     !tsi_end_p (i); ++i)
+	  {
+	    tree *p = tsi_stmt_ptr (i);
+	    if (*p == sit->orig)
+	      {
+		BIND_EXPR_VARS (*tp) = chainon (BIND_EXPR_VARS (*tp),
+						BIND_EXPR_VARS (sit->repl));
+		*p = BIND_EXPR_BODY (sit->repl);
+		return *tp;
+	      }
+	  }
+    }
+  return NULL;
+}
+
+static void
+substitute_in_tree (tree *context, tree orig, tree repl, bool flatten)
+{
+  struct sit_data data;
+
+  gcc_assert (*context && orig && repl);
+  if (TREE_CODE (repl) == BIND_EXPR && !BIND_EXPR_VARS (repl))
+    repl = BIND_EXPR_BODY (repl);
+  data.orig = orig;
+  data.repl = repl;
+  data.flatten = flatten;
+
+  tree result = cp_walk_tree (context, substitute_in_tree_walker,
+			      (void *)&data, NULL);
+  gcc_assert (result != NULL_TREE);
+}
 
-      if (declv)
+/* Walker to patch up the BLOCK_NODE hierarchy after the above surgery.
+   *DP is the parent block.  */
+
+static tree
+fixup_blocks_walker (tree *tp, int *walk_subtrees, void *dp)
+{
+  tree superblock = *(tree *)dp;
+
+  if (TREE_CODE (*tp) == BIND_EXPR)
+    {
+      tree block = BIND_EXPR_BLOCK (*tp);
+      if (superblock)
 	{
-	  cp_parser_parse_definitely (parser);
-	  nbraces += bracecount;
+	  BLOCK_SUPERCONTEXT (block) = superblock;
+	  BLOCK_CHAIN (block) = BLOCK_SUBBLOCKS (superblock);
+	  BLOCK_SUBBLOCKS (superblock) = block;
 	}
+      BLOCK_SUBBLOCKS (block) = NULL_TREE;
+      cp_walk_tree (&BIND_EXPR_BODY (*tp), fixup_blocks_walker,
+		    (void *)&block, NULL);
+      *walk_subtrees = 0;
     }
 
-  if (nbraces)
-    if_p = NULL;
+  return NULL;
+}
 
-  /* Note that we saved the original contents of this flag when we entered
-     the structured block, and so we don't need to re-save it here.  */
-  parser->in_statement = IN_OMP_FOR;
+/* Parse the restricted form of the for statement allowed by OpenMP.  */
+
+static tree
+cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
+			tree *cclauses, bool *if_p)
+{
+  tree ret;
+  tree cl, ordered_cl = NULL_TREE;
+  int collapse = 1, ordered = 0;
+  unsigned int count;
+  bool tiling = false;
+  bool inscan = false;
+  struct omp_for_parse_data data;
+  struct omp_for_parse_data *save_data = parser->omp_for_parse_state;
+  tree result;
+  location_t loc_first = cp_lexer_peek_token (parser->lexer)->location;
+
+  for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl))
+    if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE)
+      collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl));
+    else if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_TILE)
+      {
+	tiling = true;
+	collapse = list_length (OMP_CLAUSE_TILE_LIST (cl));
+      }
+    else if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_ORDERED
+	     && OMP_CLAUSE_ORDERED_EXPR (cl))
+      {
+	ordered_cl = cl;
+	ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (cl));
+      }
+    else if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_REDUCTION
+	     && OMP_CLAUSE_REDUCTION_INSCAN (cl)
+	     && (code == OMP_SIMD || code == OMP_FOR))
+      inscan = true;
 
-  /* Note that the grammar doesn't call for a structured block here,
-     though the loop as a whole is a structured block.  */
-  if (orig_declv)
+  if (ordered && ordered < collapse)
     {
-      body = begin_omp_structured_block ();
-      for (i = 0; i < count; i++)
-	if (TREE_VEC_ELT (orig_declv, i) != TREE_VEC_ELT (declv, i))
-	  cp_finish_omp_range_for (TREE_VEC_ELT (orig_declv, i),
-				   TREE_VEC_ELT (declv, i));
+      error_at (OMP_CLAUSE_LOCATION (ordered_cl),
+		"%<ordered%> clause parameter is less than %<collapse%>");
+      OMP_CLAUSE_ORDERED_EXPR (ordered_cl)
+	= build_int_cst (NULL_TREE, collapse);
+      ordered = collapse;
+    }
+
+  gcc_assert (tiling || (collapse >= 1 && ordered >= 0));
+  count = ordered ? ordered : collapse;
+
+  if (!cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR))
+    {
+      cp_parser_error (parser, "for statement expected");
+      return NULL;
     }
-  else
-    body = push_stmt_list ();
-  if (inscan)
-    cp_parser_omp_scan_loop_body (parser);
-  else
-    cp_parser_statement (parser, NULL_TREE, false, if_p);
-  if (orig_declv)
-    body = finish_omp_structured_block (body);
-  else
-    body = pop_stmt_list (body);
 
-  if (declv == NULL_TREE)
+  /* Initialize parse state for recursive descent.  */
+  data.declv = make_tree_vec (count);
+  data.initv = make_tree_vec (count);
+  data.condv = make_tree_vec (count);
+  data.incrv = make_tree_vec (count);
+  data.pre_body = NULL_TREE;
+  data.for_loc = cp_lexer_peek_token (parser->lexer)->location;
+  data.count = count;
+  data.depth = 0;
+  data.want_nested_loop = true;
+  data.ordered = ordered > 0;
+  data.in_intervening_code = false;
+  data.perfect_nesting_fail = false;
+  data.fail = false;
+  data.inscan = inscan;
+  data.saw_intervening_code = false;
+  data.code = code;
+  data.orig_declv = NULL_TREE;
+  data.clauses = clauses;
+  data.cclauses = cclauses;
+  data.ordered_cl = ordered_cl;
+  parser->omp_for_parse_state = &data;
+
+  cp_parser_omp_loop_nest (parser, if_p);
+
+  /* Bomb out early if there was an error (not enough loops, etc).  */
+  if (data.fail || data.declv == NULL_TREE)
+    {
+      parser->omp_for_parse_state = save_data;
+      return NULL_TREE;
+    }
+
+  /* Relink the init and body blocks that were built during parsing.  At
+     this point we have a structure nested like
+       init 0
+	 body 0
+	   init 1
+	     body 1
+	       init 2
+		 body 2
+     and we want to turn it into
+      init 0
+	 init 1
+	   init 2
+	     omp_for
+	       body 0
+		 body 1
+		   body 2
+     We also need to flatten the init blocks, as some code for later
+     processing of combined directives gets confused otherwise.  */
+
+  gcc_assert (vec_safe_length (data.init_blockv) == count);
+  gcc_assert (vec_safe_length (data.body_blockv) == count);
+  gcc_assert (vec_safe_length (data.init_placeholderv) == count);
+  gcc_assert (vec_safe_length (data.body_placeholderv) == count);
+
+  /* First insert markers for structured blocks for intervening code in
+     the loop bodies.  */
+  for (unsigned int i = 0; i < count - 1; i++)
+    {
+      bool good = find_structured_blocks (&(data.body_blockv[i]),
+					  data.init_placeholderv[i+1]);
+      gcc_assert (good);
+    }
+
+  /* Do the substitution from the inside out.  */
+  for (unsigned int i = count - 1; i > 0; i--)
+    {
+      substitute_in_tree (&(data.body_blockv[i-1]),
+			  data.init_placeholderv[i],
+			  data.body_blockv[i], false);
+      substitute_in_tree (&(data.init_blockv[i-1]),
+			  data.body_placeholderv[i-1],
+			  data.init_blockv[i], true);
+    }
+
+  /* Generate the OMP_FOR.  Note finish_omp_for adds the OMP_FOR
+     (and possibly other stuff) to the current statement list but
+     returns a pointer to the OMP_FOR itself, or null in case of error.  */
+  result = push_stmt_list ();
+  ret = finish_omp_for (loc_first, code, data.declv, data.orig_declv,
+			data.initv, data.condv, data.incrv,
+			data.body_blockv[0],
+			data.pre_body, &data.orig_inits, data.clauses);
+  result = pop_stmt_list (result);
+
+  /* Check for errors involving lb/ub/incr expressions referencing
+     variables declared in intervening code.  */
+  if (data.saw_intervening_code
+      && !c_omp_check_loop_binding_exprs (ret, &data.orig_inits))
     ret = NULL_TREE;
-  else
-    ret = finish_omp_for (loc_first, code, declv, orig_declv, initv, condv,
-			  incrv, body, pre_body, &orig_inits, clauses);
 
-  while (nbraces)
+  if (ret)
     {
-      if (cp_lexer_next_token_is (parser->lexer, CPP_CLOSE_BRACE))
-	{
-	  cp_lexer_consume_token (parser->lexer);
-	  nbraces--;
-	}
-      else if (cp_lexer_next_token_is (parser->lexer, CPP_SEMICOLON))
-	cp_lexer_consume_token (parser->lexer);
-      else
+      /* Splice the omp_for into the nest of init blocks.  */
+      substitute_in_tree (&(data.init_blockv[0]),
+			  data.body_placeholderv[count - 1],
+			  result, true);
+
+      /* Some later processing for combined directives assumes
+	 that the BIND_EXPR containing range for variables appears
+	 at top level in the OMP_FOR body.  Fix that up if it's
+	 not the case, e.g. because there is intervening code.  */
+      if (code != OACC_LOOP)
+	finish_omp_for_block (data.init_blockv[0], ret);
+
+      /* Clean up the block subblock/superblock links.  Per comment in
+	 begin_compound_stmt, "we don't build BLOCK nodes when processing
+	 templates", so skip this step in that case.  */
+      if (!processing_template_decl)
 	{
-	  if (!collapse_err)
-	    {
-	      error_at (cp_lexer_peek_token (parser->lexer)->location,
-			"collapsed loops not perfectly nested");
-	    }
-	  collapse_err = true;
-	  cp_parser_statement_seq_opt (parser, NULL);
-	  if (cp_lexer_next_token_is (parser->lexer, CPP_EOF))
-	    break;
+	  tree superblock = NULL_TREE;
+	  cp_walk_tree (&data.init_blockv[0], fixup_blocks_walker,
+			(void *)&superblock, NULL);
 	}
-    }
 
-  while (!for_block->is_empty ())
-    {
-      tree t = for_block->pop ();
-      if (TREE_CODE (t) == STATEMENT_LIST)
-	add_stmt (pop_stmt_list (t));
-      else
-	add_stmt (t);
+      /* Finally record the result.  */
+      add_stmt (data.init_blockv[0]);
     }
 
+  parser->omp_for_parse_state = save_data;
   return ret;
 }
 
@@ -44155,7 +44721,7 @@  cp_parser_omp_loop (cp_parser *parser, cp_token *pragma_tok,
   ret = cp_parser_omp_for_loop (parser, OMP_LOOP, clauses, cclauses, if_p);
 
   cp_parser_end_omp_structured_block (parser, save);
-  add_stmt (finish_omp_for_block (finish_omp_structured_block (sb), ret));
+  add_stmt (finish_omp_structured_block (sb));
 
   return ret;
 }
@@ -44204,7 +44770,7 @@  cp_parser_omp_simd (cp_parser *parser, cp_token *pragma_tok,
   ret = cp_parser_omp_for_loop (parser, OMP_SIMD, clauses, cclauses, if_p);
 
   cp_parser_end_omp_structured_block (parser, save);
-  add_stmt (finish_omp_for_block (finish_omp_structured_block (sb), ret));
+  add_stmt (finish_omp_structured_block (sb));
 
   return ret;
 }
@@ -44306,7 +44872,7 @@  cp_parser_omp_for (cp_parser *parser, cp_token *pragma_tok,
   ret = cp_parser_omp_for_loop (parser, OMP_FOR, clauses, cclauses, if_p);
 
   cp_parser_end_omp_structured_block (parser, save);
-  add_stmt (finish_omp_for_block (finish_omp_structured_block (sb), ret));
+  add_stmt (finish_omp_structured_block (sb));
 
   return ret;
 }
@@ -45154,7 +45720,7 @@  cp_parser_omp_distribute (cp_parser *parser, cp_token *pragma_tok,
   ret = cp_parser_omp_for_loop (parser, OMP_DISTRIBUTE, clauses, NULL, if_p);
 
   cp_parser_end_omp_structured_block (parser, save);
-  add_stmt (finish_omp_for_block (finish_omp_structured_block (sb), ret));
+  add_stmt (finish_omp_structured_block (sb));
 
   return ret;
 }
@@ -46189,7 +46755,15 @@  cp_parser_oacc_loop (cp_parser *parser, cp_token *pragma_tok, char *p_name,
   int save = cp_parser_begin_omp_structured_block (parser);
   tree stmt = cp_parser_omp_for_loop (parser, OACC_LOOP, clauses, NULL, if_p);
   cp_parser_end_omp_structured_block (parser, save);
-  add_stmt (finish_omp_structured_block (block));
+
+  /* Later processing of combined acc loop constructs gets confused
+     by an extra level of empty nested BIND_EXPRs, so flatten them.  */
+  block = finish_omp_structured_block (block);
+  if (TREE_CODE (block) == BIND_EXPR
+      && TREE_CODE (BIND_EXPR_BODY (block)) == BIND_EXPR
+      && !BIND_EXPR_VARS (block))
+    block = BIND_EXPR_BODY (block);
+  add_stmt (block);
 
   return stmt;
 }
@@ -48541,7 +49115,7 @@  cp_parser_omp_taskloop (cp_parser *parser, cp_token *pragma_tok,
 				if_p);
 
   cp_parser_end_omp_structured_block (parser, save);
-  add_stmt (finish_omp_for_block (finish_omp_structured_block (sb), ret));
+  add_stmt (finish_omp_structured_block (sb));
 
   return ret;
 }
@@ -49316,6 +49890,17 @@  cp_parser_pragma (cp_parser *parser, enum pragma_context context, bool *if_p)
   parser->lexer->in_pragma = true;
 
   id = cp_parser_pragma_kind (pragma_tok);
+  if (parser->omp_for_parse_state
+      && parser->omp_for_parse_state->in_intervening_code
+      && id >= PRAGMA_OMP__START_
+      && id <= PRAGMA_OMP__LAST_)
+    {
+      error_at (pragma_tok->location,
+		"intervening code must not contain OpenMP directives");
+      parser->omp_for_parse_state->fail = true;
+      cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+      return false;
+    }
   if (id != PRAGMA_OMP_DECLARE && id != PRAGMA_OACC_ROUTINE)
     cp_ensure_no_omp_declare_simd (parser);
   switch (id)
diff --git a/gcc/cp/parser.h b/gcc/cp/parser.h
index e261d7e16e4..6cbb9a8e031 100644
--- a/gcc/cp/parser.h
+++ b/gcc/cp/parser.h
@@ -435,6 +435,9 @@  struct GTY(()) cp_parser {
      specification, if any, or UNKNOWN_LOCATION otherwise.  */
   location_t innermost_linkage_specification_location;
 
+  /* Pointer to state for parsing omp_loops.  Managed by
+     cp_parser_omp_for_loop in parser.cc and not used outside that file.  */
+  struct omp_for_parse_data * GTY((skip)) omp_for_parse_state;
 };
 
 /* In parser.cc  */
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 303f72353c0..35c7705151d 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -18432,7 +18432,8 @@  tsubst_omp_for_iterator (tree t, int i, tree declv, tree &orig_declv,
       tree this_pre_body = NULL_TREE;
       tree orig_init = NULL_TREE;
       tree orig_decl = NULL_TREE;
-      cp_convert_omp_range_for (this_pre_body, NULL, decl, orig_decl, init,
+      tree init_sl = NULL_TREE;
+      cp_convert_omp_range_for (this_pre_body, init_sl, decl, orig_decl, init,
 				orig_init, cond, incr);
       if (orig_decl)
 	{
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 8fb47fd179e..7b8a44f5792 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -10520,6 +10520,7 @@  finish_omp_for (location_t locus, enum tree_code code, tree declv,
   int i;
   int collapse = 1;
   int ordered = 0;
+  auto_vec<location_t> init_locv;
 
   gcc_assert (TREE_VEC_LENGTH (declv) == TREE_VEC_LENGTH (initv));
   gcc_assert (TREE_VEC_LENGTH (declv) == TREE_VEC_LENGTH (condv));
@@ -10548,6 +10549,28 @@  finish_omp_for (location_t locus, enum tree_code code, tree declv,
       incr = TREE_VEC_ELT (incrv, i);
       elocus = locus;
 
+      /* We are going to throw out the init's original MODIFY_EXPR or
+	 MODOP_EXPR below.  Save its location so we can use it when
+	 reconstructing the expression farther down.  Alternatively, if the
+	 initializer is a binding of the iteration variable, save
+	 that location.  Any of these locations in the initialization clause
+	 for the current nested loop are better than using the argument locus,
+	 that points to the "for" of the outermost loop in the nest.  */
+      if (init && EXPR_HAS_LOCATION (init))
+	elocus = EXPR_LOCATION (init);
+      else if (decl && INDIRECT_REF_P (decl) && EXPR_HAS_LOCATION (decl))
+	/* This can happen for class iterators.  */
+	elocus = EXPR_LOCATION (decl);
+      else if (decl && DECL_P (decl))
+	{
+	  if (DECL_SOURCE_LOCATION (decl) != UNKNOWN_LOCATION)
+	    elocus = DECL_SOURCE_LOCATION (decl);
+	  else if (DECL_INITIAL (decl)
+		   && EXPR_HAS_LOCATION (DECL_INITIAL (decl)))
+	    elocus = EXPR_LOCATION (DECL_INITIAL (decl));
+	}
+      init_locv.safe_push (elocus);
+
       if (decl == NULL)
 	{
 	  if (init != NULL)
@@ -10576,9 +10599,6 @@  finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	    }
 	}
 
-      if (init && EXPR_HAS_LOCATION (init))
-	elocus = EXPR_LOCATION (init);
-
       if (cond == global_namespace)
 	continue;
 
@@ -10625,8 +10645,8 @@  finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	     again and going through the cp_build_modify_expr path below when
 	     we instantiate the thing.  */
 	  TREE_VEC_ELT (initv, i)
-	    = build2 (MODIFY_EXPR, void_type_node, TREE_VEC_ELT (declv, i),
-		      TREE_VEC_ELT (initv, i));
+	    = build2_loc (init_locv[i], MODIFY_EXPR, void_type_node,
+			  TREE_VEC_ELT (declv, i), TREE_VEC_ELT (initv, i));
 	}
 
       TREE_TYPE (stmt) = void_type_node;
@@ -10655,10 +10675,7 @@  finish_omp_for (location_t locus, enum tree_code code, tree declv,
       incr = TREE_VEC_ELT (incrv, i);
       if (orig_incr)
 	TREE_VEC_ELT (orig_incr, i) = incr;
-      elocus = locus;
-
-      if (init && EXPR_HAS_LOCATION (init))
-	elocus = EXPR_LOCATION (init);
+      elocus = init_locv[i];
 
       if (!DECL_P (decl))
 	{
@@ -10703,7 +10720,7 @@  finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	init = cp_build_modify_expr (elocus, decl, NOP_EXPR, init,
 				     tf_warning_or_error);
       else
-	init = build2 (MODIFY_EXPR, void_type_node, decl, init);
+	init = build2_loc (elocus, MODIFY_EXPR, void_type_node, decl, init);
       if (decl == error_mark_node || init == error_mark_node)
 	return NULL;
 
@@ -10875,47 +10892,71 @@  finish_omp_for (location_t locus, enum tree_code code, tree declv,
   return omp_for;
 }
 
-/* Fix up range for decls.  Those decls were pushed into BIND's BIND_EXPR_VARS
-   and need to be moved into the BIND_EXPR inside of the OMP_FOR's body.  */
+/* Code walker for finish_omp_for_block: extract binding of DP->var
+   from its current block and move it to a new BIND_EXPR DP->b
+   surrounding the body of DP->omp_for.  */
+
+struct fofb_data {
+  tree var;
+  tree b;
+  tree omp_for;
+};
+
+static tree
+finish_omp_for_block_walker (tree *tp, int *walk_subtrees, void *dp)
+{
+  struct fofb_data *fofb = (struct fofb_data *)dp;
+  if (TREE_CODE (*tp) == BIND_EXPR)
+    for (tree *p = &BIND_EXPR_VARS (*tp); *p; p = &DECL_CHAIN (*p))
+      {
+	if (*p == fofb->var)
+	  {
+	    *p = DECL_CHAIN (*p);
+	    if (fofb->b == NULL_TREE)
+	      {
+		fofb->b = make_node (BLOCK);
+		fofb->b = build3 (BIND_EXPR, void_type_node, NULL_TREE,
+			    OMP_FOR_BODY (fofb->omp_for), fofb->b);
+		TREE_SIDE_EFFECTS (fofb->b) = 1;
+		OMP_FOR_BODY (fofb->omp_for) = fofb->b;
+	      }
+	    DECL_CHAIN (fofb->var) = BIND_EXPR_VARS (fofb->b);
+	    BIND_EXPR_VARS (fofb->b) = fofb->var;
+	    BLOCK_VARS (BIND_EXPR_BLOCK (fofb->b)) = fofb->var;
+	    BLOCK_VARS (BIND_EXPR_BLOCK (*tp)) = BIND_EXPR_VARS (*tp);
+	    return *tp;
+	  }
+      }
+  if (TREE_CODE (*tp) != BIND_EXPR && TREE_CODE (*tp) != STATEMENT_LIST)
+    *walk_subtrees = false;
+  return NULL_TREE;
+}
 
+/* Fix up range for decls.  Those decls were pushed into BIND's
+   BIND_EXPR_VARS, or that of a nested BIND_EXPR inside its body,
+   and need to be moved into a new BIND_EXPR surrounding OMP_FOR's body
+   so that processing of combined loop directives can find them.  */
 tree
 finish_omp_for_block (tree bind, tree omp_for)
 {
   if (omp_for == NULL_TREE
       || !OMP_FOR_ORIG_DECLS (omp_for)
-      || bind == NULL_TREE
-      || TREE_CODE (bind) != BIND_EXPR)
+      || bind == NULL_TREE)
     return bind;
-  tree b = NULL_TREE;
+  struct fofb_data fofb;
+  fofb.b = NULL_TREE;
+  fofb.omp_for = omp_for;
   for (int i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (omp_for)); i++)
     if (TREE_CODE (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (omp_for), i)) == TREE_LIST
 	&& TREE_CHAIN (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (omp_for), i)))
       {
 	tree v = TREE_CHAIN (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (omp_for), i));
-	gcc_assert (BIND_EXPR_BLOCK (bind)
-		    && (BIND_EXPR_VARS (bind)
-			== BLOCK_VARS (BIND_EXPR_BLOCK (bind))));
 	for (int j = 2; j < TREE_VEC_LENGTH (v); j++)
-	  for (tree *p = &BIND_EXPR_VARS (bind); *p; p = &DECL_CHAIN (*p))
-	    {
-	      if (*p == TREE_VEC_ELT (v, j))
-		{
-		  tree var = *p;
-		  *p = DECL_CHAIN (*p);
-		  if (b == NULL_TREE)
-		    {
-		      b = make_node (BLOCK);
-		      b = build3 (BIND_EXPR, void_type_node, NULL_TREE,
-				  OMP_FOR_BODY (omp_for), b);
-		      TREE_SIDE_EFFECTS (b) = 1;
-		      OMP_FOR_BODY (omp_for) = b;
-		    }
-		  DECL_CHAIN (var) = BIND_EXPR_VARS (b);
-		  BIND_EXPR_VARS (b) = var;
-		  BLOCK_VARS (BIND_EXPR_BLOCK (b)) = var;
-		}
-	    }
-	BLOCK_VARS (BIND_EXPR_BLOCK (bind)) = BIND_EXPR_VARS (bind);
+	  {
+	    fofb.var = TREE_VEC_ELT (v, j);
+	    cp_walk_tree (&bind, finish_omp_for_block_walker,
+			  (void *)&fofb, NULL);
+	  }
       }
   return bind;
 }
diff --git a/gcc/testsuite/c-c++-common/goacc/tile-2.c b/gcc/testsuite/c-c++-common/goacc/tile-2.c
index 98abc903bdc..dc306703260 100644
--- a/gcc/testsuite/c-c++-common/goacc/tile-2.c
+++ b/gcc/testsuite/c-c++-common/goacc/tile-2.c
@@ -3,8 +3,8 @@  int main ()
 #pragma acc parallel
   {
 #pragma acc loop tile (*,*)
-    for (int ix = 0; ix < 30; ix++) /* { dg-error "not enough" "" { target c } } */
-      ; /* { dg-error "not enough" "" { target c++ } } */
+    for (int ix = 0; ix < 30; ix++) /* { dg-error "not enough" } */
+      ;
 
 #pragma acc loop tile (*,*)
     for (int ix = 0; ix < 30; ix++)
diff --git a/gcc/testsuite/g++.dg/gomp/attrs-imperfect1.C b/gcc/testsuite/g++.dg/gomp/attrs-imperfect1.C
new file mode 100644
index 00000000000..cf293b5081c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/attrs-imperfect1.C
@@ -0,0 +1,38 @@ 
+/* { dg-do compile { target c++11 } } */
+
+/* This test case is expected to fail due to errors.  */
+
+int f1 (int depth, int iter);
+int f2 (int depth, int iter);
+
+void s1 (int a1, int a2, int a3)
+{
+  int i, j, k;
+
+  [[ omp :: directive (for, collapse(3)) ]]
+  for (i = 0; i < a1; i++)
+    {
+      f1 (0, i);
+      for (j = 0; j < a2; j++)
+	{
+	  [[ omp :: directive (barrier) ]] ;	/* { dg-error "intervening code must not contain OpenMP directives" } */
+	  f1 (1, j);
+	  if (i == 2)
+	    continue;	/* { dg-error "invalid exit" } */
+	  else
+	    break;	/* { dg-error "invalid exit" } */
+	  for (k = 0; k < a3; k++)
+	    {
+	      f1 (2, k);
+	      f2 (2, k);
+	    }
+	  f2 (1, j);
+	}
+      for (k = 0; k < a3; k++)	/* { dg-error "loop not permitted in intervening code " } */
+	{
+	  f1 (2, k);
+	  f2 (2, k);
+	}
+      f2 (0, i);
+    }
+}
diff --git a/gcc/testsuite/g++.dg/gomp/attrs-imperfect2.C b/gcc/testsuite/g++.dg/gomp/attrs-imperfect2.C
new file mode 100644
index 00000000000..0c9154dd10c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/attrs-imperfect2.C
@@ -0,0 +1,34 @@ 
+/* { dg-do compile { target c++11 } } */
+
+/* This test case is expected to fail due to errors.  */
+
+/* These functions that are part of the OpenMP runtime API would ordinarily
+   be declared in omp.h, but we don't have that here.  */
+extern int omp_get_num_threads(void);
+extern int omp_get_max_threads(void);
+
+int f1 (int depth, int iter);
+int f2 (int depth, int iter);
+
+void s1 (int a1, int a2, int a3)
+{
+  int i, j, k;
+  [[ omp :: directive (for, collapse(3)) ]]
+  for (i = 0; i < a1; i++)
+    {
+      f1 (0, i);
+      for (j = 0; j < omp_get_num_threads (); j++)  /* This is OK */
+	{
+	  f1 (1, omp_get_num_threads ());  /* { dg-error "not permitted in intervening code" } */
+	  for (k = omp_get_num_threads (); k < a3; k++)  /* This is OK */
+	    {
+	      f1 (2, omp_get_num_threads ());
+	      f2 (2, omp_get_max_threads ());
+	    }
+	  f2 (1, omp_get_max_threads ());  /* { dg-error "not permitted in intervening code" } */
+	}
+      f2 (0, i);
+    }
+}
+
+
diff --git a/gcc/testsuite/g++.dg/gomp/attrs-imperfect3.C b/gcc/testsuite/g++.dg/gomp/attrs-imperfect3.C
new file mode 100644
index 00000000000..6b612afd355
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/attrs-imperfect3.C
@@ -0,0 +1,33 @@ 
+/* { dg-do compile { target c++11 } } */
+
+/* This test case is expected to fail due to errors.  */
+
+/* Test that imperfectly-nested loops with the ordered clause give
+   an error, and that there is only one error (and not one on every
+   intervening statement).  */
+
+int f1 (int depth, int iter);
+int f2 (int depth, int iter);
+
+void s1 (int a1, int a2, int a3)
+{
+  int i, j, k;
+
+  [[ omp :: directive (for, ordered(3)) ]]
+  for (i = 0; i < a1; i++)  /* { dg-error "inner loops must be perfectly nested" } */
+    {
+      f1 (0, i);
+      for (j = 0; j < a2; j++)
+	{
+	  f1 (1, j);
+	  for (k = 0; k < a3; k++)
+	    {
+	      f1 (2, k);
+	      f2 (2, k);
+	    }
+	  f2 (1, j);
+	}
+      f2 (0, i);
+    }
+}
+
diff --git a/gcc/testsuite/g++.dg/gomp/attrs-imperfect4.C b/gcc/testsuite/g++.dg/gomp/attrs-imperfect4.C
new file mode 100644
index 00000000000..16636ab3eb6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/attrs-imperfect4.C
@@ -0,0 +1,33 @@ 
+/* { dg-do compile { target c++11 } } */
+
+/* This test case is expected to fail due to errors.  */
+
+int f1 (int depth, int iter);
+int f2 (int depth, int iter);
+
+void s1 (int a1, int a2, int a3)
+{
+  int i, j, k;
+
+  [[ omp :: directive (for, collapse(4)) ]]
+  for (i = 0; i < a1; i++)	/* { dg-error "not enough nested loops" } */
+    {
+      f1 (0, i);
+      for (j = 0; j < a2; j++)
+	{
+	  f1 (1, j);
+	  for (k = 0; k < a3; k++)
+	    {
+	      /* According to the grammar, this is intervening code; we
+		 don't know that we are also missing a nested for loop
+		 until we have parsed this whole compound expression.  */
+	      [[ omp :: directive (barrier) ]] ;	/* { dg-error "intervening code must not contain OpenMP directives" } */
+	      f1 (2, k);
+	      f2 (2, k);
+	    }
+	  f2 (1, j);
+	}
+      f2 (0, i);
+    }
+}
+
diff --git a/gcc/testsuite/g++.dg/gomp/attrs-imperfect5.C b/gcc/testsuite/g++.dg/gomp/attrs-imperfect5.C
new file mode 100644
index 00000000000..301307262a9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/attrs-imperfect5.C
@@ -0,0 +1,57 @@ 
+/* { dg-do compile { target c++11 } } */
+
+/* This test case is expected to fail due to errors.  */
+
+int f1 (int depth, int iter);
+int f2 (int depth, int iter);
+int ijk (int x, int y, int z);
+void f3 (int sum);
+
+/* This function isn't particularly meaningful, but it should compile without
+   error.  */
+int s1 (int a1, int a2, int a3)
+{
+  int i, j, k;
+  int r = 0;
+
+  [[ omp :: directive (simd, collapse(3), reduction (inscan, +:r)) ]]
+  for (i = 0; i < a1; i++)
+    {
+      for (j = 0; j < a2; j++)
+	{
+	  for (k = 0; k < a3; k++)
+	    {
+	      r = r + ijk (i, j, k);
+	      [[ omp :: directive (scan, exclusive (r)) ]] ;
+	      f3 (r);
+	    }
+	}
+    }
+  return r;
+}
+
+/* Adding intervening code should trigger an error.  */
+int s2 (int a1, int a2, int a3)
+{
+  int i, j, k;
+  int r = 0;
+
+  [[ omp :: directive (simd, collapse(3), reduction (inscan, +:r)) ]]
+  for (i = 0; i < a1; i++)  /* { dg-error "inner loops must be perfectly nested" } */
+    {
+      f1 (0, i);
+      for (j = 0; j < a2; j++)
+	{
+	  f1 (1, j);
+	  for (k = 0; k < a3; k++)
+	    {
+	      r = r + ijk (i, j, k);
+	      [[ omp :: directive (scan, exclusive (r)) ]] ;
+	      f3 (r);
+	    }
+	  f2 (1, j);
+	}
+      f2 (0, i);
+    }
+  return r;
+}
diff --git a/gcc/testsuite/g++.dg/gomp/pr41967.C b/gcc/testsuite/g++.dg/gomp/pr41967.C
index 0eb489e8bee..7b59f831fe0 100644
--- a/gcc/testsuite/g++.dg/gomp/pr41967.C
+++ b/gcc/testsuite/g++.dg/gomp/pr41967.C
@@ -11,7 +11,7 @@  foo ()
     {
       for (int j = 0; j < 5; ++j)
 	++sum;
-      ++sum;	// { dg-error "collapsed loops not perfectly nested" }
+      ++sum;
     }
   return sum;
 }
diff --git a/gcc/testsuite/g++.dg/gomp/tpl-imperfect-gotos.C b/gcc/testsuite/g++.dg/gomp/tpl-imperfect-gotos.C
new file mode 100644
index 00000000000..72206128fae
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/tpl-imperfect-gotos.C
@@ -0,0 +1,161 @@ 
+/* { dg-do compile } */
+
+/* This file contains tests that are expected to fail.  */
+
+
+/* These jumps are all OK since they are to/from the same structured block.  */
+
+template<typename T>
+void f1a (void)
+{
+#pragma omp for collapse(2)
+  for (T i = 0; i < 64; ++i)
+    {
+      goto a; a:;
+      for (T j = 0; j < 64; ++j)
+	{
+	  goto c; c:;
+	}
+      goto b; b:;
+    }
+}
+
+/* Jump around loop body to/from different structured blocks of intervening
+   code.  */
+template<typename T>
+void f2a (void)
+{
+#pragma omp for collapse(2)
+  for (T i = 0; i < 64; ++i)
+    {
+      goto a; a:;
+      if (i > 16) goto b; /* { dg-error "invalid branch to/from OpenMP structured block" } */
+      for (T j = 0; j < 64; ++j)
+	{
+	  goto c; c:;
+	}
+      goto b; b:;
+    }
+}
+
+/* Jump into loop body from intervening code.  */
+template<typename T>
+void f3a (void)
+{
+#pragma omp for collapse(2)
+  for (T i = 0; i < 64; ++i)
+    {
+      goto a; a:;
+      if (i > 16) goto c; /* { dg-error "invalid branch to/from OpenMP structured block" } */
+      for (T j = 0; j < 64; ++j)
+	{
+	c:  /* { dg-error "jump to label .c." } */
+	  ;
+	}
+      goto b; b:;
+    }
+}
+
+/* Jump out of loop body to intervening code.  */
+template<typename T>
+void f4a (void)
+{
+#pragma omp for collapse(2)
+  for (T i = 0; i < 64; ++i)
+    {
+      goto a; a:;
+      for (T j = 0; j < 64; ++j)
+	if (i > 16) goto c; /* { dg-error "invalid branch to/from OpenMP structured block" } */
+      c:
+	;
+      goto b; b:;
+    }
+}
+
+/* The next group of tests use the GNU extension for local labels.  Expected
+   behavior is the same as the above group.  */
+
+/* These jumps are all OK since they are to/from the same structured block.  */
+
+template<typename T>
+void f1b (void)
+{
+#pragma omp for collapse(2)
+  for (T i = 0; i < 64; ++i)
+    {
+      __label__ a, b, c;
+      goto a; a:;
+      for (T j = 0; j < 64; ++j)
+	{
+	  goto c; c:;
+	}
+      goto b; b:;
+    }
+}
+
+/* Jump around loop body to/from different structured blocks of intervening
+   code.  */
+template<typename T>
+void f2b (void)
+{
+#pragma omp for collapse(2)
+  for (T i = 0; i < 64; ++i)
+    {
+      __label__ a, b, c;
+      goto a; a:;
+      if (i > 16) goto b; /* { dg-error "invalid branch to/from OpenMP structured block" } */
+      for (T j = 0; j < 64; ++j)
+	{
+	  goto c; c:;
+	}
+      goto b; b:;
+    }
+}
+
+/* Jump into loop body from intervening code.  */
+template<typename T>
+void f3b (void)
+{
+#pragma omp for collapse(2)
+  for (T i = 0; i < 64; ++i)
+    {
+      __label__ a, b, c;
+      goto a; a:;
+      if (i > 16) goto c; /* { dg-error "invalid branch to/from OpenMP structured block" } */
+      for (T j = 0; j < 64; ++j)
+	{
+	c:  /* { dg-error "jump to label .c." } */
+	  ;
+	}
+      goto b; b:;
+    }
+}
+
+/* Jump out of loop body to intervening code.  */
+template<typename T>
+void f4b (void)
+{
+#pragma omp for collapse(2)
+  for (T i = 0; i < 64; ++i)
+    {
+      __label__ a, b, c;
+      goto a; a:;
+      for (T j = 0; j < 64; ++j)
+	if (i > 16) goto c; /* { dg-error "invalid branch to/from OpenMP structured block" } */
+      c:
+	;
+      goto b; b:;
+    }
+}
+
+int main (void)
+{
+  f1a<int> ();
+  f2a<int> ();
+  f3a<int> ();
+  f4a<int> ();
+  f1b<int> ();
+  f2b<int> ();
+  f3b<int> ();
+  f4b<int> ();
+}
diff --git a/gcc/testsuite/g++.dg/gomp/tpl-imperfect-invalid-scope.C b/gcc/testsuite/g++.dg/gomp/tpl-imperfect-invalid-scope.C
new file mode 100644
index 00000000000..1e85e64b14a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/tpl-imperfect-invalid-scope.C
@@ -0,0 +1,94 @@ 
+/* { dg-do compile } */
+
+/* Check that various cases of invalid references to variables bound
+   in an intervening code scope are diagnosed and do not ICE.  This test
+   is expected to produce errors.  */
+
+template<typename T>
+extern void foo (T, T);
+
+template<typename T>
+void f1 (void)
+{
+#pragma omp for collapse (2)
+  for (T i = 0; i < 64; i++)
+    {
+      T v = (i + 4) * 2;
+      for (T j = v; j < 64; j++)  /* { dg-error "initializer is bound in intervening code" }  */
+	foo (i, j);
+    }
+}
+
+template<typename T>
+void f2 (void)
+{
+#pragma omp for collapse (2)
+  for (T i = 0; i < 64; i++)
+    {
+      T v = (i + 4) * 2;
+      for (T j = 0; j < v; j++)  /* { dg-error "end test is bound in intervening code" }  */
+	foo (i, j);
+    }
+}
+
+template<typename T>
+void f3 (void)
+{
+#pragma omp for collapse (2)
+  for (T i = 0; i < 64; i++)
+    {
+      T v = (i + 4) * 2;
+      for (T j = 0; j < 64; j = j + v)  /* { dg-error "increment expression is bound in intervening code" }  */
+	foo (i, j);
+    }
+}
+
+template<typename T>
+void f4 (void)
+{
+#pragma omp for collapse (2)
+  for (T i = 0; i < 64; i++)
+    {
+      T v = 8;
+      for (T j = v; j < 64; j++)  /* { dg-error "initializer is bound in intervening code" }  */
+	foo (i, j);
+    }
+}
+
+template<typename T>
+void f5 (void)
+{
+#pragma omp for collapse (2)
+  for (T i = 0; i < 64; i++)
+    {
+      T j;
+      for (j = 0; j < 64; j++)  /* { dg-error "loop variable is bound in intervening code" }  */
+	foo (i, j);
+    }
+}
+
+template<typename T>
+void f6 (void)
+{
+#pragma omp for collapse (2)
+  for (T i = 0; i < 64; i++)
+    {
+      T j;
+      {
+	T v = 8;
+	for (j = v; j < 64; j++)    /* { dg-error "loop variable is bound in intervening code" }  */
+	  /* { dg-error "initializer is bound in intervening code" "" { target *-*-* } .-1 } */
+	  foo (i, j);
+      }
+    }
+}
+
+int main()
+{
+  f1<int> ();
+  f2<int> ();
+  f3<int> ();
+  f4<int> ();
+  f5<int> ();
+  f6<int> ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/attrs-imperfect1.C b/libgomp/testsuite/libgomp.c++/attrs-imperfect1.C
new file mode 100644
index 00000000000..4cbea6280cc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/attrs-imperfect1.C
@@ -0,0 +1,76 @@ 
+/* { dg-do run } */
+
+static int f1count[3], f2count[3];
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+int f1 (int depth, int iter)
+{
+  f1count[depth]++;
+  return iter;
+}
+
+int f2 (int depth, int iter)
+{
+  f2count[depth]++;
+  return iter;
+}
+
+void s1 (int a1, int a2, int a3)
+{
+  int i, j, k;
+
+  [[ omp :: directive (for, collapse(3)) ]]
+  for (i = 0; i < a1; i++)
+    {
+      f1 (0, i);
+      for (j = 0; j < a2; j++)
+	{
+	  f1 (1, j);
+	  for (k = 0; k < a3; k++)
+	    {
+	      f1 (2, k);
+	      f2 (2, k);
+	    }
+	  f2 (1, j);
+	}
+      f2 (0, i);
+    }
+}
+
+int
+main (void)
+{
+  f1count[0] = 0;
+  f1count[1] = 0;
+  f1count[2] = 0;
+  f2count[0] = 0;
+  f2count[1] = 0;
+  f2count[2] = 0;
+
+  s1 (3, 4, 5);
+
+  /* All intervening code at the same depth must be executed the same
+     number of times. */
+  if (f1count[0] != f2count[0]) abort ();
+  if (f1count[1] != f2count[1]) abort ();
+  if (f1count[2] != f2count[2]) abort ();
+
+  /* Intervening code must be executed at least as many times as the loop
+     that encloses it. */
+  if (f1count[0] < 3) abort ();
+  if (f1count[1] < 3 * 4) abort ();
+
+  /* Intervening code must not be executed more times than the number
+     of logical iterations. */
+  if (f1count[0] > 3 * 4 * 5) abort ();
+  if (f1count[1] > 3 * 4 * 5) abort ();
+
+  /* Check that the innermost loop body is executed exactly the number
+     of logical iterations expected. */
+  if (f1count[2] != 3 * 4 * 5) abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/attrs-imperfect2.C b/libgomp/testsuite/libgomp.c++/attrs-imperfect2.C
new file mode 100644
index 00000000000..9fb82d9c817
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/attrs-imperfect2.C
@@ -0,0 +1,114 @@ 
+/* { dg-do run } */
+
+static int f1count[3], f2count[3];
+static int g1count[3], g2count[3];
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+int f1 (int depth, int iter)
+{
+  f1count[depth]++;
+  return iter;
+}
+
+int f2 (int depth, int iter)
+{
+  f2count[depth]++;
+  return iter;
+}
+
+int g1 (int depth, int iter)
+{
+  g1count[depth]++;
+  return iter;
+}
+
+int g2 (int depth, int iter)
+{
+  g2count[depth]++;
+  return iter;
+}
+
+void s1 (int a1, int a2, int a3)
+{
+  int i, j, k;
+
+  [[ omp :: directive (for, collapse(3)) ]]
+  for (i = 0; i < a1; i++)
+    {
+      f1 (0, i);	/* Depth-0 intervening code, directly in the loop body.  */
+      {
+	g1 (0, i);	/* Depth-0 intervening code, inside a nested block.  */
+	for (j = 0; j < a2; j++)
+	  {
+	    f1 (1, j);
+	    {
+	      g1 (1, j);
+	      for (k = 0; k < a3; k++)
+		{
+		  f1 (2, k);
+		  {
+		    g1 (2, k);
+		    g2 (2, k);
+		  }
+		  f2 (2, k);
+		}
+	      g2 (1, j);
+	    }
+	    f2 (1, j);	/* Still inside the j loop body.  */
+	  }
+	g2 (0, i);
+      }
+      f2 (0, i);
+    }
+}
+
+int
+main (void)
+{
+  f1count[0] = 0;
+  f1count[1] = 0;
+  f1count[2] = 0;
+  f2count[0] = 0;
+  f2count[1] = 0;
+  f2count[2] = 0;
+
+  g1count[0] = 0;
+  g1count[1] = 0;
+  g1count[2] = 0;
+  g2count[0] = 0;
+  g2count[1] = 0;
+  g2count[2] = 0;
+
+  s1 (3, 4, 5);
+
+  /* All intervening code at the same depth must be executed the same
+     number of times. */
+  if (f1count[0] != f2count[0]) abort ();
+  if (f1count[1] != f2count[1]) abort ();
+  if (f1count[2] != f2count[2]) abort ();
+  if (g1count[0] != f1count[0]) abort ();
+  if (g2count[0] != f1count[0]) abort ();
+  if (g1count[1] != f1count[1]) abort ();
+  if (g2count[1] != f1count[1]) abort ();
+  if (g1count[2] != f1count[2]) abort ();
+  if (g2count[2] != f1count[2]) abort ();
+
+  /* Intervening code must be executed at least as many times as the loop
+     that encloses it. */
+  if (f1count[0] < 3) abort ();
+  if (f1count[1] < 3 * 4) abort ();
+
+  /* Intervening code must not be executed more times than the number
+     of logical iterations. */
+  if (f1count[0] > 3 * 4 * 5) abort ();
+  if (f1count[1] > 3 * 4 * 5) abort ();
+
+  /* Check that the innermost loop body is executed exactly the number
+     of logical iterations expected. */
+  if (f1count[2] != 3 * 4 * 5) abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/attrs-imperfect3.C b/libgomp/testsuite/libgomp.c++/attrs-imperfect3.C
new file mode 100644
index 00000000000..51cb23aa02d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/attrs-imperfect3.C
@@ -0,0 +1,119 @@ 
+/* { dg-do run } */
+
+/* Like attrs-imperfect2.C, but includes bindings in the blocks.  */
+
+static int f1count[3], f2count[3];
+static int g1count[3], g2count[3];
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+int f1 (int depth, int iter)
+{
+  f1count[depth]++;
+  return iter;
+}
+
+int f2 (int depth, int iter)
+{
+  f2count[depth]++;
+  return iter;
+}
+
+int g1 (int depth, int iter)
+{
+  g1count[depth]++;
+  return iter;
+}
+
+int g2 (int depth, int iter)
+{
+  g2count[depth]++;
+  return iter;
+}
+
+void s1 (int a1, int a2, int a3)
+{
+  int i, j, k;
+
+  [[ omp :: directive (for, collapse(3)) ]]
+  for (i = 0; i < a1; i++)
+    {
+      int local0 = 0;	/* Binding in intervening code; used as the depth.  */
+      f1 (local0, i);
+      {
+	g1 (local0, i);
+	for (j = 0; j < a2; j++)
+	  {
+	    int local1 = 1;
+	    f1 (local1, j);
+	    {
+	      g1 (local1, j);
+	      for (k = 0; k < a3; k++)
+		{
+		  int local2 = 2;
+		  f1 (local2, k);
+		  {
+		    g1 (local2, k);
+		    g2 (local2, k);
+		  }
+		  f2 (local2, k);
+		}
+	      g2 (local1, j);
+	    }
+	    f2 (local1, j);	/* Still inside the j loop body.  */
+	  }
+	g2 (local0, i);
+      }
+      f2 (local0, i);
+    }
+}
+
+int
+main (void)
+{
+  f1count[0] = 0;
+  f1count[1] = 0;
+  f1count[2] = 0;
+  f2count[0] = 0;
+  f2count[1] = 0;
+  f2count[2] = 0;
+
+  g1count[0] = 0;
+  g1count[1] = 0;
+  g1count[2] = 0;
+  g2count[0] = 0;
+  g2count[1] = 0;
+  g2count[2] = 0;
+
+  s1 (3, 4, 5);
+
+  /* All intervening code at the same depth must be executed the same
+     number of times. */
+  if (f1count[0] != f2count[0]) abort ();
+  if (f1count[1] != f2count[1]) abort ();
+  if (f1count[2] != f2count[2]) abort ();
+  if (g1count[0] != f1count[0]) abort ();
+  if (g2count[0] != f1count[0]) abort ();
+  if (g1count[1] != f1count[1]) abort ();
+  if (g2count[1] != f1count[1]) abort ();
+  if (g1count[2] != f1count[2]) abort ();
+  if (g2count[2] != f1count[2]) abort ();
+
+  /* Intervening code must be executed at least as many times as the loop
+     that encloses it. */
+  if (f1count[0] < 3) abort ();
+  if (f1count[1] < 3 * 4) abort ();
+
+  /* Intervening code must not be executed more times than the number
+     of logical iterations. */
+  if (f1count[0] > 3 * 4 * 5) abort ();
+  if (f1count[1] > 3 * 4 * 5) abort ();
+
+  /* Check that the innermost loop body is executed exactly the number
+     of logical iterations expected. */
+  if (f1count[2] != 3 * 4 * 5) abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/attrs-imperfect4.C b/libgomp/testsuite/libgomp.c++/attrs-imperfect4.C
new file mode 100644
index 00000000000..cc0a034bbed
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/attrs-imperfect4.C
@@ -0,0 +1,117 @@ 
+/* { dg-do run } */
+
+/* Like attrs-imperfect2.C, but includes blocks that are themselves
+   intervening code.  */
+
+static int f1count[3], f2count[3];
+static int g1count[3], g2count[3];
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+int f1 (int depth, int iter)
+{
+  f1count[depth]++;
+  return iter;
+}
+
+int f2 (int depth, int iter)
+{
+  f2count[depth]++;
+  return iter;
+}
+
+int g1 (int depth, int iter)
+{
+  g1count[depth]++;
+  return iter;
+}
+
+int g2 (int depth, int iter)
+{
+  g2count[depth]++;
+  return iter;
+}
+
+void s1 (int a1, int a2, int a3)
+{
+  int i, j, k;
+
+  [[ omp :: directive (for, collapse(3)) ]]
+  for (i = 0; i < a1; i++)
+    {
+      { f1 (0, i); }
+      {
+	g1 (0, i);
+	for (j = 0; j < a2; j++)
+	  {
+	    { f1 (1, j); }
+	    {
+	      { g1 (1, j); }
+	      for (k = 0; k < a3; k++)
+		{
+		  f1 (2, k);
+		  {
+		    g1 (2, k);
+		    g2 (2, k);
+		  }
+		  f2 (2, k);
+		}
+	      { g2 (1, j); }
+	    }
+	    { f2 (1, j); }
+	  }
+	{ g2 (0, i); }
+      }
+      { f2 (0, i); }
+    }
+}
+
+int
+main (void)
+{
+  f1count[0] = 0;
+  f1count[1] = 0;
+  f1count[2] = 0;
+  f2count[0] = 0;
+  f2count[1] = 0;
+  f2count[2] = 0;
+
+  g1count[0] = 0;
+  g1count[1] = 0;
+  g1count[2] = 0;
+  g2count[0] = 0;
+  g2count[1] = 0;
+  g2count[2] = 0;
+
+  s1 (3, 4, 5);
+
+  /* All intervening code at the same depth must be executed the same
+     number of times. */
+  if (f1count[0] != f2count[0]) abort ();
+  if (f1count[1] != f2count[1]) abort ();
+  if (f1count[2] != f2count[2]) abort ();
+  if (g1count[0] != f1count[0]) abort ();
+  if (g2count[0] != f1count[0]) abort ();
+  if (g1count[1] != f1count[1]) abort ();
+  if (g2count[1] != f1count[1]) abort ();
+  if (g1count[2] != f1count[2]) abort ();
+  if (g2count[2] != f1count[2]) abort ();
+
+  /* Intervening code must be executed at least as many times as the loop
+     that encloses it. */
+  if (f1count[0] < 3) abort ();
+  if (f1count[1] < 3 * 4) abort ();
+
+  /* Intervening code must not be executed more times than the number
+     of logical iterations. */
+  if (f1count[0] > 3 * 4 * 5) abort ();
+  if (f1count[1] > 3 * 4 * 5) abort ();
+
+  /* Check that the innermost loop body is executed exactly the number
+     of logical iterations expected. */
+  if (f1count[2] != 3 * 4 * 5) abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/attrs-imperfect5.C b/libgomp/testsuite/libgomp.c++/attrs-imperfect5.C
new file mode 100644
index 00000000000..89a969db8cc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/attrs-imperfect5.C
@@ -0,0 +1,49 @@ 
+/* { dg-do run } */
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+static int inner_loop_count = 0;
+static int intervening_code_count = 0;
+
+void
+g (int x, int y)
+{
+  inner_loop_count++;
+}
+
+int
+foo (int imax, int jmax)
+{
+  int j = 0;
+
+  [[ omp :: directive (for, collapse(2)) ]]
+  for (int i = 0; i < imax; ++i)
+    {
+      /* All the intervening code at the same level must be executed
+	 the same number of times.  */
+      ++intervening_code_count;
+      for (int j = 0; j < jmax; ++j)
+	{
+	  g (i, j);
+	}
+      /* This is the outer j, not the one from the inner collapsed loop.  */
+      ++j;
+    }
+  return j;
+}
+
+int
+main (void)
+{
+  int j = foo (5, 3);
+  if (j != intervening_code_count)
+    abort ();
+  if (inner_loop_count != 5 * 3)
+    abort ();
+  if (intervening_code_count < 5 || intervening_code_count > 5 * 3)
+    abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/attrs-imperfect6.C b/libgomp/testsuite/libgomp.c++/attrs-imperfect6.C
new file mode 100644
index 00000000000..01f9be123a6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/attrs-imperfect6.C
@@ -0,0 +1,115 @@ 
+/* { dg-do run } */
+
+/* Like attrs-imperfect4.C, but binds the iteration variables in the loops.  */
+
+static int f1count[3], f2count[3];
+static int g1count[3], g2count[3];
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+int f1 (int depth, int iter)
+{
+  f1count[depth]++;
+  return iter;
+}
+
+int f2 (int depth, int iter)
+{
+  f2count[depth]++;
+  return iter;
+}
+
+int g1 (int depth, int iter)
+{
+  g1count[depth]++;
+  return iter;
+}
+
+int g2 (int depth, int iter)
+{
+  g2count[depth]++;
+  return iter;
+}
+
+void s1 (int a1, int a2, int a3)
+{
+
+  [[ omp :: directive (for, collapse(3)) ]]
+  for (int i = 0; i < a1; i++)
+    {
+      { f1 (0, i); }
+      {
+	g1 (0, i);
+	for (int j = 0; j < a2; j++)
+	  {
+	    { f1 (1, j); }
+	    {
+	      { g1 (1, j); }
+	      for (int k = 0; k < a3; k++)
+		{
+		  f1 (2, k);
+		  {
+		    g1 (2, k);
+		    g2 (2, k);
+		  }
+		  f2 (2, k);
+		}
+	      { g2 (1, j); }
+	    }
+	    { f2 (1, j); }
+	  }
+	{ g2 (0, i); }
+      }
+      { f2 (0, i); }
+    }
+}
+
+int
+main (void)
+{
+  f1count[0] = 0;
+  f1count[1] = 0;
+  f1count[2] = 0;
+  f2count[0] = 0;
+  f2count[1] = 0;
+  f2count[2] = 0;
+
+  g1count[0] = 0;
+  g1count[1] = 0;
+  g1count[2] = 0;
+  g2count[0] = 0;
+  g2count[1] = 0;
+  g2count[2] = 0;
+
+  s1 (3, 4, 5);
+
+  /* All intervening code at the same depth must be executed the same
+     number of times. */
+  if (f1count[0] != f2count[0]) abort ();
+  if (f1count[1] != f2count[1]) abort ();
+  if (f1count[2] != f2count[2]) abort ();
+  if (g1count[0] != f1count[0]) abort ();
+  if (g2count[0] != f1count[0]) abort ();
+  if (g1count[1] != f1count[1]) abort ();
+  if (g2count[1] != f1count[1]) abort ();
+  if (g1count[2] != f1count[2]) abort ();
+  if (g2count[2] != f1count[2]) abort ();
+
+  /* Intervening code must be executed at least as many times as the loop
+     that encloses it. */
+  if (f1count[0] < 3) abort ();
+  if (f1count[1] < 3 * 4) abort ();
+
+  /* Intervening code must not be executed more times than the number
+     of logical iterations. */
+  if (f1count[0] > 3 * 4 * 5) abort ();
+  if (f1count[1] > 3 * 4 * 5) abort ();
+
+  /* Check that the innermost loop body is executed exactly the number
+     of logical iterations expected. */
+  if (f1count[2] != 3 * 4 * 5) abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/imperfect-class-1.C b/libgomp/testsuite/libgomp.c++/imperfect-class-1.C
new file mode 100644
index 00000000000..3c39c42c107
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/imperfect-class-1.C
@@ -0,0 +1,169 @@ 
+// { dg-do run }
+// Test that class iterators and imperfectly-nested loops work together.
+// This variant tests initialization by assignment.
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+typedef int T;
+typedef int S;
+
+class I
+{
+public:
+  typedef ptrdiff_t difference_type;
+  I ();
+  ~I ();
+  I (T *);
+  I (const I &);
+  T &operator * ();
+  T *operator -> ();
+  T &operator [] (const difference_type &) const;
+  I &operator = (const I &);
+  I &operator ++ ();
+  I operator ++ (int);
+  I &operator -- ();
+  I operator -- (int);
+  I &operator += (const difference_type &);
+  I &operator -= (const difference_type &);
+  I operator + (const difference_type &) const;
+  I operator - (const difference_type &) const;
+  friend bool operator == (I &, I &);
+  friend bool operator == (const I &, const I &);
+  friend bool operator < (I &, I &);
+  friend bool operator < (const I &, const I &);
+  friend bool operator <= (I &, I &);
+  friend bool operator <= (const I &, const I &);
+  friend bool operator > (I &, I &);
+  friend bool operator > (const I &, const I &);
+  friend bool operator >= (I &, I &);
+  friend bool operator >= (const I &, const I &);
+  friend typename I::difference_type operator - (I &, I &);
+  friend typename I::difference_type operator - (const I &, const I &);
+  friend I operator + (typename I::difference_type , const I &);
+private:
+  T *p;
+};
+ I::I () : p (0) {}
+ I::~I () { p = (T *) 0; }
+ I::I (T *x) : p (x) {}
+ I::I (const I &x) : p (x.p) {}
+ T &I::operator * () { return *p; }
+ T *I::operator -> () { return p; }
+ T &I::operator [] (const difference_type &x) const { return p[x]; }
+ I &I::operator = (const I &x) { p = x.p; return *this; }
+ I &I::operator ++ () { ++p; return *this; }
+ I I::operator ++ (int) { return I (p++); }
+ I &I::operator -- () { --p; return *this; }
+ I I::operator -- (int) { return I (p--); }
+ I &I::operator += (const difference_type &x) { p += x; return *this; }
+ I &I::operator -= (const difference_type &x) { p -= x; return *this; }
+ I I::operator + (const difference_type &x) const { return I (p + x); }
+ I I::operator - (const difference_type &x) const { return I (p - x); }
+ bool operator == (I &x, I &y) { return x.p == y.p; }
+ bool operator == (const I &x, const I &y) { return x.p == y.p; }
+ bool operator != (I &x, I &y) { return !(x == y); }
+ bool operator != (const I &x, const I &y) { return !(x == y); }
+ bool operator < (I &x, I &y) { return x.p < y.p; }
+ bool operator < (const I &x, const I &y) { return x.p < y.p; }
+ bool operator <= (I &x, I &y) { return x.p <= y.p; }
+ bool operator <= (const I &x, const I &y) { return x.p <= y.p; }
+ bool operator > (I &x, I &y) { return x.p > y.p; }
+ bool operator > (const I &x, const I &y) { return x.p > y.p; }
+ bool operator >= (I &x, I &y) { return x.p >= y.p; }
+ bool operator >= (const I &x, const I &y) { return x.p >= y.p; }
+ typename I::difference_type operator - (I &x, I &y) { return x.p - y.p; }
+ typename I::difference_type operator - (const I &x, const I &y) { return x.p - y.p; }
+ I operator + (typename I::difference_type x, const I &y) { return I (x + y.p); }
+
+class J
+{
+ public:
+ J(const I &x, const I &y) : b (x), e (y) {}
+ const I &begin ();
+ const I &end ();
+ private:
+ I b, e;
+};
+
+const I &J::begin () { return b; }
+const I &J::end () { return e; }
+
+static int f1count[3], f2count[3];
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+void f1 (int depth)
+{
+  f1count[depth]++;
+}
+
+void f2 (int depth)
+{
+  f2count[depth]++;
+}
+
+void s1 (J a1, J a2, J a3)
+{
+  I i, j, k;
+
+#pragma omp for collapse(3)
+  for (i = a1.begin (); i < a1.end (); i++)
+    {
+      f1 (0);
+      for (j = a2.begin (); j < a2.end (); j++)
+	{
+	  f1 (1);
+	  for (k = a3.begin (); k < a3.end (); k++)
+	    {
+	      f1 (2);
+	      f2 (2);
+	    }
+	  f2 (1);
+	}
+      f2 (0);
+    }
+}
+
+
+int
+main (void)
+{
+
+  int index[] = {0, 1, 2, 3, 4, 5};
+
+  J x (&index[0], &index[3]);
+  J y (&index[0], &index[4]);
+  J z (&index[0], &index[5]);
+
+  f1count[0] = 0;
+  f1count[1] = 0;
+  f1count[2] = 0;
+  f2count[0] = 0;
+  f2count[1] = 0;
+  f2count[2] = 0;
+
+  s1 (x, y, z);
+
+  /* All intervening code at the same depth must be executed the same
+     number of times. */
+  if (f1count[0] != f2count[0]) abort ();
+  if (f1count[1] != f2count[1]) abort ();
+  if (f1count[2] != f2count[2]) abort ();
+
+  /* Intervening code must be executed at least as many times as the loop
+     that encloses it. */
+  if (f1count[0] < 3) abort ();
+  if (f1count[1] < 3 * 4) abort ();
+
+  /* Intervening code must not be executed more times than the number
+     of logical iterations. */
+  if (f1count[0] > 3 * 4 * 5) abort ();
+  if (f1count[1] > 3 * 4 * 5) abort ();
+
+  /* Check that the innermost loop body is executed exactly the number
+     of logical iterations expected. */
+  if (f1count[2] != 3 * 4 * 5) abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/imperfect-class-2.C b/libgomp/testsuite/libgomp.c++/imperfect-class-2.C
new file mode 100644
index 00000000000..c6b657cabba
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/imperfect-class-2.C
@@ -0,0 +1,167 @@ 
+// { dg-do run }
+// Test that class iterators and imperfectly-nested loops work together.
+// This variant tests loop initialization by declaration.
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+typedef int T;
+typedef int S;
+
+class I
+{
+public:
+  typedef ptrdiff_t difference_type;
+  I ();
+  ~I ();
+  I (T *);
+  I (const I &);
+  T &operator * ();
+  T *operator -> ();
+  T &operator [] (const difference_type &) const;
+  I &operator = (const I &);
+  I &operator ++ ();
+  I operator ++ (int);
+  I &operator -- ();
+  I operator -- (int);
+  I &operator += (const difference_type &);
+  I &operator -= (const difference_type &);
+  I operator + (const difference_type &) const;
+  I operator - (const difference_type &) const;
+  friend bool operator == (I &, I &);
+  friend bool operator == (const I &, const I &);
+  friend bool operator < (I &, I &);
+  friend bool operator < (const I &, const I &);
+  friend bool operator <= (I &, I &);
+  friend bool operator <= (const I &, const I &);
+  friend bool operator > (I &, I &);
+  friend bool operator > (const I &, const I &);
+  friend bool operator >= (I &, I &);
+  friend bool operator >= (const I &, const I &);
+  friend typename I::difference_type operator - (I &, I &);
+  friend typename I::difference_type operator - (const I &, const I &);
+  friend I operator + (typename I::difference_type , const I &);
+private:
+  T *p;
+};
+ I::I () : p (0) {}
+ I::~I () { p = (T *) 0; }
+ I::I (T *x) : p (x) {}
+ I::I (const I &x) : p (x.p) {}
+ T &I::operator * () { return *p; }
+ T *I::operator -> () { return p; }
+ T &I::operator [] (const difference_type &x) const { return p[x]; }
+ I &I::operator = (const I &x) { p = x.p; return *this; }
+ I &I::operator ++ () { ++p; return *this; }
+ I I::operator ++ (int) { return I (p++); }
+ I &I::operator -- () { --p; return *this; }
+ I I::operator -- (int) { return I (p--); }
+ I &I::operator += (const difference_type &x) { p += x; return *this; }
+ I &I::operator -= (const difference_type &x) { p -= x; return *this; }
+ I I::operator + (const difference_type &x) const { return I (p + x); }
+ I I::operator - (const difference_type &x) const { return I (p - x); }
+ bool operator == (I &x, I &y) { return x.p == y.p; }
+ bool operator == (const I &x, const I &y) { return x.p == y.p; }
+ bool operator != (I &x, I &y) { return !(x == y); }
+ bool operator != (const I &x, const I &y) { return !(x == y); }
+ bool operator < (I &x, I &y) { return x.p < y.p; }
+ bool operator < (const I &x, const I &y) { return x.p < y.p; }
+ bool operator <= (I &x, I &y) { return x.p <= y.p; }
+ bool operator <= (const I &x, const I &y) { return x.p <= y.p; }
+ bool operator > (I &x, I &y) { return x.p > y.p; }
+ bool operator > (const I &x, const I &y) { return x.p > y.p; }
+ bool operator >= (I &x, I &y) { return x.p >= y.p; }
+ bool operator >= (const I &x, const I &y) { return x.p >= y.p; }
+ typename I::difference_type operator - (I &x, I &y) { return x.p - y.p; }
+ typename I::difference_type operator - (const I &x, const I &y) { return x.p - y.p; }
+ I operator + (typename I::difference_type x, const I &y) { return I (x + y.p); }
+
+class J
+{
+ public:
+ J(const I &x, const I &y) : b (x), e (y) {}
+ const I &begin ();
+ const I &end ();
+ private:
+ I b, e;
+};
+
+const I &J::begin () { return b; }
+const I &J::end () { return e; }
+
+static int f1count[3], f2count[3];
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+void f1 (int depth)
+{
+  f1count[depth]++;
+}
+
+void f2 (int depth)
+{
+  f2count[depth]++;
+}
+
+void s1 (J a1, J a2, J a3)
+{
+#pragma omp for collapse(3)
+  for (I i = a1.begin (); i < a1.end (); i++)
+    {
+      f1 (0);
+      for (I j = a2.begin (); j < a2.end (); j++)
+	{
+	  f1 (1);
+	  for (I k = a3.begin (); k < a3.end (); k++)
+	    {
+	      f1 (2);
+	      f2 (2);
+	    }
+	  f2 (1);
+	}
+      f2 (0);
+    }
+}
+
+
+int
+main (void)
+{
+
+  int index[] = {0, 1, 2, 3, 4, 5};
+
+  J x (&index[0], &index[3]);
+  J y (&index[0], &index[4]);
+  J z (&index[0], &index[5]);
+
+  f1count[0] = 0;
+  f1count[1] = 0;
+  f1count[2] = 0;
+  f2count[0] = 0;
+  f2count[1] = 0;
+  f2count[2] = 0;
+
+  s1 (x, y, z);
+
+  /* All intervening code at the same depth must be executed the same
+     number of times. */
+  if (f1count[0] != f2count[0]) abort ();
+  if (f1count[1] != f2count[1]) abort ();
+  if (f1count[2] != f2count[2]) abort ();
+
+  /* Intervening code must be executed at least as many times as the loop
+     that encloses it. */
+  if (f1count[0] < 3) abort ();
+  if (f1count[1] < 3 * 4) abort ();
+
+  /* Intervening code must not be executed more times than the number
+     of logical iterations. */
+  if (f1count[0] > 3 * 4 * 5) abort ();
+  if (f1count[1] > 3 * 4 * 5) abort ();
+
+  /* Check that the innermost loop body is executed exactly the number
+     of logical iterations expected. */
+  if (f1count[2] != 3 * 4 * 5) abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/imperfect-class-3.C b/libgomp/testsuite/libgomp.c++/imperfect-class-3.C
new file mode 100644
index 00000000000..c33826a6b36
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/imperfect-class-3.C
@@ -0,0 +1,167 @@ 
+// { dg-do run }
+// Test that class iterators and imperfectly-nested loops work together.
+// This variant tests range for.
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+typedef int T;
+typedef int S;
+
+class I
+{
+public:
+  typedef ptrdiff_t difference_type;
+  I ();
+  ~I ();
+  I (T *);
+  I (const I &);
+  T &operator * ();
+  T *operator -> ();
+  T &operator [] (const difference_type &) const;
+  I &operator = (const I &);
+  I &operator ++ ();
+  I operator ++ (int);
+  I &operator -- ();
+  I operator -- (int);
+  I &operator += (const difference_type &);
+  I &operator -= (const difference_type &);
+  I operator + (const difference_type &) const;
+  I operator - (const difference_type &) const;
+  friend bool operator == (I &, I &);
+  friend bool operator == (const I &, const I &);
+  friend bool operator < (I &, I &);
+  friend bool operator < (const I &, const I &);
+  friend bool operator <= (I &, I &);
+  friend bool operator <= (const I &, const I &);
+  friend bool operator > (I &, I &);
+  friend bool operator > (const I &, const I &);
+  friend bool operator >= (I &, I &);
+  friend bool operator >= (const I &, const I &);
+  friend typename I::difference_type operator - (I &, I &);
+  friend typename I::difference_type operator - (const I &, const I &);
+  friend I operator + (typename I::difference_type , const I &);
+private:
+  T *p;
+};
+ I::I () : p (0) {}
+ I::~I () { p = (T *) 0; }
+ I::I (T *x) : p (x) {}
+ I::I (const I &x) : p (x.p) {}
+ T &I::operator * () { return *p; }
+ T *I::operator -> () { return p; }
+ T &I::operator [] (const difference_type &x) const { return p[x]; }
+ I &I::operator = (const I &x) { p = x.p; return *this; }
+ I &I::operator ++ () { ++p; return *this; }
+ I I::operator ++ (int) { return I (p++); }
+ I &I::operator -- () { --p; return *this; }
+ I I::operator -- (int) { return I (p--); }
+ I &I::operator += (const difference_type &x) { p += x; return *this; }
+ I &I::operator -= (const difference_type &x) { p -= x; return *this; }
+ I I::operator + (const difference_type &x) const { return I (p + x); }
+ I I::operator - (const difference_type &x) const { return I (p - x); }
+ bool operator == (I &x, I &y) { return x.p == y.p; }
+ bool operator == (const I &x, const I &y) { return x.p == y.p; }
+ bool operator != (I &x, I &y) { return !(x == y); }
+ bool operator != (const I &x, const I &y) { return !(x == y); }
+ bool operator < (I &x, I &y) { return x.p < y.p; }
+ bool operator < (const I &x, const I &y) { return x.p < y.p; }
+ bool operator <= (I &x, I &y) { return x.p <= y.p; }
+ bool operator <= (const I &x, const I &y) { return x.p <= y.p; }
+ bool operator > (I &x, I &y) { return x.p > y.p; }
+ bool operator > (const I &x, const I &y) { return x.p > y.p; }
+ bool operator >= (I &x, I &y) { return x.p >= y.p; }
+ bool operator >= (const I &x, const I &y) { return x.p >= y.p; }
+ typename I::difference_type operator - (I &x, I &y) { return x.p - y.p; }
+ typename I::difference_type operator - (const I &x, const I &y) { return x.p - y.p; }
+ I operator + (typename I::difference_type x, const I &y) { return I (x + y.p); }
+
+class J
+{
+ public:
+ J(const I &x, const I &y) : b (x), e (y) {}
+ const I &begin ();
+ const I &end ();
+ private:
+ I b, e;
+};
+
+const I &J::begin () { return b; }
+const I &J::end () { return e; }
+
+static int f1count[3], f2count[3];
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+void f1 (int depth)
+{
+  f1count[depth]++;
+}
+
+void f2 (int depth)
+{
+  f2count[depth]++;
+}
+
+void s1 (J a1, J a2, J a3)
+{
+#pragma omp for collapse(3)
+  for (auto i : a1)
+    {
+      f1 (0);
+      for (auto j : a2)
+	{
+	  f1 (1);
+	  for (auto k : a3)
+	    {
+	      f1 (2);
+	      f2 (2);
+	    }
+	  f2 (1);
+	}
+      f2 (0);
+    }
+}
+
+
+int
+main (void)
+{
+
+  int index[] = {0, 1, 2, 3, 4, 5};
+
+  J x (&index[0], &index[3]);
+  J y (&index[0], &index[4]);
+  J z (&index[0], &index[5]);
+
+  f1count[0] = 0;
+  f1count[1] = 0;
+  f1count[2] = 0;
+  f2count[0] = 0;
+  f2count[1] = 0;
+  f2count[2] = 0;
+
+  s1 (x, y, z);
+
+  /* All intervening code at the same depth must be executed the same
+     number of times. */
+  if (f1count[0] != f2count[0]) abort ();
+  if (f1count[1] != f2count[1]) abort ();
+  if (f1count[2] != f2count[2]) abort ();
+
+  /* Intervening code must be executed at least as many times as the loop
+     that encloses it. */
+  if (f1count[0] < 3) abort ();
+  if (f1count[1] < 3 * 4) abort ();
+
+  /* Intervening code must not be executed more times than the number
+     of logical iterations. */
+  if (f1count[0] > 3 * 4 * 5) abort ();
+  if (f1count[1] > 3 * 4 * 5) abort ();
+
+  /* Check that the innermost loop body is executed exactly the number
+     of logical iterations expected. */
+  if (f1count[2] != 3 * 4 * 5) abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/imperfect-destructor.C b/libgomp/testsuite/libgomp.c++/imperfect-destructor.C
new file mode 100644
index 00000000000..bd87760e076
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/imperfect-destructor.C
@@ -0,0 +1,135 @@ 
+/* { dg-do run } */
+
+/* Make sure destructors are called for class variables bound
+   in intervening code.  */
+
+static int f1count[3], f2count[3];
+static int g1count[3], g2count[3];
+
+static int ccount[3], dcount[3];
+
+class C {
+ public:
+  int n;
+  C (int nn) { n = nn; ccount[n]++; }
+  ~C () { dcount[n]++; n = 0; }
+};
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+int f1 (int depth, int iter)
+{
+  f1count[depth]++;
+  return iter;
+}
+
+int f2 (int depth, int iter)
+{
+  f2count[depth]++;
+  return iter;
+}
+
+int g1 (int depth, int iter)
+{
+  g1count[depth]++;
+  return iter;
+}
+
+int g2 (int depth, int iter)
+{
+  g2count[depth]++;
+  return iter;
+}
+
+void s1 (int a1, int a2, int a3)
+{
+  int i, j, k;
+
+#pragma omp for collapse(3)
+  for (i = 0; i < a1; i++)
+    {
+      C local0(0);	/* Class object bound in depth-0 intervening code.  */
+      f1 (local0.n, i);
+      {
+	g1 (local0.n, i);
+	for (j = 0; j < a2; j++)
+	  {
+	    C local1(1);
+	    f1 (local1.n, j);
+	    {
+	      g1 (local1.n, j);
+	      for (k = 0; k < a3; k++)
+		{
+		  C local2(2);
+		  f1 (local2.n, k);
+		  {
+		    g1 (local2.n, k);
+		    g2 (local2.n, k);
+		  }
+		  f2 (local2.n, k);
+		}
+	      g2 (local1.n, j);
+	    }
+	    f2 (local1.n, j);	/* Still inside the j loop body.  */
+	  }
+	g2 (local0.n, i);
+      }
+      f2 (local0.n, i);
+    }
+}
+
+int
+main (void)
+{
+  f1count[0] = 0;
+  f1count[1] = 0;
+  f1count[2] = 0;
+  f2count[0] = 0;
+  f2count[1] = 0;
+  f2count[2] = 0;
+
+  g1count[0] = 0;
+  g1count[1] = 0;
+  g1count[2] = 0;
+  g2count[0] = 0;
+  g2count[1] = 0;
+  g2count[2] = 0;
+
+  s1 (3, 4, 5);
+
+  /* All intervening code at the same depth must be executed the same
+     number of times. */
+  if (f1count[0] != f2count[0]) abort ();
+  if (f1count[1] != f2count[1]) abort ();
+  if (f1count[2] != f2count[2]) abort ();
+  if (g1count[0] != f1count[0]) abort ();
+  if (g2count[0] != f1count[0]) abort ();
+  if (g1count[1] != f1count[1]) abort ();
+  if (g2count[1] != f1count[1]) abort ();
+  if (g1count[2] != f1count[2]) abort ();
+  if (g2count[2] != f1count[2]) abort ();
+
+  /* Intervening code must be executed at least as many times as the loop
+     that encloses it. */
+  if (f1count[0] < 3) abort ();
+  if (f1count[1] < 3 * 4) abort ();
+
+  /* Intervening code must not be executed more times than the number
+     of logical iterations. */
+  if (f1count[0] > 3 * 4 * 5) abort ();
+  if (f1count[1] > 3 * 4 * 5) abort ();
+
+  /* Check that the innermost loop body is executed exactly the number
+     of logical iterations expected. */
+  if (f1count[2] != 3 * 4 * 5) abort ();
+
+  /* Check that each class object declared in intervening code was
+     constructed and destructed an equal number of times.  */
+  if (ccount[0] != dcount[0]) abort ();
+  if (ccount[1] != dcount[1]) abort ();
+  if (ccount[2] != dcount[2]) abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/imperfect-template-1.C b/libgomp/testsuite/libgomp.c++/imperfect-template-1.C
new file mode 100644
index 00000000000..4ed96c8319b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/imperfect-template-1.C
@@ -0,0 +1,172 @@ 
+// { dg-do run }
+// Test that template class iterators and imperfectly-nested loops
+// work together.
+// This variant tests initialization by assignment.
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+extern "C" void abort ();
+
+template <typename T>
+class I  /* Iterator over T implemented as a thin wrapper around a raw T*.  */
+{
+public:
+  typedef ptrdiff_t difference_type;
+  I ();
+  ~I ();
+  I (T *);  /* Not explicit, so a plain T* converts to I<T> implicitly.  */
+  I (const I &);
+  T &operator * ();
+  T *operator -> ();
+  T &operator [] (const difference_type &) const;
+  I &operator = (const I &);
+  I &operator ++ ();
+  I operator ++ (int);
+  I &operator -- ();
+  I operator -- (int);
+  I &operator += (const difference_type &);
+  I &operator -= (const difference_type &);
+  I operator + (const difference_type &) const;
+  I operator - (const difference_type &) const;
+  template <typename S> friend bool operator == (I<S> &, I<S> &);  /* Friends read p directly.  */
+  template <typename S> friend bool operator == (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator < (I<S> &, I<S> &);
+  template <typename S> friend bool operator < (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator <= (I<S> &, I<S> &);
+  template <typename S> friend bool operator <= (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator > (I<S> &, I<S> &);
+  template <typename S> friend bool operator > (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator >= (I<S> &, I<S> &);
+  template <typename S> friend bool operator >= (const I<S> &, const I<S> &);
+  template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &);
+  template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &);
+  template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &);
+private:
+  T *p;  /* The wrapped pointer; the class has no other state.  */
+};
+template <typename T> I<T>::I () : p (0) {}  /* Default-constructed iterator holds a null pointer.  */
+template <typename T> I<T>::~I () { p = (T *) 0; }  /* Destruction nulls the pointer.  */
+template <typename T> I<T>::I (T *x) : p (x) {}
+template <typename T> I<T>::I (const I &x) : p (x.p) {}
+template <typename T> T &I<T>::operator * () { return *p; }
+template <typename T> T *I<T>::operator -> () { return p; }
+template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; }
+template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; }
+template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; }
+template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); }  /* Returns the pre-increment position.  */
+template <typename T> I<T> &I<T>::operator -- () { --p; return *this; }
+template <typename T> I<T> I<T>::operator -- (int) { return I (p--); }  /* Returns the pre-decrement position.  */
+template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; }
+template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; }
+template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); }
+template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); }
+template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); }  /* Built on ==, so no friend access is needed.  */
+template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); }
+template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; }
+template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; }
+template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; }  /* Distance between two iterators.  */
+template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; }
+template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); }  /* offset + iterator form.  */
+
+template <typename T>
+class J  /* Pair of I<T> bounds exposed through begin () and end ().  */
+{
+public:
+  J(const I<T> &x, const I<T> &y) : b (x), e (y) {}
+  const I<T> &begin ();
+  const I<T> &end ();
+private:
+  I<T> b, e;  /* Copies of the two bounds passed to the constructor.  */
+};
+
+template <typename T> const I<T> &J<T>::begin () { return b; }  /* First bound given to the constructor.  */
+template <typename T> const I<T> &J<T>::end () { return e; }  /* Second bound given to the constructor.  */
+
+static int f1count[3], f2count[3];  /* Calls to f1 and f2, indexed by their depth argument.  */
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+void f1 (int depth)
+{
+  f1count[depth]++;  /* Tally one f1 call for this nesting depth.  */
+}
+
+void f2 (int depth)
+{
+  f2count[depth]++;  /* Tally one f2 call for this nesting depth.  */
+}
+
+template <typename T>
+void s1 (J<T> a1, J<T> a2, J<T> a3)  /* Collapsed 3-deep nest over a1, a2, a3.  */
+{
+  I<T> i, j, k;  /* Declared up front so each loop initializes by assignment.  */
+
+#pragma omp for collapse(3)
+  for (i = a1.begin (); i < a1.end (); i++)
+    {
+      f1 (0);  /* Intervening code before the j loop.  */
+      for (j = a2.begin (); j < a2.end (); j++)
+	{
+	  f1 (1);  /* Intervening code before the k loop.  */
+	  for (k = a3.begin (); k < a3.end (); k++)
+	    {
+	      f1 (2);  /* Innermost loop body.  */
+	      f2 (2);
+	    }
+	  f2 (1);  /* Intervening code after the k loop.  */
+	}
+      f2 (0);  /* Intervening code after the j loop.  */
+    }
+}
+
+
+int
+main (void)
+{
+
+  int index[] = {0, 1, 2, 3, 4, 5};
+
+  J<int> x (&index[0], &index[3]);  /* Bounds 3 elements apart.  */
+  J<int> y (&index[0], &index[4]);  /* Bounds 4 elements apart.  */
+  J<int> z (&index[0], &index[5]);  /* Bounds 5 elements apart.  */
+
+  f1count[0] = 0;  /* Zero the call counters before running the nest.  */
+  f1count[1] = 0;
+  f1count[2] = 0;
+  f2count[0] = 0;
+  f2count[1] = 0;
+  f2count[2] = 0;
+
+  s1<int> (x, y, z);
+
+  /* All intervening code at the same depth must be executed the same
+     number of times.  */
+  if (f1count[0] != f2count[0]) abort ();
+  if (f1count[1] != f2count[1]) abort ();
+  if (f1count[2] != f2count[2]) abort ();
+
+  /* Intervening code must be executed at least as many times as the loop
+     that encloses it iterates (3 at depth 0, 3 * 4 at depth 1).  */
+  if (f1count[0] < 3) abort ();
+  if (f1count[1] < 3 * 4) abort ();
+
+  /* Intervening code must not be executed more times than the number
+     of logical iterations.  */
+  if (f1count[0] > 3 * 4 * 5) abort ();
+  if (f1count[1] > 3 * 4 * 5) abort ();
+
+  /* Check that the innermost loop body is executed exactly the number
+     of logical iterations expected.  */
+  if (f1count[2] != 3 * 4 * 5) abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/imperfect-template-2.C b/libgomp/testsuite/libgomp.c++/imperfect-template-2.C
new file mode 100644
index 00000000000..a41c87c481f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/imperfect-template-2.C
@@ -0,0 +1,170 @@ 
+// { dg-do run }
+// Test that template class iterators and imperfectly-nested loops
+// work together.
+// This variant tests initialization by declaration.
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+extern "C" void abort ();
+
+template <typename T>
+class I  /* Iterator over T implemented as a thin wrapper around a raw T*.  */
+{
+public:
+  typedef ptrdiff_t difference_type;
+  I ();
+  ~I ();
+  I (T *);  /* Not explicit, so a plain T* converts to I<T> implicitly.  */
+  I (const I &);
+  T &operator * ();
+  T *operator -> ();
+  T &operator [] (const difference_type &) const;
+  I &operator = (const I &);
+  I &operator ++ ();
+  I operator ++ (int);
+  I &operator -- ();
+  I operator -- (int);
+  I &operator += (const difference_type &);
+  I &operator -= (const difference_type &);
+  I operator + (const difference_type &) const;
+  I operator - (const difference_type &) const;
+  template <typename S> friend bool operator == (I<S> &, I<S> &);  /* Friends read p directly.  */
+  template <typename S> friend bool operator == (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator < (I<S> &, I<S> &);
+  template <typename S> friend bool operator < (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator <= (I<S> &, I<S> &);
+  template <typename S> friend bool operator <= (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator > (I<S> &, I<S> &);
+  template <typename S> friend bool operator > (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator >= (I<S> &, I<S> &);
+  template <typename S> friend bool operator >= (const I<S> &, const I<S> &);
+  template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &);
+  template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &);
+  template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &);
+private:
+  T *p;  /* The wrapped pointer; the class has no other state.  */
+};
+template <typename T> I<T>::I () : p (0) {}  /* Default-constructed iterator holds a null pointer.  */
+template <typename T> I<T>::~I () { p = (T *) 0; }  /* Destruction nulls the pointer.  */
+template <typename T> I<T>::I (T *x) : p (x) {}
+template <typename T> I<T>::I (const I &x) : p (x.p) {}
+template <typename T> T &I<T>::operator * () { return *p; }
+template <typename T> T *I<T>::operator -> () { return p; }
+template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; }
+template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; }
+template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; }
+template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); }  /* Returns the pre-increment position.  */
+template <typename T> I<T> &I<T>::operator -- () { --p; return *this; }
+template <typename T> I<T> I<T>::operator -- (int) { return I (p--); }  /* Returns the pre-decrement position.  */
+template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; }
+template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; }
+template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); }
+template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); }
+template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); }  /* Built on ==, so no friend access is needed.  */
+template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); }
+template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; }
+template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; }
+template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; }  /* Distance between two iterators.  */
+template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; }
+template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); }  /* offset + iterator form.  */
+
+template <typename T>
+class J  /* Pair of I<T> bounds exposed through begin () and end ().  */
+{
+public:
+  J(const I<T> &x, const I<T> &y) : b (x), e (y) {}
+  const I<T> &begin ();
+  const I<T> &end ();
+private:
+  I<T> b, e;  /* Copies of the two bounds passed to the constructor.  */
+};
+
+template <typename T> const I<T> &J<T>::begin () { return b; }  /* First bound given to the constructor.  */
+template <typename T> const I<T> &J<T>::end () { return e; }  /* Second bound given to the constructor.  */
+
+static int f1count[3], f2count[3];  /* Calls to f1 and f2, indexed by their depth argument.  */
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+void f1 (int depth)
+{
+  f1count[depth]++;  /* Tally one f1 call for this nesting depth.  */
+}
+
+void f2 (int depth)
+{
+  f2count[depth]++;  /* Tally one f2 call for this nesting depth.  */
+}
+
+template <typename T>
+void s1 (J<T> a1, J<T> a2, J<T> a3)  /* Collapsed 3-deep nest over a1, a2, a3.  */
+{
+#pragma omp for collapse(3)
+  for (I<T> i = a1.begin (); i < a1.end (); i++)  /* Each loop declares its iterator in its init clause.  */
+    {
+      f1 (0);  /* Intervening code before the j loop.  */
+      for (I<T> j = a2.begin (); j < a2.end (); j++)
+	{
+	  f1 (1);  /* Intervening code before the k loop.  */
+	  for (I<T> k = a3.begin (); k < a3.end (); k++)
+	    {
+	      f1 (2);  /* Innermost loop body.  */
+	      f2 (2);
+	    }
+	  f2 (1);  /* Intervening code after the k loop.  */
+	}
+      f2 (0);  /* Intervening code after the j loop.  */
+    }
+}
+
+
+int
+main (void)
+{
+
+  int index[] = {0, 1, 2, 3, 4, 5};
+
+  J<int> x (&index[0], &index[3]);  /* Bounds 3 elements apart.  */
+  J<int> y (&index[0], &index[4]);  /* Bounds 4 elements apart.  */
+  J<int> z (&index[0], &index[5]);  /* Bounds 5 elements apart.  */
+
+  f1count[0] = 0;  /* Zero the call counters before running the nest.  */
+  f1count[1] = 0;
+  f1count[2] = 0;
+  f2count[0] = 0;
+  f2count[1] = 0;
+  f2count[2] = 0;
+
+  s1<int> (x, y, z);
+
+  /* All intervening code at the same depth must be executed the same
+     number of times.  */
+  if (f1count[0] != f2count[0]) abort ();
+  if (f1count[1] != f2count[1]) abort ();
+  if (f1count[2] != f2count[2]) abort ();
+
+  /* Intervening code must be executed at least as many times as the loop
+     that encloses it iterates (3 at depth 0, 3 * 4 at depth 1).  */
+  if (f1count[0] < 3) abort ();
+  if (f1count[1] < 3 * 4) abort ();
+
+  /* Intervening code must not be executed more times than the number
+     of logical iterations.  */
+  if (f1count[0] > 3 * 4 * 5) abort ();
+  if (f1count[1] > 3 * 4 * 5) abort ();
+
+  /* Check that the innermost loop body is executed exactly the number
+     of logical iterations expected.  */
+  if (f1count[2] != 3 * 4 * 5) abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/imperfect-template-3.C b/libgomp/testsuite/libgomp.c++/imperfect-template-3.C
new file mode 100644
index 00000000000..2e464ed5510
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/imperfect-template-3.C
@@ -0,0 +1,170 @@ 
+// { dg-do run }
+// Test that template class iterators and imperfectly-nested loops
+// work together.
+// This variant tests range-based for syntax.
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+extern "C" void abort ();
+
+template <typename T>
+class I  /* Iterator over T implemented as a thin wrapper around a raw T*.  */
+{
+public:
+  typedef ptrdiff_t difference_type;
+  I ();
+  ~I ();
+  I (T *);  /* Not explicit, so a plain T* converts to I<T> implicitly.  */
+  I (const I &);
+  T &operator * ();
+  T *operator -> ();
+  T &operator [] (const difference_type &) const;
+  I &operator = (const I &);
+  I &operator ++ ();
+  I operator ++ (int);
+  I &operator -- ();
+  I operator -- (int);
+  I &operator += (const difference_type &);
+  I &operator -= (const difference_type &);
+  I operator + (const difference_type &) const;
+  I operator - (const difference_type &) const;
+  template <typename S> friend bool operator == (I<S> &, I<S> &);  /* Friends read p directly.  */
+  template <typename S> friend bool operator == (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator < (I<S> &, I<S> &);
+  template <typename S> friend bool operator < (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator <= (I<S> &, I<S> &);
+  template <typename S> friend bool operator <= (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator > (I<S> &, I<S> &);
+  template <typename S> friend bool operator > (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator >= (I<S> &, I<S> &);
+  template <typename S> friend bool operator >= (const I<S> &, const I<S> &);
+  template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &);
+  template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &);
+  template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &);
+private:
+  T *p;  /* The wrapped pointer; the class has no other state.  */
+};
+template <typename T> I<T>::I () : p (0) {}  /* Default-constructed iterator holds a null pointer.  */
+template <typename T> I<T>::~I () { p = (T *) 0; }  /* Destruction nulls the pointer.  */
+template <typename T> I<T>::I (T *x) : p (x) {}
+template <typename T> I<T>::I (const I &x) : p (x.p) {}
+template <typename T> T &I<T>::operator * () { return *p; }
+template <typename T> T *I<T>::operator -> () { return p; }
+template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; }
+template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; }
+template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; }
+template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); }  /* Returns the pre-increment position.  */
+template <typename T> I<T> &I<T>::operator -- () { --p; return *this; }
+template <typename T> I<T> I<T>::operator -- (int) { return I (p--); }  /* Returns the pre-decrement position.  */
+template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; }
+template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; }
+template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); }
+template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); }
+template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); }  /* Built on ==, so no friend access is needed.  */
+template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); }
+template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; }
+template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; }
+template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; }  /* Distance between two iterators.  */
+template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; }
+template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); }  /* offset + iterator form.  */
+
+template <typename T>
+class J  /* Pair of I<T> bounds; begin ()/end () let it drive a range-based for.  */
+{
+public:
+  J(const I<T> &x, const I<T> &y) : b (x), e (y) {}
+  const I<T> &begin ();
+  const I<T> &end ();
+private:
+  I<T> b, e;  /* Copies of the two bounds passed to the constructor.  */
+};
+
+template <typename T> const I<T> &J<T>::begin () { return b; }  /* First bound given to the constructor.  */
+template <typename T> const I<T> &J<T>::end () { return e; }  /* Second bound given to the constructor.  */
+
+static int f1count[3], f2count[3];  /* Calls to f1 and f2, indexed by their depth argument.  */
+
+#ifndef __cplusplus
+extern void abort (void);
+#else
+extern "C" void abort (void);
+#endif
+
+void f1 (int depth)
+{
+  f1count[depth]++;  /* Tally one f1 call for this nesting depth.  */
+}
+
+void f2 (int depth)
+{
+  f2count[depth]++;  /* Tally one f2 call for this nesting depth.  */
+}
+
+template <typename T>
+void s1 (J<T> a1, J<T> a2, J<T> a3)  /* Collapsed 3-deep nest over a1, a2, a3.  */
+{
+#pragma omp for collapse(3)
+  for (auto i : a1)  /* Every level is a range-based for over a J<T>.  */
+    {
+      f1 (0);  /* Intervening code before the j loop.  */
+      for (auto j : a2)
+	{
+	  f1 (1);  /* Intervening code before the k loop.  */
+	  for (auto k : a3)
+	    {
+	      f1 (2);  /* Innermost loop body.  */
+	      f2 (2);
+	    }
+	  f2 (1);  /* Intervening code after the k loop.  */
+	}
+      f2 (0);  /* Intervening code after the j loop.  */
+    }
+}
+
+
+int
+main (void)
+{
+
+  int index[] = {0, 1, 2, 3, 4, 5};
+
+  J<int> x (&index[0], &index[3]);  /* Bounds 3 elements apart.  */
+  J<int> y (&index[0], &index[4]);  /* Bounds 4 elements apart.  */
+  J<int> z (&index[0], &index[5]);  /* Bounds 5 elements apart.  */
+
+  f1count[0] = 0;  /* Zero the call counters before running the nest.  */
+  f1count[1] = 0;
+  f1count[2] = 0;
+  f2count[0] = 0;
+  f2count[1] = 0;
+  f2count[2] = 0;
+
+  s1<int> (x, y, z);
+
+  /* All intervening code at the same depth must be executed the same
+     number of times.  */
+  if (f1count[0] != f2count[0]) abort ();
+  if (f1count[1] != f2count[1]) abort ();
+  if (f1count[2] != f2count[2]) abort ();
+
+  /* Intervening code must be executed at least as many times as the loop
+     that encloses it iterates (3 at depth 0, 3 * 4 at depth 1).  */
+  if (f1count[0] < 3) abort ();
+  if (f1count[1] < 3 * 4) abort ();
+
+  /* Intervening code must not be executed more times than the number
+     of logical iterations.  */
+  if (f1count[0] > 3 * 4 * 5) abort ();
+  if (f1count[1] > 3 * 4 * 5) abort ();
+
+  /* Check that the innermost loop body is executed exactly the number
+     of logical iterations expected.  */
+  if (f1count[2] != 3 * 4 * 5) abort ();
+}