diff mbox

[PING] _Cilk_for for C and C++

Message ID BF230D13CA30DD48930C31D4099330003A4CEF3B@FMSMSX101.amr.corp.intel.com
State New
Headers show

Commit Message

Iyer, Balaji V Feb. 17, 2014, 6:42 a.m. UTC
Hi Jakub,
	I still couldn't reproduce the issue you pointed out below, but I have fixed the other issues you mentioned. I have also ported the test case that you mentioned (for1.C), but I have some questions about the changes and would like to confirm them with a colleague to make sure what I am doing is correct. Monday is a holiday here, so I won't be able to do that until Tuesday. In the meantime, I am attaching the fixed patch. Could you please look at it and let me know what else I need to change?

Thanks,

Balaji V. Iyer.

> -----Original Message-----
> From: Jakub Jelinek [mailto:jakub@redhat.com]
> Sent: Wednesday, February 12, 2014 12:10 PM
> To: Iyer, Balaji V
> Cc: 'Jason Merrill'; 'Jeff Law'; 'Aldy Hernandez'; 'gcc-patches@gcc.gnu.org';
> 'rth@redhat.com'
> Subject: Re: [PING] [PATCH] _Cilk_for for C and C++
> 
> On Wed, Feb 12, 2014 at 05:04:38PM +0000, Iyer, Balaji V wrote:
> > I looked at the test code you send me (cf3.cc) at -O1 and it is
> > removing all the lines you have shown above.  Yes, I would imagine -O0
> > to have code that can be redundant or unnecessary.  Some of it could
> > be the artifact of internal code insertion.  But isn't the main job of
> > the instruction scheduler to remove all these redundant work?
> > Besides, it is just a function call.  The compiler at -O2, -O and -O3
> > removes the chunk of code that you mentioned.
> 
> As I said, just change the testcase so that the operator isn't inline, and
> suddenly even -O3 will not be able to remove the call.
> 
> 	Jakub
diff mbox

Patch

diff --git a/gcc/c-family/c-cilkplus.c b/gcc/c-family/c-cilkplus.c
index 1a16f66..1be12bd 100644
--- a/gcc/c-family/c-cilkplus.c
+++ b/gcc/c-family/c-cilkplus.c
@@ -91,3 +91,52 @@  c_finish_cilk_clauses (tree clauses)
     }
   return clauses;
 }
+
+/* State shared between cilk_for_move_clauses_upward and its walk_tree
+   callback find_cilk_for.  */
+struct clause_struct
+{
+  bool is_set;		/* True once a CILK_FOR has been recorded.  */
+  tree clauses;		/* OMP_FOR_CLAUSES of that CILK_FOR.  */
+};
+
+/* walk_tree callback for cilk_for_move_clauses_upward.  DATA points to a
+   struct clause_struct; the clauses of the first CILK_FOR visited are
+   recorded in it and the walk does not descend into that statement.  */
+
+static tree
+find_cilk_for (tree *tp, int *walk_subtrees, void *data)
+{
+  struct clause_struct *info = (struct clause_struct *) data;
+  tree t = *tp;
+  if (info->is_set || t == NULL_TREE || TREE_CODE (t) != CILK_FOR)
+    return NULL_TREE;
+  info->clauses = OMP_FOR_CLAUSES (t);
+  info->is_set = true;
+  *walk_subtrees = 0;
+  return NULL_TREE;
+}
+
+/* Links the IF and SCHEDULE clauses of the first CILK_FOR found in STMT into
+   *PARALLEL_CLAUSES.  The clause nodes themselves are linked, not copied.  */
+ 
+void
+cilk_for_move_clauses_upward (tree *parallel_clauses, tree stmt)
+{
+  struct clause_struct cstruct;
+  cstruct.is_set = false;
+  cstruct.clauses = NULL_TREE;
+  walk_tree (&stmt, find_cilk_for, (void *) &cstruct, NULL);
+  /* NOTE(review): once set, only the head's chain is rewritten below.  */
+  tree clauses = cstruct.clauses;
+  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SCHEDULE
+	|| OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF)
+      {
+	if (*parallel_clauses)
+	  OMP_CLAUSE_CHAIN (*parallel_clauses) = c;
+	else
+	  *parallel_clauses = c;
+      }
+}
+
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index bfc5797..eb6e2fb 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -416,6 +416,7 @@  const struct c_common_resword c_common_reswords[] =
   { "_Complex",		RID_COMPLEX,	0 },
   { "_Cilk_spawn",      RID_CILK_SPAWN, 0 },
   { "_Cilk_sync",       RID_CILK_SYNC,  0 },
+  { "_Cilk_for",        RID_CILK_FOR,   0 },
   { "_Imaginary",	RID_IMAGINARY, D_CONLY },
   { "_Decimal32",       RID_DFLOAT32,  D_CONLY | D_EXT },
   { "_Decimal64",       RID_DFLOAT64,  D_CONLY | D_EXT },
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
old mode 100644
new mode 100755
index f074ab1..509490c
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -149,7 +149,7 @@  enum rid
   RID_CONSTEXPR, RID_DECLTYPE, RID_NOEXCEPT, RID_NULLPTR, RID_STATIC_ASSERT,
 
   /* Cilk Plus keywords.  */
-  RID_CILK_SPAWN, RID_CILK_SYNC,
+  RID_CILK_SPAWN, RID_CILK_SYNC, RID_CILK_FOR,
   
   /* Objective-C ("AT" reserved words - they are only keywords when
      they follow '@')  */
@@ -1203,7 +1203,7 @@  extern void c_finish_omp_flush (location_t);
 extern void c_finish_omp_taskwait (location_t);
 extern void c_finish_omp_taskyield (location_t);
 extern tree c_finish_omp_for (location_t, enum tree_code, tree, tree, tree,
-			      tree, tree, tree);
+			      tree, tree, tree, tree *, tree *, tree *);
 extern void c_omp_split_clauses (location_t, enum tree_code, omp_clause_mask,
 				 tree, tree *);
 extern tree c_omp_declare_simd_clauses_to_numbers (tree, tree);
@@ -1389,4 +1389,5 @@  extern tree make_cilk_frame (tree);
 extern tree create_cilk_function_exit (tree, bool, bool);
 extern tree cilk_install_body_pedigree_operations (tree);
 extern void cilk_outline (tree, tree *, void *);
+extern void cilk_for_move_clauses_upward (tree *, tree);
 #endif /* ! GCC_C_COMMON_H */
diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c
old mode 100644
new mode 100755
index dd0a45d..0b4259c
--- a/gcc/c-family/c-omp.c
+++ b/gcc/c-family/c-omp.c
@@ -386,17 +386,19 @@  c_omp_for_incr_canonicalize_ptr (location_t loc, tree decl, tree incr)
    INITV, CONDV and INCRV are vectors containing initialization
    expressions, controlling predicates and increment expressions.
    BODY is the body of the loop and PRE_BODY statements that go before
-   the loop.  */
+   the loop.  *CINIT, *CEND and *CSTEP receive the original initial value,
+   end value and step of the loop; they are set only for a _Cilk_for.  */
 
 tree
 c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
-		  tree initv, tree condv, tree incrv, tree body, tree pre_body)
+		  tree initv, tree condv, tree incrv, tree body,
+		  tree pre_body, tree *cinit, tree *cend, tree *cstep)
 {
   location_t elocus;
   bool fail = false;
   int i;
-
-  if (code == CILK_SIMD
+  tree orig_init = NULL_TREE, orig_end = NULL_TREE, orig_step = NULL_TREE;
+  if ((code == CILK_SIMD || code == CILK_FOR) 
       && !c_check_cilk_loop (locus, TREE_VEC_ELT (declv, 0)))
     fail = true;
 
@@ -422,6 +424,8 @@  c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	  fail = true;
 	}
 
+      if (TREE_CODE (init) == MODIFY_EXPR)
+	orig_init = TREE_OPERAND (init, 1);
       /* In the case of "for (int i = 0...)", init will be a decl.  It should
 	 have a DECL_INITIAL that we can turn into an assignment.  */
       if (init == decl)
@@ -436,6 +440,7 @@  c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	      fail = true;
 	    }
 
+	  orig_init = init;
 	  init = build_modify_expr (elocus, decl, NULL_TREE, NOP_EXPR,
 	      			    /* FIXME diagnostics: This should
 				       be the location of the INIT.  */
@@ -526,9 +531,20 @@  c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 					    0))
 		    TREE_SET_CODE (cond, TREE_CODE (cond) == NE_EXPR
 					 ? LT_EXPR : GE_EXPR);
-		  else if (code != CILK_SIMD)
+		  else if (code != CILK_SIMD && code != CILK_FOR)
 		    cond_ok = false;
 		}
+	      if (flag_cilkplus && code == CILK_FOR)
+		{ 
+		  orig_end = TREE_OPERAND (cond, 1);
+		  tree add_expr = build_zero_cst (TREE_TYPE (orig_end));
+		  if (TREE_CODE (cond) == LE_EXPR)
+		    add_expr = build_one_cst (TREE_TYPE (orig_end));
+		  else if (TREE_CODE (cond) == GE_EXPR)
+		    add_expr = build_int_cst (TREE_TYPE (orig_end), -1);
+		  orig_end = fold_build2 (PLUS_EXPR, TREE_TYPE (orig_end),
+					  orig_end, add_expr);
+		}
 	    }
 
 	  if (!cond_ok)
@@ -561,6 +577,18 @@  c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	      if (TREE_OPERAND (incr, 0) != decl)
 		break;
 
+	      if (TREE_CODE (incr) == POSTINCREMENT_EXPR
+		  || TREE_CODE (incr) == PREINCREMENT_EXPR)
+		orig_step = build_one_cst (TREE_TYPE (incr));
+	      else
+		orig_step = integer_minus_one_node;
+ 
+	      if (POINTER_TYPE_P (TREE_TYPE (incr)))
+		{
+		  tree unit = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (incr)));
+		  orig_step = fold_build2 (MULT_EXPR, TREE_TYPE (orig_step),
+					   orig_step, unit);
+		}
 	      incr_ok = true;
 	      incr = c_omp_for_incr_canonicalize_ptr (elocus, decl, incr);
 	      break;
@@ -579,14 +607,24 @@  c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	      if (TREE_CODE (TREE_OPERAND (incr, 1)) == PLUS_EXPR
 		  && (TREE_OPERAND (TREE_OPERAND (incr, 1), 0) == decl
 		      || TREE_OPERAND (TREE_OPERAND (incr, 1), 1) == decl))
-		incr_ok = true;
+		{
+		  if (TREE_OPERAND (TREE_OPERAND (incr, 1), 0) == decl)
+		    orig_step = TREE_OPERAND (TREE_OPERAND (incr, 1), 1);
+		  else
+		    orig_step = TREE_OPERAND (TREE_OPERAND (incr, 1), 0);
+		  incr_ok = true;
+		}
 	      else if ((TREE_CODE (TREE_OPERAND (incr, 1)) == MINUS_EXPR
 			|| (TREE_CODE (TREE_OPERAND (incr, 1))
 			    == POINTER_PLUS_EXPR))
 		       && TREE_OPERAND (TREE_OPERAND (incr, 1), 0) == decl)
-		incr_ok = true;
+		{
+		  orig_step = TREE_OPERAND (TREE_OPERAND (incr, 1), 1);
+		  incr_ok = true;
+		}
 	      else
 		{
+		  orig_step = TREE_OPERAND (incr, 1);
 		  tree t = check_omp_for_incr_expr (elocus,
 						    TREE_OPERAND (incr, 1),
 						    decl);
@@ -609,6 +647,14 @@  c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	    }
 	}
 
+      /* These variables could be NULL if an error occurred.  */
+      if (flag_cilkplus && code == CILK_FOR 
+	  && orig_end && orig_init && orig_step)
+	{
+	  *cinit = orig_init;
+	  *cend = orig_end;
+	  *cstep = orig_step;
+	}
       TREE_VEC_ELT (initv, i) = init;
       TREE_VEC_ELT (incrv, i) = incr;
     }
diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index 07d23ac..e0f3561 100644
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -1394,6 +1394,11 @@  init_pragma (void)
 
   cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, false,
 				false);
+
+  if (flag_cilkplus && !flag_preprocess_only)
+    cpp_register_deferred_pragma (parse_in, "cilk", "grainsize",
+				  PRAGMA_CILK_GRAINSIZE, true, false);
+
 #ifdef HANDLE_PRAGMA_PACK_WITH_EXPANSION
   c_register_pragma_with_expansion (0, "pack", handle_pragma_pack);
 #else
diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h
index 6f1bf74..b9f09ba 100644
--- a/gcc/c-family/c-pragma.h
+++ b/gcc/c-family/c-pragma.h
@@ -55,6 +55,9 @@  typedef enum pragma_kind {
   /* Top level clause to handle all Cilk Plus pragma simd clauses.  */
   PRAGMA_CILK_SIMD,
 
+  /* This pragma handles setting of grainsize for a _Cilk_for.  */
+  PRAGMA_CILK_GRAINSIZE,
+
   PRAGMA_GCC_PCH_PREPROCESS,
   PRAGMA_IVDEP,
 
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
old mode 100644
new mode 100755
index 66625aa..ff2c224
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -1248,10 +1248,11 @@  static bool c_parser_objc_diagnose_bad_element_prefix
   (c_parser *, struct c_declspecs *);
 
 /* Cilk Plus supporting routines.  */
-static void c_parser_cilk_simd (c_parser *);
+static void c_parser_cilk_simd (c_parser *, bool, tree);
 static bool c_parser_cilk_verify_simd (c_parser *, enum pragma_context);
 static tree c_parser_array_notation (location_t, c_parser *, tree, tree);
 static tree c_parser_cilk_clause_vectorlength (c_parser *, tree, bool);
+static void c_parser_cilk_grainsize (c_parser *);
 
 /* Parse a translation unit (C90 6.7, C99 6.9).
 
@@ -4878,6 +4879,16 @@  c_parser_statement_after_labels (c_parser *parser)
 	case RID_FOR:
 	  c_parser_for_statement (parser, false);
 	  break;
+	case RID_CILK_FOR:
+	  if (!flag_cilkplus)
+	    {
+	      error_at (c_parser_peek_token (parser)->location,
+			"-fcilkplus must be enabled to use %<_Cilk_for%>");
+	      c_parser_skip_to_end_of_block_or_statement (parser);
+	    }
+	  else
+	    c_parser_cilk_simd (parser, true, integer_zero_node);
+	  break;
 	case RID_CILK_SYNC:
 	  c_parser_consume_token (parser);
 	  c_parser_skip_until_found (parser, CPP_SEMICOLON, "expected %<;%>");
@@ -9496,7 +9507,24 @@  c_parser_pragma (c_parser *parser, enum pragma_context context)
       if (!c_parser_cilk_verify_simd (parser, context))
 	return false;
       c_parser_consume_pragma (parser);
-      c_parser_cilk_simd (parser);
+      c_parser_cilk_simd (parser, false, NULL_TREE);
+      return false;
+    case PRAGMA_CILK_GRAINSIZE:
+      if (!flag_cilkplus)
+	{
+	  warning (0, "%<#pragma grainsize%> ignored because -fcilkplus is not"
+		   " enabled");
+	  c_parser_skip_until_found (parser, CPP_PRAGMA_EOL, NULL);
+	  return false;
+	}
+      if (context == pragma_external)
+	{
+	  error_at (c_parser_peek_token (parser)->location,
+		    "%<#pragma grainsize%> must be inside a function");
+	  c_parser_skip_until_found (parser, CPP_PRAGMA_EOL, NULL);
+	  return false;
+	}
+      c_parser_cilk_grainsize (parser);
       return false;
 
     default:
@@ -11591,7 +11619,7 @@  c_parser_omp_flush (c_parser *parser)
 
 static tree
 c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
-		       tree clauses, tree *cclauses)
+		       tree clauses, tree grain, tree *cclauses)
 {
   tree decl, cond, incr, save_break, save_cont, body, init, stmt, cl;
   tree declv, condv, incrv, initv, ret = NULL;
@@ -11599,6 +11627,7 @@  c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
   int i, collapse = 1, nbraces = 0;
   location_t for_loc;
   vec<tree, va_gc> *for_block = make_tree_vector ();
+  tree count = NULL_TREE;
 
   for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl))
     if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE)
@@ -11611,11 +11640,18 @@  c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
   condv = make_tree_vec (collapse);
   incrv = make_tree_vec (collapse);
 
-  if (!c_parser_next_token_is_keyword (parser, RID_FOR))
+  if (code != CILK_FOR
+      && !c_parser_next_token_is_keyword (parser, RID_FOR))
     {
       c_parser_error (parser, "for statement expected");
       return NULL;
     }
+  if (code == CILK_FOR
+      && !c_parser_next_token_is_keyword (parser, RID_CILK_FOR))
+    {
+      c_parser_error (parser, "_Cilk_for statement expected");
+      return NULL;
+    }
   for_loc = c_parser_peek_token (parser)->location;
   c_parser_consume_token (parser);
 
@@ -11693,7 +11729,7 @@  c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
 	    case LE_EXPR:
 	      break;
 	    case NE_EXPR:
-	      if (code == CILK_SIMD)
+	      if (code == CILK_SIMD || code == CILK_FOR)
 		break;
 	      /* FALLTHRU.  */
 	    default:
@@ -11826,8 +11862,9 @@  c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
      an error from the initialization parsing.  */
   if (!fail)
     {
+      tree cf_init = NULL_TREE, cf_end = NULL_TREE, cf_step = NULL_TREE;
       stmt = c_finish_omp_for (loc, code, declv, initv, condv,
-			       incrv, body, NULL);
+			       incrv, body, NULL, &cf_init, &cf_end, &cf_step);
       if (stmt)
 	{
 	  if (cclauses != NULL
@@ -11867,6 +11904,28 @@  c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
 		  }
 	    }
 	  OMP_FOR_CLAUSES (stmt) = clauses;
+	  /* If it is a _Cilk_for statement, then the OMP_FOR_CLAUSES location
+	     stores the user-defined grain value or an integer_zero_node 
+	     indicating that the runtime must compute a suitable grain, inside
+	     a SCHEDULE clause.  Similarly the loop-count is also stored in
+	     a IF clause.  These clauses do not make sense for _Cilk_for but
+	     it is just used to transmit information.  */
+	  if (code == CILK_FOR)
+	    {
+	      count = fold_build2 (MINUS_EXPR, TREE_TYPE (cf_end), cf_end,
+				   cf_init);
+	      count = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (count), count,
+				   cf_step);
+	      tree l = build_omp_clause (EXPR_LOCATION (grain),
+					 OMP_CLAUSE_SCHEDULE);
+	      OMP_CLAUSE_SCHEDULE_KIND (l) = OMP_CLAUSE_SCHEDULE_CILKFOR;
+	      OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (l) = grain;
+	      OMP_CLAUSE_CHAIN (l) = OMP_FOR_CLAUSES (stmt);
+	      tree c = build_omp_clause (EXPR_LOCATION (count), OMP_CLAUSE_IF);
+	      OMP_CLAUSE_IF_EXPR (c) = count;
+	      OMP_CLAUSE_CHAIN (c) = l;
+	      OMP_FOR_CLAUSES (stmt) = c;
+	    }
 	}
       ret = stmt;
     }
@@ -11931,7 +11990,8 @@  c_parser_omp_simd (location_t loc, c_parser *parser,
     }
 
   block = c_begin_compound_stmt (true);
-  ret = c_parser_omp_for_loop (loc, parser, OMP_SIMD, clauses, cclauses);
+  ret = c_parser_omp_for_loop (loc, parser, OMP_SIMD, clauses, NULL_TREE,
+			       cclauses);
   block = c_end_compound_stmt (loc, block, true);
   add_stmt (block);
 
@@ -12011,7 +12071,8 @@  c_parser_omp_for (location_t loc, c_parser *parser,
     }
 
   block = c_begin_compound_stmt (true);
-  ret = c_parser_omp_for_loop (loc, parser, OMP_FOR, clauses, cclauses);
+  ret = c_parser_omp_for_loop (loc, parser, OMP_FOR, clauses, NULL_TREE,
+			       cclauses);
   block = c_end_compound_stmt (loc, block, true);
   add_stmt (block);
 
@@ -12494,7 +12555,8 @@  c_parser_omp_distribute (location_t loc, c_parser *parser,
     }
 
   block = c_begin_compound_stmt (true);
-  ret = c_parser_omp_for_loop (loc, parser, OMP_DISTRIBUTE, clauses, NULL);
+  ret = c_parser_omp_for_loop (loc, parser, OMP_DISTRIBUTE, clauses, NULL_TREE,
+			       NULL);
   block = c_end_compound_stmt (loc, block, true);
   add_stmt (block);
 
@@ -13771,18 +13833,84 @@  c_parser_cilk_all_clauses (c_parser *parser)
   return c_finish_cilk_clauses (clauses);
 }
 
-/* Main entry point for parsing Cilk Plus <#pragma simd> for
-   loops.  */
+/* This function helps parse the grainsize pragma for a _Cilk_for statement. 
+   Here is the correct syntax of this pragma: 
+	    #pragma cilk grainsize = <EXP> 
+ */
 
 static void
-c_parser_cilk_simd (c_parser *parser)
+c_parser_cilk_grainsize (c_parser *parser)
 {
-  tree clauses = c_parser_cilk_all_clauses (parser);
+  extern tree convert_to_integer (tree, tree);
+
+  /* consume the 'grainsize' keyword.  */
+  c_parser_consume_pragma (parser);
+
+  if (c_parser_require (parser, CPP_EQ, "expected %<=%>") != 0)
+    {
+      struct c_expr g_expr = c_parser_binary_expression (parser, NULL, NULL);
+      if (g_expr.value && TREE_CODE (g_expr.value) == C_MAYBE_CONST_EXPR)
+	{
+	  error_at (input_location, "cannot convert grain to long integer.\n");
+	  c_parser_skip_to_pragma_eol (parser);
+	}   
+      else if (g_expr.value && g_expr.value != error_mark_node)
+	{
+	  c_parser_skip_to_pragma_eol (parser);
+	  c_token *token = c_parser_peek_token (parser);
+	  if (token && token->type == CPP_KEYWORD
+	      && token->keyword == RID_CILK_FOR)
+	    {
+	      /* Remove EXCESS_PRECISION_EXPR since we are going to convert
+		 it to long int.  */
+	      if (TREE_CODE (g_expr.value) == EXCESS_PRECISION_EXPR)
+		g_expr.value = TREE_OPERAND (g_expr.value, 0);
+	      tree grain = convert_to_integer (long_integer_type_node,
+					       g_expr.value);
+	      if (grain && grain != error_mark_node) 
+		c_parser_cilk_simd (parser, true, grain);
+	    }
+	  else
+	    warning (0, "grainsize pragma is not followed by %<_Cilk_for%>");
+	}
+      else
+	c_parser_skip_to_pragma_eol (parser);
+    }
+  else
+    c_parser_skip_to_pragma_eol (parser);
+}
+
+/* Main entry point for parsing Cilk Plus <#pragma simd> for and
+   _Cilk_for loops.  If IS_CILK_FOR is true then it is a _Cilk_for loop 
+   and GRAIN is the grain value passed in through pragma or 0.  */
+
+static void
+c_parser_cilk_simd (c_parser *parser, bool is_cilk_for, tree grain)
+{
+  tree super_block = NULL_TREE;
+  tree clauses = NULL_TREE;
+  
+  if (!is_cilk_for)
+    clauses = c_parser_cilk_all_clauses (parser);
+  else
+    super_block = c_begin_omp_parallel ();
   tree block = c_begin_compound_stmt (true);
   location_t loc = c_parser_peek_token (parser)->location;
-  c_parser_omp_for_loop (loc, parser, CILK_SIMD, clauses, NULL);
+  enum tree_code code = is_cilk_for ? CILK_FOR : CILK_SIMD;
+  c_parser_omp_for_loop (loc, parser, code, clauses, grain, NULL);
   block = c_end_compound_stmt (loc, block, true);
   add_stmt (block);
+  if (is_cilk_for)
+    {
+      /* Move all the clauses from the #pragma OMP for to #pragma omp parallel.
+	 This is because if these values are not integers and it is placed in
+	 OMP_FOR then the compiler will insert value chains for them.  */
+      tree parallel_clauses = NULL_TREE;
+      cilk_for_move_clauses_upward (&parallel_clauses, super_block);
+    /* The term super_block is not used in scheduling terms but in 
+       set-theory, i.e. set vs. super-set.  */ 
+      c_finish_omp_parallel (loc, parallel_clauses, super_block);
+    }
 }
 
 /* Parse a transaction attribute (GCC Extension).
diff --git a/gcc/cilk-builtins.def b/gcc/cilk-builtins.def
index 9f3240a..bf319d5 100644
--- a/gcc/cilk-builtins.def
+++ b/gcc/cilk-builtins.def
@@ -31,3 +31,5 @@  DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_SYNC, "__cilkrts_sync")
 DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_LEAVE_FRAME, "__cilkrts_leave_frame")
 DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_POP_FRAME, "__cilkrts_pop_frame")
 DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_SAVE_FP, "__cilkrts_save_fp_ctrl_state")
+DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_FOR_32, "__cilkrts_cilk_for_32")
+DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_FOR_64, "__cilkrts_cilk_for_64")
diff --git a/gcc/cilk-common.c b/gcc/cilk-common.c
index a6a1aa2..d604651 100644
--- a/gcc/cilk-common.c
+++ b/gcc/cilk-common.c
@@ -105,6 +105,27 @@  install_builtin (const char *name, tree fntype, enum built_in_function code,
   return fndecl;
 }
 
+/* Installs the builtin CODE under NAME and returns its FUNCTION_DECL.  Its
+   type is void (void (*) (void *, TYPE, TYPE), void *, TYPE, int).  */
+
+static tree
+declare_cilk_for_builtin (const char *name, tree type,
+			  enum built_in_function code)
+{
+  /* First argument: pointer to void (void *, TYPE, TYPE).  */
+  tree callback_type
+    = build_function_type_list (void_type_node, ptr_type_node, type, type,
+				NULL_TREE);
+  tree callback_ptr = build_pointer_type (callback_type);
+  tree fn_type
+    = build_function_type_list (void_type_node, callback_ptr, ptr_type_node,
+				type, integer_type_node, NULL_TREE);
+  tree decl = install_builtin (name, fn_type, code, false);
+
+  TREE_NOTHROW (decl) = 0;
+  return decl;
+}
+
 /* Creates and initializes all the built-in Cilk keywords functions and three
    structures: __cilkrts_stack_frame, __cilkrts_pedigree and __cilkrts_worker.
    Detailed information about __cilkrts_stack_frame and
@@ -269,6 +290,14 @@  cilk_init_builtins (void)
   cilk_save_fp_fndecl = install_builtin ("__cilkrts_save_fp_ctrl_state", 
 					 fptr_fun, BUILT_IN_CILK_SAVE_FP,
 					 false);
+  /* __cilkrts_cilk_for_32 (...);  */
+  cilk_for_32_fndecl = declare_cilk_for_builtin ("__cilkrts_cilk_for_32", 
+						 unsigned_intSI_type_node, 
+						 BUILT_IN_CILK_FOR_32);
+  /* __cilkrts_cilk_for_64 (...);  */
+  cilk_for_64_fndecl = declare_cilk_for_builtin ("__cilkrts_cilk_for_64", 
+						 unsigned_intDI_type_node, 
+						 BUILT_IN_CILK_FOR_64);
 }
 
 /* Get the appropriate frame arguments for CALL that is of type CALL_EXPR.  */
diff --git a/gcc/cilk.h b/gcc/cilk.h
index ae96f53..1fee929 100644
--- a/gcc/cilk.h
+++ b/gcc/cilk.h
@@ -40,6 +40,9 @@  enum cilk_tree_index  {
   CILK_TI_F_POP,                      /* __cilkrts_pop_frame (...).  */
   CILK_TI_F_RETHROW,                  /* __cilkrts_rethrow (...).  */
   CILK_TI_F_SAVE_FP,                  /* __cilkrts_save_fp_ctrl_state (...).  */
+  CILK_TI_F_LOOP_32,                  /* __cilkrts_cilk_for_32 (...).  */
+  CILK_TI_F_LOOP_64,                  /* __cilkrts_cilk_for_64 (...).  */
+
   /* __cilkrts_stack_frame struct fields.  */
   CILK_TI_FRAME_FLAGS,                /* stack_frame->flags.  */
   CILK_TI_FRAME_PARENT,               /* stack_frame->parent.  */
@@ -77,6 +80,8 @@  extern GTY (()) tree cilk_trees[CILK_TI_MAX];
 #define cilk_rethrow_fndecl           cilk_trees[CILK_TI_F_RETHROW]
 #define cilk_pop_fndecl               cilk_trees[CILK_TI_F_POP]
 #define cilk_save_fp_fndecl           cilk_trees[CILK_TI_F_SAVE_FP]
+#define cilk_for_32_fndecl            cilk_trees[CILK_TI_F_LOOP_32]
+#define cilk_for_64_fndecl            cilk_trees[CILK_TI_F_LOOP_64]
 
 #define cilk_worker_type_fndecl       cilk_trees[CILK_TI_WORKER_TYPE]
 #define cilk_frame_type_decl          cilk_trees[CILK_TI_FRAME_TYPE]
diff --git a/gcc/cp/cp-cilkplus.c b/gcc/cp/cp-cilkplus.c
index f3a2aff..0825777 100644
--- a/gcc/cp/cp-cilkplus.c
+++ b/gcc/cp/cp-cilkplus.c
@@ -143,3 +143,163 @@  cilk_install_body_with_frame_cleanup (tree fndecl, tree orig_body, void *wd)
 			    &list);
 }
 
+/* walk_tree callback used by found_cilk_for_p.  DATA points to a bool that
+   is set to true when *TP is a CILK_FOR; in that case *WALK_SUBTREES is
+   cleared so the walk does not descend into the statement.  */
+
+static tree
+find_cilk_for_stmt (tree *tp, int *walk_subtrees, void *data)
+{
+  bool *found = (bool *) data;
+  if (TREE_CODE (*tp) == CILK_FOR)
+    {
+      /* The flag is written through FOUND; DATA itself needs no update.  */
+      *found = true;
+      *walk_subtrees = 0;
+    }
+  return NULL_TREE;
+}
+
+/* Walks T and reports whether T itself or any tree nested inside it is a
+   CILK_FOR statement.  */
+
+bool
+found_cilk_for_p (tree t)
+{
+  bool has_cilk_for = false;
+  walk_tree (&t, find_cilk_for_stmt, &has_cilk_for, NULL);
+  return has_cilk_for;
+}
+
+/* Moves each statement of *STMT_LIST that contains no CILK_FOR to the end of
+   *NEW_STMT_LIST, which is allocated here when it is NULL_TREE.  Statement
+   lists and BIND_EXPR bodies containing one are processed recursively.  */
+
+void
+copy_tree_till_cilk_for (tree *stmt_list, tree *new_stmt_list)
+{
+  gcc_assert (TREE_CODE (*stmt_list) == STATEMENT_LIST);
+  gcc_assert (new_stmt_list != NULL);
+
+  if (*new_stmt_list == NULL_TREE)
+    *new_stmt_list = alloc_stmt_list ();
+
+  tree_stmt_iterator it = tsi_start (*stmt_list);
+  while (!tsi_end_p (it))
+    {
+      tree cur = tsi_stmt (it);
+
+      if (!found_cilk_for_p (cur))
+	{
+	  /* No tsi_next: presumably tsi_delink leaves IT on the next one.  */
+	  append_to_statement_list (cur, new_stmt_list);
+	  tsi_delink (&it);
+	  continue;
+	}
+      /* CUR contains a CILK_FOR: keep it, but filter nested containers.  */
+      if (TREE_CODE (cur) == STATEMENT_LIST)
+	copy_tree_till_cilk_for (tsi_stmt_ptr (it), new_stmt_list);
+      else if (TREE_CODE (cur) == BIND_EXPR)
+	copy_tree_till_cilk_for (&BIND_EXPR_BODY (cur), new_stmt_list);
+      tsi_next (&it);
+    }
+}
+
+/* Accumulator passed as the DATA argument of the find_vars tree-walk
+   callback by find_killed_vars.  */
+struct cilk_for_var_list
+{
+  vec <tree, va_gc> *list;	/* Left-hand sides collected so far.  */
+};
+
+/* Tree-walk callback used by find_killed_vars.  When *TP is an INIT_EXPR or
+   MODIFY_EXPR, its first operand (the assigned-to tree) is pushed onto the
+   list in DATA and the walk does not descend into the expression.  */
+
+static tree
+find_vars (tree *tp, int *walk_subtrees, void *data)
+{
+  if (tp == NULL || *tp == NULL_TREE)
+    return NULL_TREE;
+
+  enum tree_code code = TREE_CODE (*tp);
+  if (code != INIT_EXPR && code != MODIFY_EXPR)
+    return NULL_TREE;
+
+  struct cilk_for_var_list *acc = (struct cilk_for_var_list *) data;
+  vec_safe_push (acc->list, TREE_OPERAND (*tp, 0));
+  *walk_subtrees = 0;
+  return NULL_TREE;
+}
+
+/* Walks STMT_LIST and returns the vector of assignment/initialization
+   targets found in it; NULL when find_vars pushed nothing.  */
+
+static vec <tree, va_gc> *
+find_killed_vars (tree stmt_list)
+{
+  struct cilk_for_var_list acc;
+  acc.list = NULL;
+  cp_walk_tree (&stmt_list, find_vars, &acc, NULL);
+  return acc.list;
+}
+
+/* Prepends to *CLAUSES one OMP_CLAUSE_FIRSTPRIVATE clause for every tree
+   stored in LIST.  Does nothing when LIST is NULL or empty.  */
+
+static void
+insert_firstpriv_clauses (vec <tree, va_gc> *list, tree *clauses)
+{
+  unsigned i;
+  tree target;
+
+  if (!vec_safe_is_empty (list))
+    FOR_EACH_VEC_SAFE_ELT (list, i, target)
+      {
+	location_t loc = EXPR_LOCATION (target);
+	tree fp = build_omp_clause (loc, OMP_CLAUSE_FIRSTPRIVATE);
+	/* Prepend, so the resulting clauses are in reverse vector order.  */
+	OMP_CLAUSE_DECL (fp) = target;
+	OMP_CLAUSE_CHAIN (fp) = *clauses;
+	*clauses = fp;
+      }
+}
+
+/* Builds and returns a new BIND_EXPR whose body is the statements of
+   STMT_LIST followed by CFOR_PAR_LIST; see below for BIND_EXPR_VARS.  */
+
+tree
+cilk_for_create_bind_expr (tree vars, tree stmt_list, tree cfor_par_list)
+{
+  gcc_assert (TREE_CODE (stmt_list) == STATEMENT_LIST);
+  tree_stmt_iterator tsi;
+  tree return_expr = make_node (BIND_EXPR);
+  BIND_EXPR_BODY (return_expr) = alloc_stmt_list ();
+  bool found = false; 
+  vec <tree, va_gc> *cfor_vars = find_killed_vars (stmt_list);
+  /* Each tree assigned in STMT_LIST gets a FIRSTPRIVATE clause.  */
+  insert_firstpriv_clauses (cfor_vars, &OMP_PARALLEL_CLAUSES (cfor_par_list));
+
+  /* When VARS is supplied it is used as-is and no DECL_EXPR is looked for
+     in STMT_LIST.  */
+  if (vars != NULL_TREE)
+    found = true;
+
+  BIND_EXPR_VARS (return_expr) = vars;
+  for (tsi = tsi_start (stmt_list); !tsi_end_p (tsi); tsi_next (&tsi))
+    {
+      /* Otherwise take the decl of the first DECL_EXPR only (later decls are
+	 presumably chained to it; TODO confirm); its DECL_EXPR is dropped.  */
+      if (!found && TREE_CODE (tsi_stmt (tsi)) == DECL_EXPR)
+	{
+	  tree var = DECL_EXPR_DECL (tsi_stmt (tsi));
+	  BIND_EXPR_VARS (return_expr) = var;
+	  found = true;
+	}
+      else
+	append_to_statement_list (tsi_stmt (tsi),
+				  &BIND_EXPR_BODY (return_expr));
+    }
+  append_to_statement_list (cfor_par_list, &BIND_EXPR_BODY (return_expr));
+  return return_expr;
+}
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 7681b27..0fde703 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6206,6 +6206,9 @@  extern void vtv_build_vtable_verify_fndecl      (void);
 
 /* In cp-cilkplus.c.  */
 extern bool cpp_validate_cilk_plus_loop		(tree);
+extern void copy_tree_till_cilk_for             (tree *, tree *);
+extern tree cilk_for_create_bind_expr           (tree, tree, tree);
+extern bool found_cilk_for_p                    (tree);
 
 /* In cp/cp-array-notations.c */
 extern tree expand_array_notation_exprs         (tree);
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index f0722d6..94b7063 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -237,8 +237,8 @@  static void cp_parser_initial_pragma
 static tree cp_literal_operator_id
   (const char *);
 
-static void cp_parser_cilk_simd
-  (cp_parser *, cp_token *);
+static tree cp_parser_cilk_simd
+  (cp_parser *, cp_token *, tree);
 static bool cp_parser_omp_declare_reduction_exprs
   (tree, cp_parser *);
 static tree cp_parser_cilk_simd_vectorlength 
@@ -9368,6 +9368,18 @@  cp_parser_statement (cp_parser* parser, tree in_statement_expr,
 	  statement = cp_parser_iteration_statement (parser, false);
 	  break;
 
+	case RID_CILK_FOR:
+	  if (!flag_cilkplus)
+	    {
+	      error_at (cp_lexer_peek_token (parser->lexer)->location,
+			"-fcilkplus must be enabled to use %<_Cilk_for%>");
+	      cp_lexer_consume_token (parser->lexer);
+	      statement = error_mark_node;
+	    }
+	  else
+	    statement = cp_parser_cilk_simd (parser, NULL, integer_zero_node);
+	  break;
+	  
 	case RID_BREAK:
 	case RID_CONTINUE:
 	case RID_RETURN:
@@ -28835,7 +28847,7 @@  cp_parser_omp_for_cond (cp_parser *parser, tree decl, enum tree_code code)
     case LE_EXPR:
       break;
     case NE_EXPR:
-      if (code == CILK_SIMD)
+      if (code == CILK_SIMD || code == CILK_FOR)
 	break;
       /* Fall through: OpenMP disallows NE_EXPR.  */
     default:
@@ -29131,7 +29143,7 @@  cp_parser_omp_for_loop_init (cp_parser *parser,
 
 static tree
 cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
-			tree *cclauses)
+			tree *cclauses, tree *cfor_block)
 {
   tree init, cond, incr, body, decl, pre_body = NULL_TREE, ret;
   tree real_decl, initv, condv, incrv, declv;
@@ -29160,11 +29172,18 @@  cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
       bool add_private_clause = false;
       location_t loc;
 
-      if (!cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR))
+      if (code == CILK_SIMD
+	  && !cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR))
 	{
 	  cp_parser_error (parser, "for statement expected");
 	  return NULL;
 	}
+      if (code == CILK_FOR
+	  && !cp_lexer_next_token_is_keyword (parser->lexer, RID_CILK_FOR))
+	{
+	  cp_parser_error (parser, "_Cilk_for statement expected");
+	  return NULL;
+	}
       loc = cp_lexer_consume_token (parser->lexer)->location;
 
       if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
@@ -29173,13 +29192,26 @@  cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
       init = decl = real_decl = NULL;
       this_pre_body = push_stmt_list ();
 
+      if (code == CILK_FOR
+	  && cp_lexer_next_token_is_keyword (parser->lexer, RID_STATIC))
+	{
+	  error_at (cp_lexer_peek_token (parser->lexer)->location,
+		    "induction variable cannot be static");
+	  cp_lexer_consume_token (parser->lexer);
+	}
       add_private_clause
 	|= cp_parser_omp_for_loop_init (parser,
-					/*parsing_openmp=*/code != CILK_SIMD,
+					/*parsing_openmp=*/
+					(code != CILK_SIMD && code != CILK_FOR),
 					this_pre_body, for_block,
 					init, decl, real_decl);
 
-      cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
+      if (!cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON)
+	  && code == CILK_FOR)
+	{
+	  cp_parser_skip_to_end_of_statement (parser);
+	  cp_parser_consume_semicolon_at_end_of_statement (parser);
+	}
       if (this_pre_body)
 	{
 	  this_pre_body = pop_stmt_list (this_pre_body);
@@ -29337,7 +29369,7 @@  cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
 
   /* Note that we saved the original contents of this flag when we entered
      the structured block, and so we don't need to re-save it here.  */
-  if (code == CILK_SIMD)
+  if (code == CILK_SIMD || code == CILK_FOR)
     parser->in_statement = IN_CILK_SIMD_FOR;
   else
     parser->in_statement = IN_OMP_FOR;
@@ -29378,7 +29410,17 @@  cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
     }
 
   while (!for_block->is_empty ())
-    add_stmt (pop_stmt_list (for_block->pop ()));
+    {
+      tree t = pop_stmt_list (for_block->pop ());
+
+      /* Remove all the statements between the head of statement list and
+	 _Cilk_for statement and store them in *cfor_block.  These statements
+	 are hoisted above the #pragma parallel.  */
+      if (!processing_template_decl && code == CILK_FOR && cfor_block != NULL)
+	copy_tree_till_cilk_for (&t, cfor_block);
+      add_stmt (t);
+
+    }
   release_tree_vector (for_block);
 
   return ret;
@@ -29434,7 +29476,7 @@  cp_parser_omp_simd (cp_parser *parser, cp_token *pragma_tok,
   sb = begin_omp_structured_block ();
   save = cp_parser_begin_omp_structured_block (parser);
 
-  ret = cp_parser_omp_for_loop (parser, OMP_SIMD, clauses, cclauses);
+  ret = cp_parser_omp_for_loop (parser, OMP_SIMD, clauses, cclauses, NULL);
 
   cp_parser_end_omp_structured_block (parser, save);
   add_stmt (finish_omp_structured_block (sb));
@@ -29522,7 +29564,7 @@  cp_parser_omp_for (cp_parser *parser, cp_token *pragma_tok,
   sb = begin_omp_structured_block ();
   save = cp_parser_begin_omp_structured_block (parser);
 
-  ret = cp_parser_omp_for_loop (parser, OMP_FOR, clauses, cclauses);
+  ret = cp_parser_omp_for_loop (parser, OMP_FOR, clauses, cclauses, NULL);
 
   cp_parser_end_omp_structured_block (parser, save);
   add_stmt (finish_omp_structured_block (sb));
@@ -29994,7 +30036,7 @@  cp_parser_omp_distribute (cp_parser *parser, cp_token *pragma_tok,
   sb = begin_omp_structured_block ();
   save = cp_parser_begin_omp_structured_block (parser);
 
-  ret = cp_parser_omp_for_loop (parser, OMP_DISTRIBUTE, clauses, NULL);
+  ret = cp_parser_omp_for_loop (parser, OMP_DISTRIBUTE, clauses, NULL, NULL);
 
   cp_parser_end_omp_structured_block (parser, save);
   add_stmt (finish_omp_structured_block (sb));
@@ -31290,6 +31332,38 @@  cp_parser_initial_pragma (cp_token *first_token)
   cp_lexer_get_preprocessor_token (NULL, first_token);
 }
 
+/* Parses the grainsize pragma for the _Cilk_for statement.
+   Syntax:
+   #pragma cilk grainsize = <VALUE>.  */
+
+static void
+cp_parser_cilk_grainsize (cp_parser *parser, cp_token *pragma_tok)
+{
+  if (cp_parser_require (parser, CPP_EQ, RT_EQ))
+    {
+      tree exp = cp_parser_binary_expression (parser, false, false,
+                                              PREC_NOT_OPERATOR, NULL);
+      cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+      if (!exp || exp == error_mark_node)
+        {
+          error_at (pragma_tok->location, "invalid grainsize for _Cilk_for");
+          return;
+        }
+      cp_token *n_tok = cp_lexer_peek_token (parser->lexer);
+
+      /* Make sure the next token is _Cilk_for, it is invalid otherwise.  */
+      if (n_tok && n_tok->type == CPP_KEYWORD
+	  && n_tok->keyword == RID_CILK_FOR) 
+	cp_parser_cilk_simd (parser, NULL, exp);
+      else
+	warning_at (cp_lexer_peek_token (parser->lexer)->location, 0,
+		    "%<#pragma cilk grainsize%> is not followed by "
+		    "%<_Cilk_for%>");
+      return;
+    }
+  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+}
+
 /* Normal parsing of a pragma token.  Here we can (and must) use the
    regular lexer.  */
 
@@ -31469,9 +31543,30 @@  cp_parser_pragma (cp_parser *parser, enum pragma_context context)
 		    "%<#pragma simd%> must be inside a function");
 	  break;
 	}
-      cp_parser_cilk_simd (parser, pragma_tok);
+      cp_parser_cilk_simd (parser, pragma_tok, NULL_TREE);
       return true;
 
+    case PRAGMA_CILK_GRAINSIZE:
+      if (context == pragma_external)
+        {
+          error_at (pragma_tok->location,
+                    "%<#pragma cilk grainsize%> must be inside a function");
+          break;
+        }
+
+      /* Diagnose the pragma if Cilk Plus is not enabled.  */
+      if (flag_cilkplus)
+        {
+          cp_parser_cilk_grainsize (parser, pragma_tok);
+          return true;
+        }
+      else
+        {
+          error_at (pragma_tok->location, "-fcilkplus must be enabled to use "
+                    "%<#pragma cilk grainsize%>");
+          break;
+	}
+
     default:
       gcc_assert (id >= PRAGMA_FIRST_EXTERNAL);
       c_invoke_pragma_handler (id);
@@ -31789,31 +31884,104 @@  cp_parser_cilk_simd_all_clauses (cp_parser *parser, cp_token *pragma_token)
     return c_finish_cilk_clauses (clauses);
 }
 
-/* Main entry-point for parsing Cilk Plus <#pragma simd> for loops.  */
+/* Main entry-point for parsing Cilk Plus <#pragma simd> for and _Cilk_for
+   loops.  This function returns NULL_TREE whenever it is parsing the
+   <#pragma simd> for because the caller does not check the return value.
+   For _Cilk_for the caller does check the value, so error_mark_node is
+   returned when errors happen and a valid value when things go well.  */
 
-static void
-cp_parser_cilk_simd (cp_parser *parser, cp_token *pragma_token)
+static tree
+cp_parser_cilk_simd (cp_parser *parser, cp_token *pragma_token, tree grain)
 {
-  tree clauses = cp_parser_cilk_simd_all_clauses (parser, pragma_token);
-
+  bool is_cilk_for = !pragma_token ? true : false;
+  
+  tree clauses = NULL_TREE;
+  if (!is_cilk_for)
+    clauses = cp_parser_cilk_simd_all_clauses (parser, pragma_token);
+  
   if (clauses == error_mark_node)
-    return;
+    return NULL_TREE;
   
-  if (cp_lexer_next_token_is_not_keyword (parser->lexer, RID_FOR))
+  if (!is_cilk_for
+      && cp_lexer_next_token_is_not_keyword (parser->lexer, RID_FOR))
     {
       error_at (cp_lexer_peek_token (parser->lexer)->location,
 		"for statement expected");
-      return;
+      return NULL_TREE;
+    }
+  if (is_cilk_for
+      && cp_lexer_next_token_is_not_keyword (parser->lexer, RID_CILK_FOR))
+    {
+      error_at (cp_lexer_peek_token (parser->lexer)->location,
+		"_Cilk_for statement expected");
+      return error_mark_node;
     }
 
+  tree top_block = NULL_TREE, topmost_blk = NULL_TREE;
+  if (is_cilk_for)
+    {
+      topmost_blk = push_stmt_list ();
+      top_block = begin_omp_parallel ();
+    }
+  
   tree sb = begin_omp_structured_block ();
   int save = cp_parser_begin_omp_structured_block (parser);
-  tree ret = cp_parser_omp_for_loop (parser, CILK_SIMD, clauses, NULL);
+   
+  enum tree_code code = is_cilk_for ? CILK_FOR : CILK_SIMD;
+  tree cfor_blk = NULL_TREE;
+  tree ret = cp_parser_omp_for_loop (parser, code, clauses, NULL, &cfor_blk);
   if (ret)
     cpp_validate_cilk_plus_loop (OMP_FOR_BODY (ret));
+  
+  /* For _Cilk_for statements, the grain value is stored in a SCHEDULE
+     clause.  */
+  if (is_cilk_for && ret)
+    {
+      tree l = build_omp_clause (EXPR_LOCATION (grain), OMP_CLAUSE_SCHEDULE);
+      OMP_CLAUSE_SCHEDULE_KIND (l) = OMP_CLAUSE_SCHEDULE_CILKFOR;
+      OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (l) = grain;
+      OMP_CLAUSE_CHAIN (l) = OMP_FOR_CLAUSES (ret);
+      OMP_FOR_CLAUSES (ret) = l;
+    }
   cp_parser_end_omp_structured_block (parser, save);
-  add_stmt (finish_omp_structured_block (sb));
-  return;
+
+  if (!is_cilk_for)
+    {
+      add_stmt (finish_omp_structured_block (sb));
+      return NULL_TREE;
+    }
+
+  tree sb_block = finish_omp_structured_block (sb);
+  tree vars = NULL_TREE, sb_blk_body = sb_block;
+
+  /* For iterators, cfor_blk holds the mapping from original vector
+     iterators to the integer ones that the c_finish_omp_for remaps.
+     This info. must be pushed above the #pragma omp parallel so that
+     the IF_CLAUSE (that holds the loop-count) can use them to compute the
+     loop-count.  */
+  if (TREE_CODE (sb_block) == BIND_EXPR && cfor_blk != NULL_TREE)
+    {
+      vars = BIND_EXPR_VARS (sb_block);
+      sb_blk_body = BIND_EXPR_BODY (sb_block);
+    }
+
+  add_stmt (sb_blk_body);
+  tree parallel_clauses = NULL_TREE;
+
+  if (!processing_template_decl)
+    cilk_for_move_clauses_upward (&parallel_clauses, ret);
+  tree stmt = finish_omp_parallel (parallel_clauses, top_block);
+  OMP_PARALLEL_COMBINED (stmt) = 1;
+  topmost_blk = pop_stmt_list (topmost_blk);
+
+  if (cfor_blk != NULL_TREE)
+    {
+      tree bind_expr = cilk_for_create_bind_expr (vars, cfor_blk, topmost_blk);
+      add_stmt (bind_expr);
+      return bind_expr;
+    }
+  add_stmt (topmost_blk);
+  return topmost_blk;
 }
 
 /* Create an identifier for a generic parameter type (a synthesized
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
old mode 100644
new mode 100755
index 7967db8..3b52897
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -13580,13 +13580,51 @@  tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
       break;
 
     case OMP_PARALLEL:
-      tmp = tsubst_omp_clauses (OMP_PARALLEL_CLAUSES (t), false,
-				args, complain, in_decl);
-      stmt = begin_omp_parallel ();
-      RECUR (OMP_PARALLEL_BODY (t));
-      OMP_PARALLEL_COMBINED (finish_omp_parallel (tmp, stmt))
-	= OMP_PARALLEL_COMBINED (t);
-      break;
+      {
+	tmp = tsubst_omp_clauses (OMP_PARALLEL_CLAUSES (t), false,
+				  args, complain, in_decl);
+	
+	tree top_block = NULL_TREE, topmost_blk = NULL_TREE;
+	bool is_cilk_for = false;
+	if (flag_cilkplus && found_cilk_for_p (OMP_PARALLEL_BODY (t)))
+	  {
+	    is_cilk_for = true;
+	    topmost_blk = push_stmt_list ();
+	    top_block = begin_omp_parallel ();
+	  }
+	else
+	  stmt = begin_omp_parallel ();
+    
+	RECUR (OMP_PARALLEL_BODY (t));
+	tree cfor_blk = NULL_TREE;
+	if (is_cilk_for)
+	  {
+	    tree sb_blk_body = top_block;
+	    if (TREE_CODE (sb_blk_body) == BIND_EXPR) 
+	      sb_blk_body = BIND_EXPR_BODY (sb_blk_body);
+
+	    copy_tree_till_cilk_for (&sb_blk_body, &cfor_blk);
+	    cilk_for_move_clauses_upward (&tmp, top_block);
+	    top_block = finish_omp_parallel (tmp, sb_blk_body);
+	  }
+	else
+	  {
+	    stmt = finish_omp_parallel (tmp, stmt);
+	    OMP_PARALLEL_COMBINED (stmt) = OMP_PARALLEL_COMBINED (t);
+	  }
+	if (is_cilk_for)
+	  {
+	    OMP_PARALLEL_COMBINED (top_block) = 1;
+	    topmost_blk = pop_stmt_list (topmost_blk);
+	    if (cfor_blk != NULL_TREE) 
+	      stmt = cilk_for_create_bind_expr (NULL_TREE, cfor_blk, 
+						topmost_blk);
+	    else
+	      stmt = topmost_blk;
+	    add_stmt (stmt);
+	  }	
+      } 
+    break;
 
     case OMP_TASK:
       tmp = tsubst_omp_clauses (OMP_TASK_CLAUSES (t), false,
@@ -13599,6 +13637,7 @@  tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
     case OMP_FOR:
     case OMP_SIMD:
     case CILK_SIMD:
+    case CILK_FOR:
     case OMP_DISTRIBUTE:
       {
 	tree clauses, body, pre_body;
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
old mode 100644
new mode 100755
index 9fb4fc0..ec47d0e
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -6058,6 +6058,7 @@  handle_omp_for_class_iterator (int i, location_t locus, tree declv, tree initv,
     case GE_EXPR:
     case LT_EXPR:
     case LE_EXPR:
+    case NE_EXPR:
       if (TREE_OPERAND (cond, 1) == iter)
 	cond = build2 (swap_tree_comparison (TREE_CODE (cond)),
 		       TREE_TYPE (cond), iter, TREE_OPERAND (cond, 0));
@@ -6470,12 +6471,22 @@  finish_omp_for (location_t locus, enum tree_code code, tree declv, tree initv,
   if (IS_EMPTY_STMT (pre_body))
     pre_body = NULL;
 
+  tree cf_step = NULL_TREE, cf_init = NULL_TREE, cf_end = NULL_TREE;
   omp_for = c_finish_omp_for (locus, code, declv, initv, condv, incrv,
-			      body, pre_body);
-
+			      body, pre_body, &cf_init, &cf_end, &cf_step);
   if (omp_for == NULL)
     return NULL;
 
+  if (code == CILK_FOR && !processing_template_decl)
+    {
+      tree count = fold_build2 (MINUS_EXPR, TREE_TYPE (cf_end), cf_end,
+				cf_init);
+      count = fold_build2 (CEIL_DIV_EXPR, TREE_TYPE (count), count, cf_step);
+      tree c = build_omp_clause (EXPR_LOCATION (count), OMP_CLAUSE_IF);
+      OMP_CLAUSE_IF_EXPR (c) = count;
+      clauses = chainon (clauses, c);
+    }
+
   for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INCR (omp_for)); i++)
     {
       decl = TREE_OPERAND (TREE_VEC_ELT (OMP_FOR_INIT (omp_for), i), 0);
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index 2d1e1c7..f87c0cf 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -45,6 +45,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "value-prof.h"
 #include "trans-mem.h"
 
+static void dump_gimple_omp_parallel (pretty_printer *, gimple, int, int,
+				      bool);
 #define INDENT(SPACE)							\
   do { int i; for (i = 0; i < SPACE; i++) pp_space (buffer); } while (0)
 
@@ -1124,6 +1126,10 @@  dump_gimple_omp_for (pretty_printer *buffer, gimple gs, int spc, int flags)
 	case GF_OMP_FOR_KIND_DISTRIBUTE:
 	  kind = " distribute";
 	  break;
+	case GF_OMP_FOR_KIND_CILKFOR:
+	  gcc_assert (flag_cilkplus);
+	  kind = "";
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -1158,16 +1164,25 @@  dump_gimple_omp_for (pretty_printer *buffer, gimple gs, int spc, int flags)
 	case GF_OMP_FOR_KIND_DISTRIBUTE:
 	  pp_string (buffer, "#pragma omp distribute");
 	  break;
+	case GF_OMP_FOR_KIND_CILKFOR:
+	  gcc_assert (flag_cilkplus);
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
-      dump_omp_clauses (buffer, gimple_omp_for_clauses (gs), spc, flags);
+      if (!flag_cilkplus
+	  || gimple_omp_for_kind (gs) != GF_OMP_FOR_KIND_CILKFOR) 
+	dump_omp_clauses (buffer, gimple_omp_for_clauses (gs), spc, flags);
       for (i = 0; i < gimple_omp_for_collapse (gs); i++)
 	{
 	  if (i)
 	    spc += 2;
 	  newline_and_indent (buffer, spc);
-	  pp_string (buffer, "for (");
+	  if (flag_cilkplus 
+	      && gimple_omp_for_kind (gs) == GF_OMP_FOR_KIND_CILKFOR)
+	    pp_string (buffer, "_Cilk_for (");
+	  else
+	    pp_string (buffer, "for (");
 	  dump_generic_node (buffer, gimple_omp_for_index (gs, i), spc,
 			     flags, false);
 	  pp_string (buffer, " = ");
@@ -1192,6 +1207,9 @@  dump_gimple_omp_for (pretty_printer *buffer, gimple gs, int spc, int flags)
 	    case GE_EXPR:
 	      pp_greater_equal (buffer);
 	      break;
+	    case NE_EXPR:
+	      pp_string (buffer, "!=");
+	      break;
 	    default:
 	      gcc_unreachable ();
 	    }
@@ -1210,6 +1228,9 @@  dump_gimple_omp_for (pretty_printer *buffer, gimple gs, int spc, int flags)
 
       if (!gimple_seq_empty_p (gimple_omp_body (gs)))
 	{
+	  if (flag_cilkplus
+	      && gimple_omp_for_kind (gs) == GF_OMP_FOR_KIND_CILKFOR) 
+	    dump_omp_clauses (buffer, gimple_omp_for_clauses (gs), spc, flags); 
 	  newline_and_indent (buffer, spc + 2);
 	  pp_left_brace (buffer);
 	  pp_newline (buffer);
@@ -1846,7 +1867,7 @@  dump_gimple_phi (pretty_printer *buffer, gimple phi, int spc, bool comment,
 
 static void
 dump_gimple_omp_parallel (pretty_printer *buffer, gimple gs, int spc,
-                          int flags)
+                          int flags, bool is_cilk_for)
 {
   if (flags & TDF_RAW)
     {
@@ -1860,7 +1881,10 @@  dump_gimple_omp_parallel (pretty_printer *buffer, gimple gs, int spc,
   else
     {
       gimple_seq body;
-      pp_string (buffer, "#pragma omp parallel");
+      if (is_cilk_for) 
+	pp_string (buffer, "compiler-inserted clauses for cilk-for body: ");
+      else
+	pp_string (buffer, "#pragma omp parallel");
       dump_omp_clauses (buffer, gimple_omp_parallel_clauses (gs), spc, flags);
       if (gimple_omp_parallel_child_fn (gs))
 	{
@@ -2137,7 +2161,7 @@  pp_gimple_stmt_1 (pretty_printer *buffer, gimple gs, int spc, int flags)
       break;
 
     case GIMPLE_OMP_PARALLEL:
-      dump_gimple_omp_parallel (buffer, gs, spc, flags);
+      dump_gimple_omp_parallel (buffer, gs, spc, flags, false);
       break;
 
     case GIMPLE_OMP_TASK:
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 0e80d2e..194045c 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -91,13 +91,14 @@  enum gf_mask {
     GF_CALL_ALLOCA_FOR_VAR	= 1 << 5,
     GF_CALL_INTERNAL		= 1 << 6,
     GF_OMP_PARALLEL_COMBINED	= 1 << 0,
-    GF_OMP_FOR_KIND_MASK	= 3 << 0,
+    GF_OMP_FOR_KIND_MASK	= 7 << 0,
     GF_OMP_FOR_KIND_FOR		= 0 << 0,
     GF_OMP_FOR_KIND_DISTRIBUTE	= 1 << 0,
     GF_OMP_FOR_KIND_SIMD	= 2 << 0,
     GF_OMP_FOR_KIND_CILKSIMD	= 3 << 0,
-    GF_OMP_FOR_COMBINED		= 1 << 2,
-    GF_OMP_FOR_COMBINED_INTO	= 1 << 3,
+    GF_OMP_FOR_KIND_CILKFOR     = 4 << 0,
+    GF_OMP_FOR_COMBINED		= 1 << 3,
+    GF_OMP_FOR_COMBINED_INTO	= 1 << 4,
     GF_OMP_TARGET_KIND_MASK	= 3 << 0,
     GF_OMP_TARGET_KIND_REGION	= 0 << 0,
     GF_OMP_TARGET_KIND_DATA	= 1 << 0,
@@ -4563,6 +4564,16 @@  gimple_omp_for_set_pre_body (gimple gs, gimple_seq pre_body)
   omp_for_stmt->pre_body = pre_body;
 }
 
+/* Returns the induction variable of type TREE from GS that is of type 
+   GIMPLE_STATEMENT_OMP_FOR.  */
+
+static inline tree
+gimple_cilk_for_induction_var (const_gimple gs)
+{
+  const gimple_statement_omp_for *cilk_for_stmt =
+    as_a <const gimple_statement_omp_for> (gs);
+  return cilk_for_stmt->iter->index;
+}
 
 /* Return the clauses associated with OMP_PARALLEL GS.  */
 
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index ff341d4..7488563 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -5856,7 +5856,8 @@  omp_check_private (struct gimplify_omp_ctx *ctx, tree decl, bool copyprivate)
 
 static void
 gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
-			   enum omp_region_type region_type)
+			   enum omp_region_type region_type,
+			   bool is_cilk_for)
 {
   struct gimplify_omp_ctx *ctx, *outer_ctx;
   tree c;
@@ -6086,8 +6087,12 @@  gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 
 	case OMP_CLAUSE_FINAL:
 	case OMP_CLAUSE_IF:
-	  OMP_CLAUSE_OPERAND (c, 0)
-	    = gimple_boolify (OMP_CLAUSE_OPERAND (c, 0));
+	  /* In _Cilk_for we insert an IF clause as a mechanism to
+	     pass in the count information.  So, there is no reason to
+	     boolify them.  */
+	  if (!is_cilk_for) 
+	    OMP_CLAUSE_OPERAND (c, 0) 
+	      = gimple_boolify (OMP_CLAUSE_OPERAND (c, 0));
 	  /* Fall through.  */
 
 	case OMP_CLAUSE_SCHEDULE:
@@ -6454,6 +6459,21 @@  gimplify_adjust_omp_clauses (tree *list_p)
   delete_omp_context (ctx);
 }
 
+/* Removes the OMP clause C from a list of clauses in *LIST_P.  */
+
+static void
+omp_remove_clause (tree c, tree *list_p)
+{
+  tree ii = NULL_TREE;
+  while ((ii = *list_p) != NULL)
+    {
+      if (simple_cst_equal (ii, c) == 1)
+	*list_p = OMP_CLAUSE_CHAIN (ii);
+      else
+	list_p = &OMP_CLAUSE_CHAIN (ii);
+    }
+}
+
 /* Gimplify the contents of an OMP_PARALLEL statement.  This involves
    gimplification of the body, as well as scanning the body for used
    variables.  We need to do this scan now, because variable-sized
@@ -6465,11 +6485,29 @@  gimplify_omp_parallel (tree *expr_p, gimple_seq *pre_p)
   tree expr = *expr_p;
   gimple g;
   gimple_seq body = NULL;
-
+  bool is_cilk_for = false;
+  tree c = NULL_TREE;
+  for (c = OMP_PARALLEL_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
+    if (flag_cilkplus && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SCHEDULE
+	&& OMP_CLAUSE_SCHEDULE_KIND (c) == OMP_CLAUSE_SCHEDULE_CILKFOR)
+      {
+	/* The schedule clause is kept up to this point so that it can
+	   indicate whether this #pragma omp parallel is something a 
+	   _Cilk_for statement inserted.  If so, then indicate
+	   is_cilk_for is true so that the gimplify_scan_omp_clauses does 
+	   not boolify the IF CLAUSE, which stores the count value.  */
+	gcc_assert (flag_cilkplus);
+	is_cilk_for = true;
+	break;
+      } 
+  
+  /* The SCHEDULE clause is not necessary anymore.  */
+  if (is_cilk_for) 
+    omp_remove_clause (c, &OMP_PARALLEL_CLAUSES (expr));
   gimplify_scan_omp_clauses (&OMP_PARALLEL_CLAUSES (expr), pre_p,
 			     OMP_PARALLEL_COMBINED (expr)
 			     ? ORT_COMBINED_PARALLEL
-			     : ORT_PARALLEL);
+			     : ORT_PARALLEL, is_cilk_for);
 
   push_gimplify_context ();
 
@@ -6505,7 +6543,7 @@  gimplify_omp_task (tree *expr_p, gimple_seq *pre_p)
   gimplify_scan_omp_clauses (&OMP_TASK_CLAUSES (expr), pre_p,
 			     find_omp_clause (OMP_TASK_CLAUSES (expr),
 					      OMP_CLAUSE_UNTIED)
-			     ? ORT_UNTIED_TASK : ORT_TASK);
+			     ? ORT_UNTIED_TASK : ORT_TASK, false);
 
   push_gimplify_context ();
 
@@ -6570,8 +6608,9 @@  gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 
   simd = TREE_CODE (for_stmt) == OMP_SIMD
     || TREE_CODE (for_stmt) == CILK_SIMD;
-  gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p,
-			     simd ? ORT_SIMD : ORT_WORKSHARE);
+    gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p,
+			       simd ? ORT_SIMD : ORT_WORKSHARE,
+			       TREE_CODE (for_stmt) == CILK_FOR);
 
   /* Handle OMP_FOR_INIT.  */
   for_pre_body = NULL;
@@ -6627,7 +6666,7 @@  gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
       tree c = NULL_TREE;
       if (orig_for_stmt != for_stmt)
 	/* Do this only on innermost construct for combined ones.  */;
-      else if (simd)
+      else if (simd || TREE_CODE (for_stmt) == CILK_FOR)
 	{
 	  splay_tree_node n = splay_tree_lookup (gimplify_omp_ctxp->variables,
 						 (splay_tree_key)decl);
@@ -6832,6 +6871,7 @@  gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
     case OMP_FOR: kind = GF_OMP_FOR_KIND_FOR; break;
     case OMP_SIMD: kind = GF_OMP_FOR_KIND_SIMD; break;
     case CILK_SIMD: kind = GF_OMP_FOR_KIND_CILKSIMD; break;
+    case CILK_FOR: kind = GF_OMP_FOR_KIND_CILKFOR; break;
     case OMP_DISTRIBUTE: kind = GF_OMP_FOR_KIND_DISTRIBUTE; break;
     default:
       gcc_unreachable ();
@@ -6865,7 +6905,7 @@  gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
       t = TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i);
       gimple_omp_for_set_incr (gfor, i, TREE_OPERAND (t, 1));
     }
-
+  
   gimplify_seq_add_stmt (pre_p, gfor);
   if (ret != GS_ALL_DONE)
     return GS_ERROR;
@@ -6902,7 +6942,7 @@  gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
     default:
       gcc_unreachable ();
     }
-  gimplify_scan_omp_clauses (&OMP_CLAUSES (expr), pre_p, ort);
+  gimplify_scan_omp_clauses (&OMP_CLAUSES (expr), pre_p, ort, false);
   if (ort == ORT_TARGET || ort == ORT_TARGET_DATA)
     {
       push_gimplify_context ();
@@ -6962,7 +7002,7 @@  gimplify_omp_target_update (tree *expr_p, gimple_seq *pre_p)
   gimple stmt;
 
   gimplify_scan_omp_clauses (&OMP_TARGET_UPDATE_CLAUSES (expr), pre_p,
-			     ORT_WORKSHARE);
+			     ORT_WORKSHARE, false);
   gimplify_adjust_omp_clauses (&OMP_TARGET_UPDATE_CLAUSES (expr));
   stmt = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_UPDATE,
 				  OMP_TARGET_UPDATE_CLAUSES (expr));
@@ -7904,6 +7944,7 @@  gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	case OMP_FOR:
 	case OMP_SIMD:
 	case CILK_SIMD:
+	case CILK_FOR:
 	case OMP_DISTRIBUTE:
 	  ret = gimplify_omp_for (expr_p, pre_p);
 	  break;
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
old mode 100644
new mode 100755
index 91c8656..3454dc9
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -71,6 +71,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "ipa-prop.h"
 #include "tree-nested.h"
 #include "tree-eh.h"
+#include "cilk.h"
 
 
 /* Lowering of OpenMP parallel and workshare constructs proceeds in two
@@ -198,6 +199,13 @@  struct omp_for_data
   struct omp_for_data_loop *loops;
 };
 
+/* A structure with necessary elements from _Cilk_for statement.  This
+   struct. node is passed in to WALK_STMT_INFO->INFO.  */
+struct cilk_for_info 
+{
+  bool found;
+  tree induction_var;
+};
 
 static splay_tree all_contexts;
 static int taskreg_nesting_level;
@@ -314,6 +322,8 @@  extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
   fd->have_ordered = false;
   fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
   fd->chunk_size = NULL_TREE;
+  if (gimple_omp_for_kind (fd->for_stmt) ==  GF_OMP_FOR_KIND_CILKFOR)
+    fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR;
   collapse_iter = NULL;
   collapse_count = NULL;
 
@@ -392,7 +402,9 @@  extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
 	  break;
 	case NE_EXPR:
 	  gcc_assert (gimple_omp_for_kind (for_stmt)
-		      == GF_OMP_FOR_KIND_CILKSIMD);
+		      == GF_OMP_FOR_KIND_CILKSIMD
+		      || gimple_omp_for_kind (for_stmt)
+		      == GF_OMP_FOR_KIND_CILKFOR);
 	  break;
 	case LE_EXPR:
 	  if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
@@ -1818,27 +1830,120 @@  scan_sharing_clauses (tree clauses, omp_context *ctx)
 	scan_omp (&OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c), ctx);
 }
 
-/* Create a new name for omp child function.  Returns an identifier.  */
+/* Create a new name for omp child function.  Returns an identifier.  If 
+   IS_CILK_FOR is true then the suffix for the child function is 
+   "_cilk_for_fn."  */
 
 static tree
-create_omp_child_function_name (bool task_copy)
+create_omp_child_function_name (bool task_copy, bool is_cilk_for)
 {
+  if (is_cilk_for)
+    return clone_function_name (current_function_decl, "_cilk_for_fn");
   return (clone_function_name (current_function_decl,
 			       task_copy ? "_omp_cpyfn" : "_omp_fn"));
 }
 
+/* Helper function for walk_gimple_seq function.  *GSI_P is the gimple stmt.
+   iterator passed by walk_gimple_seq and *WI->INFO holds the CILK_FOR_INFO
+   structure.  This function sets the values inside this structure if it
+   finds a _Cilk_for statement in *GSI_P.  HANDLED_OPS_P is unused.  */
+
+static tree
+find_cilk_for_stmt (gimple_stmt_iterator *gsi_p,
+		    bool *handled_ops_p ATTRIBUTE_UNUSED,
+		    struct walk_stmt_info *wi)
+{
+  struct cilk_for_info *cf_info = (struct cilk_for_info *) wi->info;
+  gimple stmt = gsi_stmt (*gsi_p);
+
+  if (gimple_code (stmt) == GIMPLE_OMP_FOR
+      && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_CILKFOR)
+      /* For nested _Cilk_for statements, just look into the
+	 outer-most one.  */
+      && cf_info->found == false)
+    {
+      cf_info->found = true;
+      cf_info->induction_var = gimple_cilk_for_induction_var (stmt);
+    }
+  return NULL_TREE;
+}
+
+/* Returns true if STMT contains a CILK_FOR statement.  If found then
+   populate *IND_VAR with the induction variable of the first
+   _Cilk_for found.  Otherwise *IND_VAR remains untouched.  IND_VAR
+   can be NULL and if so then it is left untouched.  Always returns
+   false when Cilk Plus is not enabled.  */
+
+static bool
+is_cilk_for_stmt (gimple stmt, tree *ind_var)
+{
+  if (!flag_cilkplus)
+    return false;
+  if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL)
+    stmt = gimple_omp_body (stmt);
+  if (gimple_code (stmt) == GIMPLE_BIND)
+    {
+      gimple_seq body = gimple_bind_body (stmt);
+      struct walk_stmt_info wi;
+      struct cilk_for_info cf_info;
+      memset (&cf_info, 0, sizeof (struct cilk_for_info));
+      memset (&wi, 0, sizeof (wi));
+      wi.info = &cf_info;
+      walk_gimple_seq (body, find_cilk_for_stmt, NULL, &wi);
+      if (cf_info.found)
+	{
+	  if (ind_var)
+	    *ind_var = cf_info.induction_var;
+	  return true;
+	}
+    }
+  return false;
+}
+
+/* Returns the type of the induction variable for the child function for
+   _Cilk_for and the types for _high and _low variables based on TYPE.  */
+
+static tree
+cilk_for_check_loop_diff_type (tree type)
+{
+  if (type == integer_type_node)
+    return type;
+  else if (TYPE_PRECISION (type) <= TYPE_PRECISION (uint32_type_node))
+    { 
+      if (TYPE_UNSIGNED (type)) 
+	return uint32_type_node;
+      else
+	return integer_type_node;
+    }
+  else
+    {
+      if (TYPE_UNSIGNED (type)) 
+	return uint64_type_node;
+      else
+	return long_long_integer_type_node;
+    }
+  gcc_unreachable ();
+}
+
 /* Build a decl for the omp child function.  It'll not contain a body
    yet, just the bare decl.  */
 
 static void
 create_omp_child_function (omp_context *ctx, bool task_copy)
 {
-  tree decl, type, name, t;
+  tree decl, type, name, t, ind_var = NULL_TREE;
 
-  name = create_omp_child_function_name (task_copy);
+  bool is_cilk_for = is_cilk_for_stmt (ctx->stmt, &ind_var);
+  tree cilk_var_type = (is_cilk_for ?
+    cilk_for_check_loop_diff_type (TREE_TYPE (ind_var)) : NULL_TREE);
+  
+  name = create_omp_child_function_name (task_copy, is_cilk_for);
   if (task_copy)
     type = build_function_type_list (void_type_node, ptr_type_node,
 				     ptr_type_node, NULL_TREE);
+  else if (is_cilk_for)
+    type = build_function_type_list (void_type_node, ptr_type_node,
+				     cilk_var_type, cilk_var_type, NULL_TREE);
   else
     type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
 
@@ -1888,13 +1993,44 @@  create_omp_child_function (omp_context *ctx, bool task_copy)
   DECL_CONTEXT (t) = decl;
   DECL_RESULT (decl) = t;
 
-  t = build_decl (DECL_SOURCE_LOCATION (decl),
-		  PARM_DECL, get_identifier (".omp_data_i"), ptr_type_node);
+  /* _Cilk_for's child function requires two extra parameters called 
+     __low and __high that are set by the Cilk runtime when it calls this
+     function.  */
+  if (is_cilk_for)
+    {
+      t = build_decl (DECL_SOURCE_LOCATION (decl),
+		      PARM_DECL, get_identifier ("__high"), cilk_var_type);
+      DECL_ARTIFICIAL (t) = 1;
+      DECL_NAMELESS (t) = 1;
+      DECL_ARG_TYPE (t) = ptr_type_node;
+      DECL_CONTEXT (t) = current_function_decl;
+      TREE_USED (t) = 1;
+      TREE_ADDRESSABLE (t) = 1;
+      DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
+      DECL_ARGUMENTS (decl) = t;
+
+      t = build_decl (DECL_SOURCE_LOCATION (decl),
+		      PARM_DECL, get_identifier ("__low"), cilk_var_type);
+      DECL_ARTIFICIAL (t) = 1;
+      DECL_NAMELESS (t) = 1;
+      DECL_ARG_TYPE (t) = ptr_type_node;
+      DECL_CONTEXT (t) = current_function_decl;
+      TREE_USED (t) = 1;
+      TREE_ADDRESSABLE (t) = 1;
+      DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
+      DECL_ARGUMENTS (decl) = t;
+    }
+
+  tree data_name = get_identifier (".omp_data_i");
+  t = build_decl (DECL_SOURCE_LOCATION (decl), PARM_DECL, data_name,
+		  ptr_type_node);
   DECL_ARTIFICIAL (t) = 1;
   DECL_NAMELESS (t) = 1;
   DECL_ARG_TYPE (t) = ptr_type_node;
   DECL_CONTEXT (t) = current_function_decl;
   TREE_USED (t) = 1;
+  if (is_cilk_for)
+    DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
   DECL_ARGUMENTS (decl) = t;
   if (!task_copy)
     ctx->receiver_decl = t;
@@ -4313,6 +4449,44 @@  expand_parallel_call (struct omp_region *region, basic_block bb,
 			    false, GSI_CONTINUE_LINKING);
 }
 
+/* Insert a call to the Cilk runtime function WS_ARGS[0] (with grain
+   WS_ARGS[1]) built from ENTRY_STMT at the end of basic block BB.  */
+
+static void
+expand_cilk_for_call (basic_block bb, gimple entry_stmt,
+		      vec <tree, va_gc> *ws_args)
+{
+  tree t, t1, t2;
+  gimple_stmt_iterator gsi;
+  vec <tree, va_gc> *args;
+
+  gcc_assert (vec_safe_length (ws_args) == 2);
+  tree func_name = (*ws_args)[0];  /* Function to call.  */
+  tree grain = (*ws_args)[1];
+
+  tree clauses = gimple_omp_parallel_clauses (entry_stmt); 
+  tree count = find_omp_clause (clauses, OMP_CLAUSE_IF);
+  gcc_assert (count != NULL_TREE);
+  count = OMP_CLAUSE_IF_EXPR (count);  /* Count is the IF clause's expr.  */
+  
+  gsi = gsi_last_bb (bb);
+  t = gimple_omp_parallel_data_arg (entry_stmt);
+  if (t == NULL)
+    t1 = null_pointer_node;  /* No data argument to pass.  */
+  else
+    t1 = build_fold_addr_expr (t);
+  t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
+  
+  vec_alloc (args, 4);  /* (child fn address, data, count, grain).  */
+  args->quick_push (t2);
+  args->quick_push (t1);
+  args->quick_push (count);
+  args->quick_push (grain);
+  t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
+
+  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, 
+			    GSI_CONTINUE_LINKING);
+}
 
 /* Build the function call to GOMP_task to actually
    generate the task operation.  BB is the block where to insert the code.  */
@@ -4648,7 +4822,38 @@  expand_omp_taskreg (struct omp_region *region)
   entry_bb = region->entry;
   exit_bb = region->exit;
 
-  if (is_combined_parallel (region))
+  /* The way _Cilk_for is constructed in this compiler can be thought of
+     as a parallel omp_for.  But the inner workings between them are very
+     different so we need a way to differentiate between them.  Thus, we
+     added a new schedule type called OMP_CLAUSE_SCHEDULE_CILKFOR, which 
+     pretty much says that this is not a parallel omp for but a _Cilk_for
+     statement.  */
+  bool is_cilk_for =
+    (flag_cilkplus && region->inner &&
+     (region->inner->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR));
+
+  /* Extract the __high and __low parameter from the function.  */
+  tree high_arg = NULL_TREE, low_arg = NULL_TREE;
+  if (is_cilk_for)
+    {
+      for (tree ii_arg = DECL_ARGUMENTS (child_fn); ii_arg != NULL_TREE;
+	   ii_arg = TREE_CHAIN (ii_arg))
+	{
+	  if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (ii_arg)), "__high"))
+	    high_arg = ii_arg;
+	  if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (ii_arg)), "__low"))
+	    low_arg = ii_arg;
+	}
+      gcc_assert (high_arg);
+      gcc_assert (low_arg);
+    }
+  
+  if (is_cilk_for) 
+    /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
+       and the inner statement contains the name of the built-in function
+       and grain.  */
+    ws_args = region->inner->ws_args;
+  else if (is_combined_parallel (region))
     ws_args = region->ws_args;
   else
     ws_args = NULL;
@@ -4755,6 +4960,49 @@  expand_omp_taskreg (struct omp_region *region)
 	    }
 	}
 
+      /* In here the calls to the GET_NUM_THREADS and GET_THREAD_NUM are
+	 removed.  Further, they will be replaced by __low and __high
+	 parameter values.  */
+      gimple high_assign = NULL, low_assign = NULL;
+      if (is_cilk_for)
+	{
+	  gimple_stmt_iterator gsi2 = gsi_start_bb (single_succ (entry_bb));
+	  while (!gsi_end_p (gsi2))
+	    {
+	      gimple stmt = gsi_stmt (gsi2);
+	
+	      if (gimple_call_builtin_p (stmt, BUILT_IN_OMP_GET_NUM_THREADS))
+		{
+		  /* There can only be one call to each of these two
+		     functions.  If there are multiple, then something went
+		     wrong somewhere.  */
+		  gcc_assert (low_assign == NULL);
+		  tree ltype = TREE_TYPE (gimple_get_lhs (stmt));
+		  tree tmp2 = create_tmp_reg (TREE_TYPE (low_arg), NULL);
+		  low_assign = gimple_build_assign 
+		    (gimple_get_lhs (stmt), fold_convert (ltype, tmp2));
+		  gsi_remove (&gsi2, true);
+		  gimple tmp_stmt = gimple_build_assign (tmp2, low_arg);
+		  gsi_insert_before (&gsi2, low_assign, GSI_NEW_STMT);
+		  gsi_insert_before (&gsi2, tmp_stmt, GSI_NEW_STMT);
+		}
+	      else if (gimple_call_builtin_p (stmt,
+					      BUILT_IN_OMP_GET_THREAD_NUM))
+		{
+		  gcc_assert (high_assign == NULL);
+		  tree htype = TREE_TYPE (gimple_get_lhs (stmt));
+		  tree tmp2 = create_tmp_reg (TREE_TYPE (high_arg), NULL);
+		  
+		  high_assign = gimple_build_assign 
+		    (gimple_get_lhs (stmt), fold_convert (htype, tmp2));
+		  gsi_remove (&gsi2, true);
+		  gimple tmp_stmt = gimple_build_assign (tmp2, high_arg);
+		  gsi_insert_before (&gsi2, high_assign, GSI_NEW_STMT);
+		  gsi_insert_before (&gsi2, tmp_stmt, GSI_NEW_STMT);
+		}
+	      gsi_next (&gsi2);
+	    }
+	}      
       /* Declare local variables needed in CHILD_CFUN.  */
       block = DECL_INITIAL (child_fn);
       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
@@ -4862,7 +5110,9 @@  expand_omp_taskreg (struct omp_region *region)
     }
 
   /* Emit a library call to launch the children threads.  */
-  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
+  if (is_cilk_for)
+    expand_cilk_for_call (new_bb, entry_stmt, ws_args);
+  else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
     expand_parallel_call (region, new_bb, entry_stmt, ws_args);
   else
     expand_task_call (new_bb, entry_stmt);
@@ -6540,6 +6790,227 @@  expand_omp_for_static_chunk (struct omp_region *region,
     }
 }
 
+/* A subroutine of expand_omp_for.  Generate code for a _Cilk_for loop.
+   Given parameters:
+   for (V = N1; V cond N2; V += STEP) BODY;
+
+   where COND is a relational operator, we generate pseudocode
+
+   for (ind_var = low; ind_var < high; ind_var++)
+   {
+      if (COND is ">" or ">=")
+	V = N1 - (ind_var * STEP);
+      else
+	V = N1 + (ind_var * STEP);
+
+      <BODY>
+   }
+
+    In the above pseudocode, low and high are parameters of the child
+    function and a non-positive constant STEP is negated first.  Below we
+    insert temporaries set from calls to two OMP built-ins that cannot
+    otherwise appear in the body of a _Cilk_for (since OMP_FOR cannot be
+    mixed with _Cilk_for).  Those calls are later replaced with low and
+    high by the function that handles taskreg.  */
+
+
+static void
+expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
+{
+  bool broken_loop = region->cont == NULL;
+  tree type = cilk_for_check_loop_diff_type (TREE_TYPE (fd->loop.v));
+  basic_block entry_bb = region->entry;
+  basic_block cont_bb = region->cont;
+  
+  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
+  gcc_assert (broken_loop
+	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
+  basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
+  basic_block l1_bb, l2_bb;
+
+  if (!broken_loop)
+    {
+      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
+      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
+      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
+      l2_bb = BRANCH_EDGE (entry_bb)->dest;
+    }
+  else
+    {
+      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
+      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
+      l2_bb = single_succ (l1_bb);
+    }
+  basic_block exit_bb = region->exit;
+  basic_block l2_dom_bb = NULL;
+
+  gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
+
+  /* Below statements until the "tree high_val = ..." are pseudo statements 
+     used to pass information to be used by expand_omp_taskreg.
+     low_val and high_val will be replaced by the __low and __high
+     parameter from the child function.
+
+     The call_exprs part is a place-holder, it is mainly used 
+     to distinctly identify to the top-level part that this is
+     where we should put low and high (reasoning given in header 
+     comment).  */
+
+  tree t = build_call_expr
+    (builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS), 0);
+  t = fold_convert (type, t);
+  tree low_val = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
+					   GSI_SAME_STMT);
+  t = build_call_expr (builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM),
+		       0);
+  t = fold_convert (type, t);
+  tree high_val = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
+					   GSI_SAME_STMT);
+
+  tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
+  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
+  
+  /* Not needed in SSA form right now.  */
+  gcc_assert (!gimple_in_ssa_p (cfun));
+  if (l2_dom_bb == NULL)
+    l2_dom_bb = l1_bb;
+
+  tree n1 = low_val;
+  tree n2 = high_val;
+  
+  expand_omp_build_assign (&gsi, ind_var, n1);
+
+  /* Remove the GIMPLE_OMP_FOR statement.  */
+  gsi_remove (&gsi, true);
+
+  gimple stmt;
+  if (!broken_loop)
+    {
+      /* Code to control the increment goes in the CONT_BB.  */
+      gsi = gsi_last_bb (cont_bb);
+      stmt = gsi_stmt (gsi);
+      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
+      enum tree_code code = PLUS_EXPR;
+      if (POINTER_TYPE_P (type))
+	t = fold_build_pointer_plus (ind_var, build_one_cst (type)); 
+      else
+	t = fold_build2 (code, type, ind_var, build_one_cst (type));
+      expand_omp_build_assign (&gsi, ind_var, t);
+
+      /* Remove GIMPLE_OMP_CONTINUE.  */
+      gsi_remove (&gsi, true);
+    }
+
+  /* Emit the condition in L1_BB.  */
+  gsi = gsi_start_bb (l1_bb);
+
+  tree step = fold_convert (type, fd->loop.step);
+  if ((TREE_CODE (step) == INTEGER_CST && tree_int_cst_sgn (step) < 1)) 
+    step = fold_build1_loc (UNKNOWN_LOCATION, NEGATE_EXPR, type, step);
+
+  tree step_var = create_tmp_reg (type, NULL);
+  gsi_insert_after (&gsi, gimple_build_assign (step_var, 
+					       fold_convert (type, step)), 
+		    GSI_NEW_STMT);
+  t = build2 (MULT_EXPR, type, ind_var, step_var);
+  tree tmp = create_tmp_reg (type, NULL);
+  gsi_insert_after (&gsi, gimple_build_assign (tmp, t), GSI_NEW_STMT);
+
+  tree tmp2 = create_tmp_reg (type, NULL);
+  tree cvtd = fold_convert (type, fd->loop.n1);
+  gsi_insert_after (&gsi, gimple_build_assign (tmp2, cvtd), GSI_NEW_STMT);
+  
+  if (fd->loop.cond_code == GE_EXPR || fd->loop.cond_code == GT_EXPR)
+    t = fold_build2 (MINUS_EXPR, type, tmp2, tmp);
+ else
+   t = fold_build2 (PLUS_EXPR, type, tmp2, tmp);
+
+  tmp = create_tmp_reg (type, NULL);
+  gsi_insert_after (&gsi, gimple_build_assign (tmp, t), GSI_NEW_STMT);
+
+  cvtd = fold_convert (TREE_TYPE (fd->loop.v), tmp);
+  gsi_insert_after (&gsi, gimple_build_assign (fd->loop.v, cvtd), 
+		    GSI_NEW_STMT);
+  
+  t = fold_convert (type, n2);
+  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+				false, GSI_CONTINUE_LINKING);
+  /* The condition is always '<' since the runtime will fill in the low
+     and high values.  */
+  t = build2 (LT_EXPR, boolean_type_node, ind_var, t);
+  stmt = gimple_build_cond_empty (t);
+  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+  if (walk_tree (gimple_cond_lhs_ptr (stmt), expand_omp_regimplify_p,
+		 NULL, NULL)
+      || walk_tree (gimple_cond_rhs_ptr (stmt), expand_omp_regimplify_p,
+		    NULL, NULL))
+    {
+      gsi = gsi_for_stmt (stmt);
+      gimple_regimplify_operands (stmt, &gsi);
+    }
+
+  /* Remove GIMPLE_OMP_RETURN.  */
+  gsi = gsi_last_bb (exit_bb);
+  gsi_remove (&gsi, true);
+
+  /* Connect the new blocks.  */
+  remove_edge (FALLTHRU_EDGE (entry_bb));
+
+  edge e, ne;
+  if (!broken_loop)
+    {
+      remove_edge (BRANCH_EDGE (entry_bb));
+      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
+
+      e = BRANCH_EDGE (l1_bb);
+      ne = FALLTHRU_EDGE (l1_bb);
+      e->flags = EDGE_TRUE_VALUE;
+    }
+  else
+    {
+      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
+
+      ne = single_succ_edge (l1_bb);
+      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
+
+    }
+  ne->flags = EDGE_FALSE_VALUE;
+  e->probability = REG_BR_PROB_BASE * 7 / 8;
+  ne->probability = REG_BR_PROB_BASE / 8;
+
+  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
+  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
+  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
+
+  if (!broken_loop)
+    {
+      struct loop *loop = alloc_loop ();
+      loop->header = l1_bb;
+      loop->latch = cont_bb;
+      add_loop (loop, l1_bb->loop_father);
+      loop->safelen = INT_MAX;
+    }
+
+  /* Pick the correct library function based on the precision of the
+     induction variable type.  */
+  tree lib_fun = NULL_TREE;
+  if (TYPE_PRECISION (type) == 32)
+    lib_fun = cilk_for_32_fndecl;
+  else if (TYPE_PRECISION (type) == 64)
+    lib_fun = cilk_for_64_fndecl;
+  else
+    gcc_unreachable ();
+
+  gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
+  
+  /* WS_ARGS contains the library function flavor to call
+     (__cilkrts_cilk_for_64 or __cilkrts_cilk_for_32), and the
+     user-defined grain value.  If the user does not define one, then zero
+     is passed in by the parser.  */
+  vec_alloc (region->ws_args, 2);
+  region->ws_args->quick_push (lib_fun);
+  region->ws_args->quick_push (fd->chunk_size);
+}
 
 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
    loop.  Given parameters:
@@ -6880,6 +7351,8 @@  expand_omp_for (struct omp_region *region, gimple inner_stmt)
 
   if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_KIND_SIMD)
     expand_omp_simd (region, &fd);
+  else if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_KIND_CILKFOR)
+    expand_cilk_for (region, &fd);
   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
 	   && !fd.have_ordered)
     {
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk-fors.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk-fors.c
new file mode 100644
index 0000000..8b6112b
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk-fors.c
@@ -0,0 +1,87 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99"  { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#if HAVE_IO
+#include <stdio.h>
+#endif
+
+static void check (int *Array, int start, int end, int incr, int value)
+{
+  int ii = 0;
+  for (ii = start;  ii < end; ii = ii + incr)
+    if (Array[ii] != value)
+      __builtin_abort ();
+#if HAVE_IO
+  printf ("Passed\n");
+#endif
+}
+
+static void check_reverse (int *Array, int start, int end, int incr, int value)
+{
+  int ii = 0;
+  for (ii = start; ii >= end; ii = ii - incr)
+    if (Array[ii] != value)
+      __builtin_abort ();
+#if HAVE_IO
+  printf ("Passed\n");
+#endif
+}
+
+
+int main (void)
+{
+  int Array[10];
+  int x = 9, y = 0, z = 3;
+
+
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array[ii] = 1133;
+  check (Array, 0, 10, 1, 1133);
+
+  _Cilk_for (int ii = 0; ii < 10; ++ii)
+    Array[ii] = 3311;
+  check (Array, 0, 10, 1, 3311);
+
+  _Cilk_for (int ii = 9; ii > -1; ii--)
+    Array[ii] = 4433;
+  check_reverse (Array, 9, 0, 1, 4433);
+
+  _Cilk_for (int ii = 9; ii > -1; --ii)
+    Array[ii] = 9988;
+  check_reverse (Array, 9, 0, 1, 9988);
+
+  _Cilk_for (int ii = 0; ii < 10; ++ii)
+    Array[ii] = 3311;
+  check (Array, 0, 10, 1, 3311);
+
+  _Cilk_for (int ii = 0; ii < 10; ii += 2)
+    Array[ii] = 1328;
+  check (Array, 0, 10, 2, 1328);
+
+  _Cilk_for (int ii = 9; ii >= 0; ii -= 2)
+    Array[ii] = 1738;
+  check_reverse (Array, 9, 0, 2, 1738);
+
+
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    {
+      if (ii % 2)
+	Array[ii] = 1343;
+      else
+	Array[ii] = 3413;
+    }
+
+  check (Array, 1, 10, 2, 1343); 
+  check (Array, 0, 10, 2, 3413); 
+
+  _Cilk_for (short cc = 0; cc < 10; cc++) 
+    Array[cc] = 1343;
+  check (Array, 0, 10,  1,1343);
+
+  _Cilk_for (short cc = 9; cc >= 0; cc--)
+    Array[cc] = 1348;
+  check_reverse (Array, 9, 0, 1, 1348);
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c
new file mode 100644
index 0000000..ed73c34
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c
@@ -0,0 +1,58 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=c99" { target c } } */
+
+
+int main (void)
+{
+  int q = 0, ii = 0, jj = 0;
+
+  _Cilk_for (int ii; ii < 10; ii++) /* { dg-error "is not initialized" "" { target c } } */
+    /* { dg-error "expected" "" { target c++ } 10 } */
+    q = 5;
+
+  _Cilk_for (; ii < 10; ii++) /* { dg-error "expected iteration declaration" } */
+    q = 2;
+
+  _Cilk_for (int ii = 0; ; ii++) /* { dg-error "missing controlling predicate" } */
+    q = 2;
+
+  _Cilk_for (int ii = 0; ii < 10, jj < 10; ii++)  /* { dg-error "expected ';' before ',' token" "" { target c } } */
+    /* { dg-error "invalid controlling predicate" "" { target c++ }  20 } */
+    q = 5;
+
+  _Cilk_for (int ii = 0; ii < 10; ) /* { dg-error "missing increment" } */
+    q = 5;
+
+
+  _Cilk_for (int ii = 0, jj = 0; ii < 10; ii++) /* { dg-error "expected" } */ 
+    q = 5;
+
+  _Cilk_for (volatile int vii = 0; vii < 10; vii++) /* { dg-error "iteration variable cannot be volatile" } */
+    q = 5;
+
+ 
+  _Cilk_for (static int sii = 0; sii < 10; sii++) /* { dg-error "static" } */
+
+    q = 5;
+
+
+  _Cilk_for (float fii = 3.47; fii < 5.23; fii++) /* { dg-error "invalid type for iteration variable" } */
+    q = 5;
+
+
+  _Cilk_for (int ii = 0; 10 > jj; ii++) /* { dg-error "invalid controlling predicate" } */
+    q = 5;
+
+  _Cilk_for (int ii = 0; ii < 10; ii >> 1) /* { dg-error "invalid increment expression" } */
+    q = 5;
+
+  _Cilk_for (int ii = 10; ii >= 0; ii--) /* This is OK!  */
+    q = 5;
+
+  _Cilk_for (int ii; ii < 10; ii++) /* { dg-error "is not initialized" "" { target c } } */ 
+    /* { dg-error "expected" "" { target c++ }  53 } */
+    q = 5;
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain.c
new file mode 100644
index 0000000..6cb9b03
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain.c
@@ -0,0 +1,35 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99"  { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+
+int grain_value = 2;
+int main (void)
+{
+  int Array1[200], Array1_Serial[200];
+
+  for (int ii = 0; ii < 200; ii++)
+    {
+      Array1_Serial[ii] = 2;
+      Array1[ii] = 1;
+    }
+
+#pragma cilk grainsize = 2
+  _Cilk_for (int ii = 0; ii < 200; ii++)
+    Array1[ii] = 2;
+
+  for (int ii = 0; ii < 200; ii++)
+    if (Array1[ii] != Array1_Serial[ii])
+      return (ii+1);
+
+#pragma cilk grainsize = grain_value
+  _Cilk_for (int ii = 0; ii < 200; ii++) 
+    Array1[ii] = 2;
+
+  for (int ii = 0; ii < 200; ii++)
+    if (Array1[ii] != Array1_Serial[ii])
+      return (ii+1);
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c
new file mode 100644
index 0000000..e1e3217
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c
@@ -0,0 +1,48 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fcilkplus -Wunknown-pragmas" } */
+/* { dg-additional-options "-std=c99" { target c } } */
+
+
+char Array1[26];
+
+#pragma cilk grainsize = 2 /* { dg-error "must be inside a function" } */
+
+int main(int argc, char **argv)
+{
+/* This is OK.  */
+#pragma cilk grainsize = 2
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array1[ii] = 0;
+
+#pragma cilk grainsize 2 /* { dg-error "expected '=' before numeric constant" } */
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array1[ii] = 0;
+
+#pragma cilk grainsiz = 2 /* { dg-warning "ignoring #pragma cilk grainsiz" } */
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array1[ii] = 0;
+
+
+/* This is OK, it will do a type conversion to long int.  */
+#pragma cilk grainsize = 0.5 
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array1[ii] = 0;
+
+#pragma cilk grainsize = 1 
+  while (Array1[5] != 0) /* { dg-warning "is not followed by" } */
+    {
+    /* Blah */
+    }
+
+#pragma cilk grainsize = 1 
+  int q = 0; /* { dg-warning "is not followed by" } */
+  _Cilk_for (q = 0; q < 10; q++)
+    Array1[q]  = 5;
+
+  while (Array1[5] != 0)
+    {
+    /* Blah */
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_ptr_iter.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_ptr_iter.c
new file mode 100644
index 0000000..7a779f7
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_ptr_iter.c
@@ -0,0 +1,41 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99"  { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+
+
+/* <feature> loop control variable must have integer, pointer or class type
+   </feature>
+*/
+
+#define ARRAY_SIZE 10000
+int a[ARRAY_SIZE];
+
+int main(void)
+{ 
+  int ii = 0;
+
+#if 1
+  for (ii =0; ii < ARRAY_SIZE; ii++)
+    a[ii] = 5;
+#endif
+  _Cilk_for(int *aa = a; aa < a + ARRAY_SIZE; aa++) 
+    *aa = 0;
+#if 1
+  for (ii = 0; ii < ARRAY_SIZE; ii++) 
+    if (a[ii] != 0) 
+      __builtin_abort ();
+#endif
+
+  _Cilk_for (int *aa = a; aa < a + ARRAY_SIZE; aa = aa + 2)
+    *aa = 4;
+
+#if 1
+  for (ii = 0; ii < ARRAY_SIZE; ii = ii + 2) 
+    if (a[ii] != 4) 
+      __builtin_abort ();
+#endif
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/nested_cilk_for.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/nested_cilk_for.c
new file mode 100644
index 0000000..cffe17e
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/nested_cilk_for.c
@@ -0,0 +1,79 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99"  { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#if HAVE_IO
+#include <stdio.h>
+#endif
+
+int main (void)
+{
+  int Array[10][10];
+
+
+  for (int ii = 0; ii < 10; ii++)
+    for (int jj = 0; jj < 10; jj++)
+	{
+	  Array[ii][jj] = 0;
+	}
+
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    _Cilk_for (int jj = 0; jj < 5; jj++)
+      Array[ii][jj] = 5;
+
+  for (int ii = 0; ii < 10; ii++)
+    for (int jj = 0; jj < 5; jj++)
+      if (Array[ii][jj] != 5)
+#if HAVE_IO
+	printf("Array[%d][%d] = %d\n", ii, jj, Array[ii][jj]);
+#else
+	__builtin_abort ();
+#endif
+
+
+  /* One goes up and one goes down.  */
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    _Cilk_for (int jj = 9; jj >= 0; jj--)
+      Array[ii][jj] = 7;
+
+  for (int ii = 0; ii < 10; ii++)
+    for (int jj = 9; jj >= 0; jj--)
+      if (Array[ii][jj] != 7)
+#if HAVE_IO
+	printf("Array[%d][%d] = %d\n", ii, jj, Array[ii][jj]);
+#else
+	__builtin_abort ();
+#endif
+
+  /* different step sizes.  */
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    _Cilk_for (int jj = 0; jj < 10; jj += 2)
+      Array[ii][jj] = 9;
+  
+  for (int ii = 0; ii < 10; ii++)
+    for (int jj = 0; jj < 10; jj += 2)
+      if (Array[ii][jj] != 9)
+#if HAVE_IO
+	printf("Array[%d][%d] = %d\n", ii, jj, Array[ii][jj]);
+#else
+	__builtin_abort ();
+#endif
+
+  /* different step sizes.  */
+  _Cilk_for (int ii = 0; ii < 10; ii += 2)
+    _Cilk_for (int jj = 5; jj < 9; jj++)
+      Array[ii][jj] = 11; 
+  
+  for (int ii = 0; ii < 10; ii += 2)
+    for (int jj = 5; jj < 9; jj++)
+      if (Array[ii][jj] != 11)
+#if HAVE_IO
+	printf("Array[%d][%d] = %d\n", ii, jj, Array[ii][jj]);
+#else
+	__builtin_abort ();
+#endif
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/cf3.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/cf3.cc
new file mode 100644
index 0000000..8d88c5f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/cf3.cc
@@ -0,0 +1,96 @@ 
+/* { dg-options "-fcilkplus" } */
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+template <typename T>
+class I
+{
+public:
+  typedef ptrdiff_t difference_type;
+  I ();
+  ~I ();
+  I (T *);
+  I (const I &);
+  T &operator * ();
+  T *operator -> ();
+  T &operator [] (const difference_type &) const;
+  I &operator = (const I &);
+  I &operator ++ ();
+  I operator ++ (int);
+  I &operator -- ();
+  I operator -- (int);
+  I &operator += (const difference_type &);
+  I &operator -= (const difference_type &);
+  I operator + (const difference_type &) const;
+  I operator - (const difference_type &) const;
+  template <typename S> friend bool operator == (I<S> &, I<S> &);
+  template <typename S> friend bool operator == (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator < (I<S> &, I<S> &);
+  template <typename S> friend bool operator < (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator <= (I<S> &, I<S> &);
+  template <typename S> friend bool operator <= (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator > (I<S> &, I<S> &);
+  template <typename S> friend bool operator > (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator >= (I<S> &, I<S> &);
+  template <typename S> friend bool operator >= (const I<S> &, const I<S> &);
+  template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &);
+  template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &);
+  template <typename S> friend I<S> operator + (typename I<S>::difference_type , const I<S> &);
+private:
+  T *p;
+};
+template <typename T> I<T>::I () : p (0) {}
+template <typename T> I<T>::~I () {}
+template <typename T> I<T>::I (T *x) : p (x) {}
+template <typename T> I<T>::I (const I &x) : p (x.p) {}
+template <typename T> T &I<T>::operator * () { return *p; }
+template <typename T> T *I<T>::operator -> () { return p; }
+template <typename T> T &I<T>::operator [] (const difference_type &x) const { return p[x]; }
+template <typename T> I<T> &I<T>::operator = (const I &x) { p = x.p; return *this; }
+template <typename T> I<T> &I<T>::operator ++ () { ++p; return *this; }
+template <typename T> I<T> I<T>::operator ++ (int) { return I (p++); }
+template <typename T> I<T> &I<T>::operator -- () { --p; return *this; }
+template <typename T> I<T> I<T>::operator -- (int) { return I (p--); }
+template <typename T> I<T> &I<T>::operator += (const difference_type &x) { p += x; return *this; }
+template <typename T> I<T> &I<T>::operator -= (const difference_type &x) { p -= x; return *this; }
+template <typename T> I<T> I<T>::operator + (const difference_type &x) const { return I (p + x); }
+template <typename T> I<T> I<T>::operator - (const difference_type &x) const { return I (p - x); }
+template <typename T> bool operator == (I<T> &x, I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator == (const I<T> &x, const I<T> &y) { return x.p == y.p; }
+template <typename T> bool operator != (I<T> &x, I<T> &y) { return !(x == y); }
+template <typename T> bool operator != (const I<T> &x, const I<T> &y) { return !(x == y); }
+template <typename T> bool operator < (I<T> &x, I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator < (const I<T> &x, const I<T> &y) { return x.p < y.p; }
+template <typename T> bool operator <= (I<T> &x, I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator <= (const I<T> &x, const I<T> &y) { return x.p <= y.p; }
+template <typename T> bool operator > (I<T> &x, I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator > (const I<T> &x, const I<T> &y) { return x.p > y.p; }
+template <typename T> bool operator >= (I<T> &x, I<T> &y) { return x.p >= y.p; }
+template <typename T> bool operator >= (const I<T> &x, const I<T> &y) { return x.p >= y.p; }
+template <typename T> typename I<T>::difference_type operator - (I<T> &x, I<T> &y) { return x.p - y.p; }
+template <typename T> typename I<T>::difference_type operator - (const I<T> &x, const I<T> &y) { return x.p - y.p; }
+template <typename T> I<T> operator + (typename I<T>::difference_type x, const I<T> &y) { return I<T> (x + y.p); }
+
+template <typename T>
+class J
+{
+public:
+  J(const I<T> &x, const I<T> &y) : b (x), e (y) {}
+  const I<T> &begin ();
+  const I<T> &end ();
+private:
+  I<T> b, e;
+};
+
+template <typename T> const I<T> &J<T>::begin () { return b; }
+template <typename T> const I<T> &J<T>::end () { return e; }
+
+template <typename T>
+void baz (I<T> &i);
+
+void
+foo (J<int> j)
+{
+  _Cilk_for (I<int> i = j.begin (); i < j.end (); i += 2)
+    baz (i);
+}
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/cilk-for-tplt.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/cilk-for-tplt.cc
new file mode 100644
index 0000000..8221371
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/cilk-for-tplt.cc
@@ -0,0 +1,25 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#define SIZE 100
+#define CHECK_VALUE 5
+
+template <class T>
+int func (T start, T end)
+{
+  int Array[SIZE];
+  _Cilk_for (T ii = 0; ii < end; ii++)
+    Array[ii] = CHECK_VALUE;
+  
+  for (T ii = 0; ii < end; ii++)
+    if (Array[ii] != CHECK_VALUE)
+      __builtin_abort ();
+
+  return 0;
+}
+
+int main (void)
+{
+  return func <int> (0, 100) + func <long> (0, 100);
+}
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/stl_iter.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_iter.cc
new file mode 100644
index 0000000..2ac8c72
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_iter.cc
@@ -0,0 +1,52 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#include <vector>
+#include <cstdio>
+#include <iostream>
+#include <algorithm>
+
+using namespace std;
+
+
+int main(void)
+{
+vector <int> array;
+vector <int> array_serial;
+
+#if 1
+for (int ii = -1; ii < 10; ii++)
+{   
+  array.push_back(ii);
+  array_serial.push_back (ii);
+}
+#endif
+_Cilk_for (vector<int>::iterator iter = array.begin(); iter != array.end();
+          iter++)
+{
+   if (*iter  == 6) 
+     *iter = 13;
+}
+for (vector<int>::iterator iter = array_serial.begin(); 
+     iter != array_serial.end(); iter++)
+{
+   if (*iter  == 6) 
+     *iter = 13;
+}
+sort (array.begin(), array.end());
+sort (array_serial.begin(), array_serial.end());
+
+vector <int>::iterator iter = array.begin ();
+vector <int>::iterator iter_serial = array_serial.begin ();
+
+while (iter != array.end () && iter_serial != array_serial.end ())
+{
+  if (*iter != *iter_serial)
+    __builtin_abort ();
+  iter++;
+  iter_serial++;
+}
+
+return 0;
+}   
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/stl_rev_iter.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_rev_iter.cc
new file mode 100644
index 0000000..1cf3301
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_rev_iter.cc
@@ -0,0 +1,72 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+
+#include <vector>
+#include <cstdio>
+#include <iostream>
+#include <algorithm>
+
+using namespace std;
+
+
+int main(void)
+{
+vector <int> array,array_serial;
+
+#if 1
+for (int ii = -1; ii < 10; ii++)
+{   
+  array.push_back(ii);
+  array_serial.push_back(ii);
+}
+#endif
+_Cilk_for (vector<int>::reverse_iterator iter4 = array.rbegin(); 
+	   iter4 != array.rend(); iter4++)
+{
+  if (*iter4 == 0x8) {
+    *iter4 = 9;
+  }
+}
+
+_Cilk_for (vector<int>::reverse_iterator iter4 = array_serial.rbegin(); 
+	   iter4 != array_serial.rend(); iter4++)
+{
+  if (*iter4 == 0x8) {
+    *iter4 = 9;
+  }
+}
+_Cilk_for (vector<int>::reverse_iterator iter2 = array.rbegin(); 
+	   iter2 != array.rend();
+          iter2 += 1) 
+{
+   if ((*iter2 == 0x4) || (*iter2 == 0x7)) {
+    *iter2 = 0x3;
+   }
+}
+for (vector<int>::reverse_iterator iter2 = array_serial.rbegin(); 
+     iter2 != array_serial.rend();
+          iter2 += 1) 
+{
+   if ((*iter2 == 0x4) || (*iter2 == 0x7)) {
+    *iter2 = 0x3;
+   }
+}
+sort (array.begin(), array.end());
+sort (array_serial.begin(), array_serial.end());
+
+vector <int>::iterator iter = array.begin ();
+vector <int>::iterator iter_serial = array_serial.begin ();
+while (iter != array.end () && iter_serial != array_serial.end ())
+{
+  if (*iter != *iter_serial)
+    __builtin_abort ();
+  iter++;
+  iter_serial++;
+}
+
+return 0;
+}   
+
+
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/stl_test.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_test.cc
new file mode 100644
index 0000000..8d2e61e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_test.cc
@@ -0,0 +1,58 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+/* Check that a _Cilk_for loop writing std::vector elements by index
+   produces the same contents as the equivalent serial for loop.  */
+
+#include <iostream>
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+#include <algorithm>
+#include <list>
+
+using namespace std;
+
+
+int main(int argc, char **argv)
+{
+  vector <int> number_list, number_list_serial;
+  int new_number = 0;
+  int no_elements = 0;
+
+  /* Use 10 elements unless exactly one command-line argument is given;
+     in that case no_elements stays 0 and every loop below is empty.  */
+  if (argc != 2)
+  {
+    no_elements = 10;
+  }
+
+  /* Give both vectors no_elements zero-valued entries.  */
+  number_list.clear();
+  number_list_serial.clear();
+  for (int ii = 0; ii < no_elements; ii++)
+  {
+    number_list.push_back(new_number);
+    number_list_serial.push_back(new_number);
+  }
+
+  /* Parallel fill.  */
+  _Cilk_for (int jj = 0; jj < no_elements; jj++)
+  {
+    number_list[jj] = jj + no_elements;
+  }
+
+  /* Serial reference fill.  */
+  for (int jj = 0; jj < no_elements; jj++)
+  {
+    number_list_serial[jj] = jj + no_elements;
+  }
+
+  /* Abort on the first element where the two results differ.  */
+  for (int jj = 0; jj < no_elements; jj++)
+    if (number_list_serial[jj] != number_list[jj])
+      __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index e548a0d..d8c14e3 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -351,6 +351,7 @@  enum omp_clause_schedule_kind {
   OMP_CLAUSE_SCHEDULE_GUIDED,
   OMP_CLAUSE_SCHEDULE_AUTO,
   OMP_CLAUSE_SCHEDULE_RUNTIME,
+  OMP_CLAUSE_SCHEDULE_CILKFOR,
   OMP_CLAUSE_SCHEDULE_LAST
 };
 
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 0595499..91efd9f 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -411,6 +411,9 @@  dump_omp_clause (pretty_printer *buffer, tree clause, int spc, int flags)
 	case OMP_CLAUSE_SCHEDULE_AUTO:
 	  pp_string (buffer, "auto");
 	  break;
+	case OMP_CLAUSE_SCHEDULE_CILKFOR:
+	  pp_string (buffer, "cilk-for grain");
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -2392,6 +2395,12 @@  dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
       pp_string (buffer, "#pragma simd");
       goto dump_omp_loop;
 
+    case CILK_FOR:
+      /* Jump past the generic clause dump at dump_omp_loop: for
+	 _Cilk_for the clauses are printed after the _Cilk_for (...)
+	 header instead.  */
+      goto dump_omp_loop_cilk_for;
+
     case OMP_DISTRIBUTE:
       pp_string (buffer, "#pragma omp distribute");
       goto dump_omp_loop;
@@ -2420,6 +2429,8 @@  dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
     dump_omp_loop:
       dump_omp_clauses (buffer, OMP_FOR_CLAUSES (node), spc, flags);
 
+    dump_omp_loop_cilk_for:
+
       if (!(flags & TDF_SLIM))
 	{
 	  int i;
@@ -2440,7 +2451,10 @@  dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
 		{
 		  spc += 2;
 		  newline_and_indent (buffer, spc);
-		  pp_string (buffer, "for (");
+		  if (TREE_CODE (node) == CILK_FOR)
+		    pp_string (buffer, "_Cilk_for (");
+		  else 
+		    pp_string (buffer, "for (");
 		  dump_generic_node (buffer,
 				     TREE_VEC_ELT (OMP_FOR_INIT (node), i),
 				     spc, flags, false);
@@ -2454,6 +2468,8 @@  dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
 				     spc, flags, false);
 		  pp_right_paren (buffer);
 		}
+	      if (TREE_CODE (node) == CILK_FOR) 
+		dump_omp_clauses (buffer, OMP_FOR_CLAUSES (node), spc, flags);
 	    }
 	  if (OMP_FOR_BODY (node))
 	    {
diff --git a/gcc/tree.def b/gcc/tree.def
index f8d6444..558d7c8 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1051,6 +1051,10 @@  DEFTREECODE (OMP_SIMD, "omp_simd", tcc_statement, 6)
    Operands like for OMP_FOR.  */
 DEFTREECODE (CILK_SIMD, "cilk_simd", tcc_statement, 6)
 
+/* Cilk Plus - _Cilk_for (..)
+   Operands like for OMP_FOR.  */
+DEFTREECODE (CILK_FOR, "cilk_for", tcc_statement, 6)
+
 /* OpenMP - #pragma omp distribute [clause1 ... clauseN]
    Operands like for OMP_FOR.  */
 DEFTREECODE (OMP_DISTRIBUTE, "omp_distribute", tcc_statement, 6)