diff mbox

_Cilk_for for C and C++

Message ID BF230D13CA30DD48930C31D4099330003A4B86D5@FMSMSX101.amr.corp.intel.com
State New
Headers show

Commit Message

Iyer, Balaji V Jan. 7, 2014, 10:11 p.m. UTC
> -----Original Message-----
> From: Jakub Jelinek [mailto:jakub@redhat.com]
> Sent: Tuesday, January 7, 2014 4:29 PM
> To: Iyer, Balaji V
> Cc: Jason Merrill; 'Jeff Law'; 'Aldy Hernandez'; 'gcc-patches@gcc.gnu.org';
> 'rth@redhat.com'
> Subject: Re: [PATCH] _Cilk_for for C and C++
> 
> On Tue, Jan 07, 2014 at 09:24:21PM +0000, Iyer, Balaji V wrote:
> > > -----Original Message-----
> > > From: Jason Merrill [mailto:jason@redhat.com]
> > > Sent: Tuesday, January 7, 2014 3:41 PM
> > > To: Iyer, Balaji V; 'Jeff Law'; 'Aldy Hernandez'
> > > Cc: 'gcc-patches@gcc.gnu.org'; 'rth@redhat.com'; 'Jakub Jelinek'
> > > Subject: Re: [PATCH] _Cilk_for for C and C++
> > >
> > > On 12/17/2013 07:21 PM, Iyer, Balaji V wrote:
> > > > The reason why I store it in OMP_FOR_CLAUSE is because OMP clauses
> > > cannot occur in _Cilk_for. So adding a new clause seem to be an
> > > overkill IMHO. I need a place to store the grain value and so I chose this
> spot.
> > >
> > > But code expects OMP_FOR_CLAUSES to have a certain form, and you
> are
> > > violating that so that now code needs to check whether we're dealing
> > > with a for loop in order to know to parse OMP_FOR_CLAUSES.  Doing it
> > > your way requires lots of little special cases.  Please represent it as a
> clause.
> >
> > Hi Jason,
> > 	In gimplify_omp_for, I remove the information in
> OMP_FOR_CLAUSES ()
> > 	and then replace it with a NULL_TREE.  Till that point, nothing
> > 	steps on it (except in pt.c and that I am handling it).  Then the
> > 	grain value is stored in gimple tree for omp_for.
> 
> So, you are abusing OMP_FOR_CLAUSES for shorter time, still, I agree with
> Jason that you shouldn't do that.
> 
> If you don't want to add a new clause, just use a similar existing one, if grain
> is something like scheduling chunk size, just with a different name for it, then
> using OMP_CLAUSE_SCHEDULE with OMP_CLAUSE_SCHEDULE_EXPR being
> the grain expression would be certainly cleaner.
> But even adding a new artificial clause isn't that hard.
> 

Hi Jason and Jakub,
	I used a similar existing one (safelen). Attached, please find 2 fixed patches for C and C++ along with their changelogs.

Is this OK for trunk?

Thanks,

Balaji V. Iyer.

> 	Jakub

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 40d12bc..9d24691
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -414,6 +414,7 @@ const struct c_common_resword c_common_reswords[] =
   { "_Complex",		RID_COMPLEX,	0 },
   { "_Cilk_spawn",      RID_CILK_SPAWN, 0 },
   { "_Cilk_sync",       RID_CILK_SYNC,  0 },
+  { "_Cilk_for",        RID_CILK_FOR,   0 },
   { "_Imaginary",	RID_IMAGINARY, D_CONLY },
   { "_Decimal32",       RID_DFLOAT32,  D_CONLY | D_EXT },
   { "_Decimal64",       RID_DFLOAT64,  D_CONLY | D_EXT },
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 7e3ece6..0eaebf3
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -149,7 +149,7 @@ enum rid
   RID_CONSTEXPR, RID_DECLTYPE, RID_NOEXCEPT, RID_NULLPTR, RID_STATIC_ASSERT,
 
   /* Cilk Plus keywords.  */
-  RID_CILK_SPAWN, RID_CILK_SYNC,
+  RID_CILK_SPAWN, RID_CILK_SYNC, RID_CILK_FOR,
   
   /* Objective-C ("AT" reserved words - they are only keywords when
      they follow '@')  */
diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c
index ac380ee..b15cd4c
--- a/gcc/c-family/c-omp.c
+++ b/gcc/c-family/c-omp.c
@@ -386,7 +386,7 @@ c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
   bool fail = false;
   int i;
 
-  if (code == CILK_SIMD
+  if ((code == CILK_SIMD || code == CILK_FOR) 
       && !c_check_cilk_loop (locus, TREE_VEC_ELT (declv, 0)))
     fail = true;
 
@@ -516,7 +516,7 @@ c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 					    0))
 		    TREE_SET_CODE (cond, TREE_CODE (cond) == NE_EXPR
 					 ? LT_EXPR : GE_EXPR);
-		  else if (code != CILK_SIMD)
+		  else if (code != CILK_SIMD && code != CILK_FOR)
 		    cond_ok = false;
 		}
 	    }
diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index af28085..6f22148
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -1394,6 +1394,11 @@ init_pragma (void)
 
   cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, false,
 				false);
+
+  if (flag_enable_cilkplus && !flag_preprocess_only)
+    cpp_register_deferred_pragma (parse_in, "cilk", "grainsize",
+				  PRAGMA_CILK_GRAINSIZE, true, false);
+
 #ifdef HANDLE_PRAGMA_PACK_WITH_EXPANSION
   c_register_pragma_with_expansion (0, "pack", handle_pragma_pack);
 #else
diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h
index 6f1bf74..b9f09ba
--- a/gcc/c-family/c-pragma.h
+++ b/gcc/c-family/c-pragma.h
@@ -55,6 +55,9 @@ typedef enum pragma_kind {
   /* Top level clause to handle all Cilk Plus pragma simd clauses.  */
   PRAGMA_CILK_SIMD,
 
+  /* This pragma handles setting of grainsize for a _Cilk_for.  */
+  PRAGMA_CILK_GRAINSIZE,
+
   PRAGMA_GCC_PCH_PREPROCESS,
   PRAGMA_IVDEP,
 
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index f73df08..f0320ec
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -1248,10 +1248,11 @@ static bool c_parser_objc_diagnose_bad_element_prefix
   (c_parser *, struct c_declspecs *);
 
 /* Cilk Plus supporting routines.  */
-static void c_parser_cilk_simd (c_parser *);
+static void c_parser_cilk_simd (c_parser *, tree);
 static bool c_parser_cilk_verify_simd (c_parser *, enum pragma_context);
 static tree c_parser_array_notation (location_t, c_parser *, tree, tree);
 static tree c_parser_cilk_clause_vectorlength (c_parser *, tree, bool);
+static void c_parser_cilk_grainsize (c_parser *);
 
 /* Parse a translation unit (C90 6.7, C99 6.9).
 
@@ -4878,6 +4879,16 @@ c_parser_statement_after_labels (c_parser *parser)
 	case RID_FOR:
 	  c_parser_for_statement (parser, false);
 	  break;
+	case RID_CILK_FOR:
+	  if (!flag_enable_cilkplus)
+	    {
+	      error_at (c_parser_peek_token (parser)->location,
+			"-fcilkplus must be enabled to use %<_Cilk_for%>");
+	      c_parser_skip_to_end_of_block_or_statement (parser);
+	    }
+	  else
+	    c_parser_cilk_simd (parser, integer_zero_node);
+	  break;
 	case RID_CILK_SYNC:
 	  c_parser_consume_token (parser);
 	  c_parser_skip_until_found (parser, CPP_SEMICOLON, "expected %<;%>");
@@ -9488,7 +9499,25 @@ c_parser_pragma (c_parser *parser, enum pragma_context context)
       if (!c_parser_cilk_verify_simd (parser, context))
 	return false;
       c_parser_consume_pragma (parser);
-      c_parser_cilk_simd (parser);
+      c_parser_cilk_simd (parser, NULL_TREE);
+      return false;
+
+    case PRAGMA_CILK_GRAINSIZE:
+      if (!flag_enable_cilkplus)
+	{
+	  warning (0, "%<#pragma grainsize%> ignored because -fcilkplus is not"
+		   " enabled");
+	  c_parser_skip_until_found (parser, CPP_PRAGMA_EOL, NULL);
+	  return false;
+	}
+      if (context == pragma_external)
+	{
+	  error_at (c_parser_peek_token (parser)->location,
+		    "%<#pragma grainsize%> must be inside a function");
+	  c_parser_skip_until_found (parser, CPP_PRAGMA_EOL, NULL);
+	  return false;
+	}
+      c_parser_cilk_grainsize (parser);
       return false;
 
     default:
@@ -11583,7 +11612,7 @@ c_parser_omp_flush (c_parser *parser)
 
 static tree
 c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
-		       tree clauses, tree *cclauses)
+		       tree clauses_or_grain, tree *cclauses)
 {
   tree decl, cond, incr, save_break, save_cont, body, init, stmt, cl;
   tree declv, condv, incrv, initv, ret = NULL;
@@ -11591,6 +11620,9 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
   int i, collapse = 1, nbraces = 0;
   location_t for_loc;
   vec<tree, va_gc> *for_block = make_tree_vector ();
+  tree clauses = code == CILK_FOR ? NULL_TREE : clauses_or_grain;
+  tree grain = code == CILK_FOR ? clauses_or_grain : NULL_TREE;
+  tree top_body = NULL_TREE, top_level_body = NULL_TREE;
 
   for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl))
     if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE)
@@ -11603,11 +11635,18 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
   condv = make_tree_vec (collapse);
   incrv = make_tree_vec (collapse);
 
-  if (!c_parser_next_token_is_keyword (parser, RID_FOR))
+  if (code != CILK_FOR
+      && !c_parser_next_token_is_keyword (parser, RID_FOR))
     {
       c_parser_error (parser, "for statement expected");
       return NULL;
     }
+  if (code == CILK_FOR
+      && !c_parser_next_token_is_keyword (parser, RID_CILK_FOR))
+    {
+      c_parser_error (parser, "_Cilk_for statement expected");
+      return NULL;
+    }
   for_loc = c_parser_peek_token (parser)->location;
   c_parser_consume_token (parser);
 
@@ -11685,7 +11724,7 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
 	    case LE_EXPR:
 	      break;
 	    case NE_EXPR:
-	      if (code == CILK_SIMD)
+	      if (code == CILK_SIMD || code == CILK_FOR)
 		break;
 	      /* FALLTHRU.  */
 	    default:
@@ -11767,6 +11806,12 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
     c_break_label = size_one_node;
   save_cont = c_cont_label;
   c_cont_label = NULL_TREE;
+
+  if (code == CILK_FOR)
+    {
+      top_level_body = push_stmt_list ();
+      top_body = c_begin_omp_parallel ();
+    }
   body = push_stmt_list ();
 
   if (open_brace_parsed)
@@ -11814,6 +11859,13 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
 	}
     }
 
+  if (code == CILK_FOR)
+    {
+      body = add_stmt (body);
+      body = c_finish_omp_parallel (loc, NULL_TREE, top_body);
+      body = pop_stmt_list (top_level_body);
+    }
+
   /* Only bother calling c_finish_omp_for if we haven't already generated
      an error from the initialization parsing.  */
   if (!fail)
@@ -11859,6 +11911,17 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
 		  }
 	    }
 	  OMP_FOR_CLAUSES (stmt) = clauses;
+	  /* If it is a _Cilk_for statement, then the OMP_FOR_CLAUSES location
+	     stores the user-defined grain value or an integer_zero_node 
+	     indicating that the runtime must compute a suitable grain.  */
+	  if (code == CILK_FOR)
+	    {
+	      tree l = build_omp_clause (EXPR_LOCATION (grain),
+					 OMP_CLAUSE_SAFELEN);
+	      OMP_CLAUSE_SAFELEN_EXPR (l) = grain;
+	      OMP_CLAUSE_CHAIN (l) = NULL_TREE;
+	      OMP_FOR_CLAUSES (stmt) = l;
+	    }
 	}
       ret = stmt;
     }
@@ -13762,16 +13825,65 @@ c_parser_cilk_all_clauses (c_parser *parser)
   return c_finish_cilk_clauses (clauses);
 }
 
+/* This function helps parse the grainsize pragma for a _Cilk_for statement. 
+   Here is the correct syntax of this pragma: 
+	    #pragma cilk grainsize = <EXP>  */
+
+static void
+c_parser_cilk_grainsize (c_parser *parser)
+{
+  extern tree convert_to_integer (tree, tree);
+
+  /* consume the 'grainsize' keyword.  */
+  c_parser_consume_pragma (parser);
+
+  if (c_parser_require (parser, CPP_EQ, "expected %<=%>") != 0)
+    {
+      struct c_expr g_expr = c_parser_binary_expression (parser, NULL, NULL);
+      if (g_expr.value && TREE_CODE (g_expr.value) == C_MAYBE_CONST_EXPR)
+	{
+	  error_at (input_location, "cannot convert grain to long integer.\n");
+	  c_parser_skip_to_pragma_eol (parser);
+	}   
+      else if (g_expr.value && g_expr.value != error_mark_node)
+	{
+	  c_parser_skip_to_pragma_eol (parser);
+	  c_token *token = c_parser_peek_token (parser);
+	  if (token && token->type == CPP_KEYWORD
+	      && token->keyword == RID_CILK_FOR)
+	    {
+	      tree grain = convert_to_integer (long_integer_type_node,
+					       g_expr.value);
+	      if (grain && grain != error_mark_node) 
+		c_parser_cilk_simd (parser, grain);
+	    }
+	  else
+	    warning (0, "grainsize pragma is not followed by %<_Cilk_for%>");
+	}
+      else
+	c_parser_skip_to_pragma_eol (parser);
+    }
+  else
+    c_parser_skip_to_pragma_eol (parser);
+}
+
 /* Main entry point for parsing Cilk Plus <#pragma simd> for
    loops.  */
 
 static void
-c_parser_cilk_simd (c_parser *parser)
+c_parser_cilk_simd (c_parser *parser, tree grain)
 {
-  tree clauses = c_parser_cilk_all_clauses (parser);
+  bool is_cilk_for = grain == NULL_TREE ? false : true;
+  tree clauses = NULL_TREE;
+  
+  if (!is_cilk_for)
+    clauses = c_parser_cilk_all_clauses (parser);
+  else 
+    clauses = grain;
   tree block = c_begin_compound_stmt (true);
   location_t loc = c_parser_peek_token (parser)->location;
-  c_parser_omp_for_loop (loc, parser, CILK_SIMD, clauses, NULL);
+  enum tree_code code = is_cilk_for ? CILK_FOR : CILK_SIMD;
+  c_parser_omp_for_loop (loc, parser, code, clauses, NULL);
   block = c_end_compound_stmt (loc, block, true);
   add_stmt (block);
 }
diff --git a/gcc/cilk-builtins.def b/gcc/cilk-builtins.def
index 9f3240a..bf319d5
--- a/gcc/cilk-builtins.def
+++ b/gcc/cilk-builtins.def
@@ -31,3 +31,5 @@ DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_SYNC, "__cilkrts_sync")
 DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_LEAVE_FRAME, "__cilkrts_leave_frame")
 DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_POP_FRAME, "__cilkrts_pop_frame")
 DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_SAVE_FP, "__cilkrts_save_fp_ctrl_state")
+DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_FOR_32, "__cilkrts_cilk_for_32")
+DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_FOR_64, "__cilkrts_cilk_for_64")
diff --git a/gcc/cilk-common.c b/gcc/cilk-common.c
index afe88c9..bc1092b
--- a/gcc/cilk-common.c
+++ b/gcc/cilk-common.c
@@ -106,6 +106,26 @@ install_builtin (const char *name, tree fntype, enum built_in_function code,
   return fndecl;
 }
 
+/* Returns a FUNCTION_DECL of type TYPE whose name is *NAME.  */
+
+static tree
+cilk_declare_looper (const char *name, tree type, enum built_in_function code)
+{
+  tree cb, ft, fn;
+
+  cb = build_function_type_list (void_type_node,
+                                 ptr_type_node, type, type,
+                                 NULL_TREE);
+  cb = build_pointer_type (cb);
+  ft = build_function_type_list (void_type_node,
+                                 cb, ptr_type_node, type,
+                                 integer_type_node, NULL_TREE);
+  fn = install_builtin (name, ft, code, false);
+  TREE_NOTHROW (fn) = 0;
+
+  return fn;
+}
+
 /* Creates and initializes all the built-in Cilk keywords functions and three
    structures: __cilkrts_stack_frame, __cilkrts_pedigree and __cilkrts_worker.
    Detailed information about __cilkrts_stack_frame and
@@ -269,6 +289,15 @@ cilk_init_builtins (void)
   cilk_save_fp_fndecl = install_builtin ("__cilkrts_save_fp_ctrl_state", 
 					 fptr_fun, BUILT_IN_CILK_SAVE_FP,
 					 false);
+  /* __cilkrts_cilk_for_32 (...);  */
+  cilk_for_32_fndecl = cilk_declare_looper ("__cilkrts_cilk_for_32",
+                                            unsigned_intSI_type_node,
+                                            BUILT_IN_CILK_FOR_32);
+  /* __cilkrts_cilk_for_64 (...);  */
+  cilk_for_64_fndecl = cilk_declare_looper ("__cilkrts_cilk_for_64",
+                                            unsigned_intDI_type_node,
+                                            BUILT_IN_CILK_FOR_64);
+
 }
 
 /* Get the appropriate frame arguments for CALL that is of type CALL_EXPR.  */
diff --git a/gcc/cilk.h b/gcc/cilk.h
index d2ae931..0e98998
--- a/gcc/cilk.h
+++ b/gcc/cilk.h
@@ -40,6 +40,9 @@ enum cilk_tree_index  {
   CILK_TI_F_POP,                      /* __cilkrts_pop_frame (...).  */
   CILK_TI_F_RETHROW,                  /* __cilkrts_rethrow (...).  */
   CILK_TI_F_SAVE_FP,                  /* __cilkrts_save_fp_ctrl_state (...).  */
+  CILK_TI_F_LOOP_32,                  /* __cilkrts_cilk_for_32 (...).  */
+  CILK_TI_F_LOOP_64,                  /* __cilkrts_cilk_for_64 (...).  */
+
   /* __cilkrts_stack_frame struct fields.  */
   CILK_TI_FRAME_FLAGS,                /* stack_frame->flags.  */
   CILK_TI_FRAME_PARENT,               /* stack_frame->parent.  */
@@ -77,6 +80,8 @@ extern GTY (()) tree cilk_trees[CILK_TI_MAX];
 #define cilk_rethrow_fndecl           cilk_trees[CILK_TI_F_RETHROW]
 #define cilk_pop_fndecl               cilk_trees[CILK_TI_F_POP]
 #define cilk_save_fp_fndecl           cilk_trees[CILK_TI_F_SAVE_FP]
+#define cilk_for_32_fndecl            cilk_trees[CILK_TI_F_LOOP_32]
+#define cilk_for_64_fndecl            cilk_trees[CILK_TI_F_LOOP_64]
 
 #define cilk_worker_type_fndecl       cilk_trees[CILK_TI_WORKER_TYPE]
 #define cilk_frame_type_decl          cilk_trees[CILK_TI_FRAME_TYPE]
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index 2d1e1c7..1e7bebf 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -1158,6 +1158,9 @@ dump_gimple_omp_for (pretty_printer *buffer, gimple gs, int spc, int flags)
 	case GF_OMP_FOR_KIND_DISTRIBUTE:
 	  pp_string (buffer, "#pragma omp distribute");
 	  break;
+	case GF_OMP_FOR_KIND_CILKFOR:
+	  gcc_assert (flag_enable_cilkplus);
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -1167,7 +1170,11 @@ dump_gimple_omp_for (pretty_printer *buffer, gimple gs, int spc, int flags)
 	  if (i)
 	    spc += 2;
 	  newline_and_indent (buffer, spc);
-	  pp_string (buffer, "for (");
+	  if (flag_enable_cilkplus 
+	      && gimple_omp_for_kind (gs) == GF_OMP_FOR_KIND_CILKFOR)
+	    pp_string (buffer, "_Cilk_for (");
+	  else
+	    pp_string (buffer, "for (");
 	  dump_generic_node (buffer, gimple_omp_for_index (gs, i), spc,
 			     flags, false);
 	  pp_string (buffer, " = ");
@@ -1192,6 +1199,9 @@ dump_gimple_omp_for (pretty_printer *buffer, gimple gs, int spc, int flags)
 	    case GE_EXPR:
 	      pp_greater_equal (buffer);
 	      break;
+	    case NE_EXPR:
+	      pp_string (buffer, "!=");
+	      break;
 	    default:
 	      gcc_unreachable ();
 	    }
diff --git a/gcc/gimple.h b/gcc/gimple.h
index df92863..42304fd
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -91,13 +91,14 @@ enum gf_mask {
     GF_CALL_ALLOCA_FOR_VAR	= 1 << 5,
     GF_CALL_INTERNAL		= 1 << 6,
     GF_OMP_PARALLEL_COMBINED	= 1 << 0,
-    GF_OMP_FOR_KIND_MASK	= 3 << 0,
+    GF_OMP_FOR_KIND_MASK	= 7 << 0,
     GF_OMP_FOR_KIND_FOR		= 0 << 0,
     GF_OMP_FOR_KIND_DISTRIBUTE	= 1 << 0,
     GF_OMP_FOR_KIND_SIMD	= 2 << 0,
     GF_OMP_FOR_KIND_CILKSIMD	= 3 << 0,
-    GF_OMP_FOR_COMBINED		= 1 << 2,
-    GF_OMP_FOR_COMBINED_INTO	= 1 << 3,
+    GF_OMP_FOR_KIND_CILKFOR     = 4 << 0,
+    GF_OMP_FOR_COMBINED		= 1 << 3,
+    GF_OMP_FOR_COMBINED_INTO	= 1 << 4,
     GF_OMP_TARGET_KIND_MASK	= 3 << 0,
     GF_OMP_TARGET_KIND_REGION	= 0 << 0,
     GF_OMP_TARGET_KIND_DATA	= 1 << 0,
@@ -523,6 +524,9 @@ struct GTY(()) gimple_omp_for_iter {
 
   /* Increment.  */
   tree incr;
+
+  /* Grain value, only used by _Cilk_for.  */
+  tree grain;
 };
 
 /* GIMPLE_OMP_FOR */
@@ -4562,6 +4566,37 @@ gimple_omp_for_set_pre_body (gimple gs, gimple_seq pre_body)
   omp_for_stmt->pre_body = pre_body;
 }
 
+/* Set GRAIN to be the grain value used by Cilk runtime for OMP_FOR GS.  */
+
+static inline void
+gimple_cilk_for_set_grain (tree grain, gimple gs)
+{
+  const gimple_statement_omp_for *omp_for_stmt =
+    as_a <gimple_statement_omp_for> (gs);
+  omp_for_stmt->iter[0].grain = grain;
+}
+
+/* Returns the induction variable of type TREE from GS that is of type 
+   GIMPLE_STATEMENT_OMP_FOR.  */
+
+static inline tree
+gimple_cilk_for_induction_var (const_gimple gs)
+{
+  const gimple_statement_omp_for *cilk_for_stmt =
+    as_a <const gimple_statement_omp_for> (gs);
+  return cilk_for_stmt->iter->index;
+}
+
+/* Returns the GRAIN value of type TREE from GS that is of type 
+   GIMPLE_STATEMENT_OMP_FOR.  */
+
+static inline tree
+gimple_cilk_for_grain (const_gimple gs)
+{
+  const gimple_statement_omp_for *cilk_for_stmt =
+    as_a <const gimple_statement_omp_for> (gs);
+  return cilk_for_stmt->iter->grain;
+}
 
 /* Return the clauses associated with OMP_PARALLEL GS.  */
 
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index a6e0c75..09e4d33
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -6559,7 +6559,19 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
   bool simd;
   bitmap has_decl_expr = NULL;
 
+  tree grain = NULL_TREE;
+  tree orig_init = NULL_TREE, orig_cond = NULL_TREE, orig_incr = NULL_TREE;
   orig_for_stmt = for_stmt = *expr_p;
+  
+  if (TREE_CODE (for_stmt) == CILK_FOR) 
+    { 
+      /* The user cannot pass any clauses for _Cilk_for,
+	 thus the grain value is stored in
+	 a safelen clause.  */
+      grain = OMP_FOR_CLAUSES (for_stmt);
+      grain = OMP_CLAUSE_SAFELEN_EXPR (grain);
+      OMP_FOR_CLAUSES (for_stmt) = NULL_TREE;
+    }
 
   simd = TREE_CODE (for_stmt) == OMP_SIMD
     || TREE_CODE (for_stmt) == CILK_SIMD;
@@ -6603,6 +6615,11 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
     }
 
   for_body = NULL;
+  if (flag_enable_cilkplus && TREE_CODE (for_stmt) == CILK_FOR)
+    {
+      tree it = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), 0);
+      gimplify_and_add (it, &for_pre_body);
+    }
   gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
 	      == TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt)));
   gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
@@ -6677,7 +6694,12 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 	}
       else
 	var = decl;
-
+ 
+      /* Original initial, final and increment values are necessary to compute
+	 the loop-count.  Otherwise, they are stored in variables and their
+	 context could be changed, potentially making it impossible to compute
+	 them correctly.  */
+      orig_init = TREE_OPERAND (t, 1);
       tret = gimplify_expr (&TREE_OPERAND (t, 1), &for_pre_body, NULL,
 			    is_gimple_val, fb_rvalue);
       ret = MIN (ret, tret);
@@ -6689,10 +6711,18 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
       gcc_assert (COMPARISON_CLASS_P (t));
       gcc_assert (TREE_OPERAND (t, 0) == decl);
 
-      tret = gimplify_expr (&TREE_OPERAND (t, 1), &for_pre_body, NULL,
-			    is_gimple_val, fb_rvalue);
-      ret = MIN (ret, tret);
-
+      if (flag_enable_cilkplus && TREE_CODE (for_stmt) == CILK_FOR)
+	{
+	  int x = 1;
+	  orig_cond = TREE_OPERAND (t, 1);
+	  copy_tree_r (&orig_cond, &x, NULL);
+	}
+      else
+	{
+	  tret = gimplify_expr (&TREE_OPERAND (t, 1), &for_pre_body, NULL, 
+				is_gimple_val, fb_rvalue);
+	  ret = MIN (ret, tret);
+	}
       /* Handle OMP_FOR_INCR.  */
       t = TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i);
       switch (TREE_CODE (t))
@@ -6713,6 +6743,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 	    t = build2 (PLUS_EXPR, TREE_TYPE (decl), var, t);
 	    t = build2 (MODIFY_EXPR, TREE_TYPE (var), var, t);
 	    TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i) = t;
+	    orig_incr = build_one_cst (TREE_TYPE (t));
 	    break;
 	  }
 
@@ -6726,6 +6757,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 	  t = build2 (PLUS_EXPR, TREE_TYPE (decl), var, t);
 	  t = build2 (MODIFY_EXPR, TREE_TYPE (var), var, t);
 	  TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i) = t;
+	  orig_incr = build_one_cst (TREE_TYPE (t));
 	  break;
 
 	case MODIFY_EXPR:
@@ -6753,8 +6785,16 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 	      gcc_unreachable ();
 	    }
 
-	  tret = gimplify_expr (&TREE_OPERAND (t, 1), &for_pre_body, NULL,
-				is_gimple_val, fb_rvalue);
+	  orig_incr = TREE_OPERAND (t, 1);
+	  /* Right here we are just trying to extract the absolute
+	     value of the increment.  */
+	  if (TREE_CODE (t) == MINUS_EXPR
+	      || TREE_CODE  (TREE_OPERAND (t, 1)) == NEGATE_EXPR
+	      || (TREE_CODE (TREE_OPERAND (t, 1)) == INTEGER_CST
+		  && tree_int_cst_sgn (TREE_OPERAND (t, 1)) < 1))
+	    orig_incr = fold_build1 (NEGATE_EXPR, TREE_TYPE (t), orig_incr);
+	  tret = gimplify_expr (&TREE_OPERAND (t, 1), &for_pre_body,
+				NULL, is_gimple_val, fb_rvalue);
 	  ret = MIN (ret, tret);
 	  if (c)
 	    {
@@ -6802,8 +6842,57 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 
   BITMAP_FREE (has_decl_expr);
 
+  tree incr_val = NULL_TREE, init_val = NULL_TREE, cond_val = NULL_TREE;
+  if (TREE_CODE (orig_for_stmt) == CILK_FOR)
+    {
+      tree stmt_list = alloc_stmt_list ();
+      incr_val = create_tmp_var (TREE_TYPE (orig_incr), "__cilk_incr");
+      tree mod = build2 (MODIFY_EXPR, TREE_TYPE (orig_incr), incr_val,
+			 orig_incr);
+      append_to_statement_list (mod, &stmt_list);
+
+      init_val = create_tmp_var (TREE_TYPE (orig_init), "__cilk_init");
+      mod = build2 (MODIFY_EXPR, TREE_TYPE (orig_init), init_val, orig_init);
+      append_to_statement_list (mod, &stmt_list);
+
+      cond_val = create_tmp_var (TREE_TYPE (orig_cond), "__cilk_cond");
+      mod = build2 (MODIFY_EXPR, TREE_TYPE (orig_cond), cond_val, orig_cond);
+      append_to_statement_list (mod, &stmt_list);
+  
+      gimplify_and_add (stmt_list, &for_pre_body);
+    }
   gimplify_and_add (OMP_FOR_BODY (orig_for_stmt), &for_body);
+ 
+  if (TREE_CODE (orig_for_stmt) == CILK_FOR)
+    {
+      /* Sometimes an assign is inserted before the OMP_FOR_BODY.  So,
+	 search and find the omp for body.  */
+      gimple for_body_stmt = NULL;
+      for (gimple_stmt_iterator gsi = gsi_start (for_body); !gsi_end_p (gsi);
+	   gsi_next (&gsi))
+	{
+	  for_body_stmt = gsi_stmt (gsi);
+	  if (gimple_code (for_body_stmt) == GIMPLE_OMP_PARALLEL)
+	    break;
+	}
+      gcc_assert (gimple_code (for_body_stmt) == GIMPLE_OMP_PARALLEL);
+      tree orig_clses = gimple_omp_parallel_clauses (for_body_stmt);
+      tree new_clause = build_omp_clause (input_location, OMP_CLAUSE_SHARED);
+      OMP_CLAUSE_DECL (new_clause) = init_val;
+      OMP_CLAUSE_CHAIN (new_clause) = orig_clses;
+
+      orig_clses = new_clause;
+      new_clause = build_omp_clause (input_location, OMP_CLAUSE_SHARED);
+      OMP_CLAUSE_DECL (new_clause) = cond_val;
+      OMP_CLAUSE_CHAIN (new_clause) = orig_clses;
 
+      orig_clses = new_clause;
+      new_clause = build_omp_clause (input_location, OMP_CLAUSE_SHARED);
+      OMP_CLAUSE_DECL (new_clause) = incr_val;
+      OMP_CLAUSE_CHAIN (new_clause) = orig_clses;
+
+      gimple_omp_parallel_set_clauses (for_body_stmt, new_clause);
+    }
   if (orig_for_stmt != for_stmt)
     for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
       {
@@ -6825,6 +6914,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
     case OMP_FOR: kind = GF_OMP_FOR_KIND_FOR; break;
     case OMP_SIMD: kind = GF_OMP_FOR_KIND_SIMD; break;
     case CILK_SIMD: kind = GF_OMP_FOR_KIND_CILKSIMD; break;
+    case CILK_FOR: kind = GF_OMP_FOR_KIND_CILKFOR; break;
     case OMP_DISTRIBUTE: kind = GF_OMP_FOR_KIND_DISTRIBUTE; break;
     default:
       gcc_unreachable ();
@@ -6859,6 +6949,9 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
       gimple_omp_for_set_incr (gfor, i, TREE_OPERAND (t, 1));
     }
 
+  if (kind == GF_OMP_FOR_KIND_CILKFOR) 
+    gimple_cilk_for_set_grain (grain, gfor);
+
   gimplify_seq_add_stmt (pre_p, gfor);
   if (ret != GS_ALL_DONE)
     return GS_ERROR;
@@ -7880,6 +7973,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	case OMP_FOR:
 	case OMP_SIMD:
 	case CILK_SIMD:
+	case CILK_FOR:
 	case OMP_DISTRIBUTE:
 	  ret = gimplify_omp_for (expr_p, pre_p);
 	  break;
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index f1ec1c6..0beaa2a
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -71,6 +71,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ipa-prop.h"
 #include "tree-nested.h"
 #include "tree-eh.h"
+#include "cilk.h"
 
 
 /* Lowering of OpenMP parallel and workshare constructs proceeds in two
@@ -198,6 +199,12 @@ struct omp_for_data
   struct omp_for_data_loop *loops;
 };
 
+/* A structure with necessary elements from _Cilk_for statement.  This
+   struct. node is passed in to WALK_STMT_INFO->INFO.  */
+typedef struct cilk_for_information {
+  bool found;
+  tree induction_var;
+} cilk_for_info;
 
 static splay_tree all_contexts;
 static int taskreg_nesting_level;
@@ -314,6 +321,9 @@ extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
   fd->have_ordered = false;
   fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
   fd->chunk_size = NULL_TREE;
+  if (flag_enable_cilkplus 
+      && gimple_omp_for_kind (fd->for_stmt) ==  GF_OMP_FOR_KIND_CILKFOR)
+    fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR;
   collapse_iter = NULL;
   collapse_count = NULL;
 
@@ -391,8 +401,10 @@ extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
 	case GT_EXPR:
 	  break;
 	case NE_EXPR:
-	  gcc_assert (gimple_omp_for_kind (for_stmt)
-		      == GF_OMP_FOR_KIND_CILKSIMD);
+	  gcc_assert ((gimple_omp_for_kind (for_stmt)
+		       == GF_OMP_FOR_KIND_CILKSIMD)
+		      || (gimple_omp_for_kind (for_stmt)
+			  == GF_OMP_FOR_KIND_CILKFOR));
 	  break;
 	case LE_EXPR:
 	  if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
@@ -897,7 +909,31 @@ use_pointer_for_field (tree decl, omp_context *shared_ctx)
 	 variable no longer really shared.  */
       if (shared_ctx->is_nested)
 	{
-	  omp_context *up;
+	  omp_context *up = shared_ctx->outer;
+
+	  /* If VAR is the induction variable of the outer _Cilk_for, then
+	     it needs to be passed as a value not pointer since it
+	     would not be overwritten by the body.  */
+	  if (flag_enable_cilkplus
+	      && gimple_code (up->stmt) == GIMPLE_OMP_FOR
+	      && gimple_omp_for_kind (up->stmt) == GF_OMP_FOR_KIND_CILKFOR) 
+	    while (up) 
+	      { 
+		if (gimple_code (up->stmt) == GIMPLE_OMP_FOR
+		    && gimple_omp_for_kind (up->stmt)
+		    == GF_OMP_FOR_KIND_CILKFOR)
+		  {
+		    struct omp_for_data fd;
+		    /* _Cilk_for always has collapse = 1.  */
+		    struct omp_for_data_loop *loops
+		      = (struct omp_for_data_loop *)
+		      alloca (sizeof (struct omp_for_data_loop));
+		    extract_omp_for_data (up->stmt, &fd, loops);
+		    if (DECL_NAME (decl) == DECL_NAME (fd.loop.v))
+		      return false;
+		  }
+		up = up->outer;
+	      }
 
 	  for (up = shared_ctx->outer; up; up = up->outer)
 	    if (is_taskreg_ctx (up) && maybe_lookup_decl (decl, up))
@@ -1818,27 +1854,112 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 	scan_omp (&OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c), ctx);
 }
 
-/* Create a new name for omp child function.  Returns an identifier.  */
+/* Create a new name for omp child function.  Returns an identifier.  If 
+   IS_CILK_FOR is true then the suffix for the child function is 
+   "_cilk_for_fn."  */
 
 static tree
-create_omp_child_function_name (bool task_copy)
+create_omp_child_function_name (bool task_copy, bool is_cilk_for)
 {
+  if (is_cilk_for)
+    return clone_function_name (current_function_decl, "_cilk_for_fn");
   return (clone_function_name (current_function_decl,
 			       task_copy ? "_omp_cpyfn" : "_omp_fn"));
 }
 
+/* Helper function for walk_gimple_seq function.  *GSI_P is the gimple stmt.
+   iterator passed by walk_gimple_seq and *WI->INFO holds the CILK_FOR_INFO
+   structure.  This function sets the values inside this structure if it
+   finds a _Cilk_for statement in *GSI_P.  HANDLED_OPS_P is unused.  */
+
+static tree
+find_cilk_for_stmt (gimple_stmt_iterator *gsi_p,
+		    bool *handled_ops_p ATTRIBUTE_UNUSED,
+		    struct walk_stmt_info *wi)
+{
+  cilk_for_info *cf_info = (cilk_for_info *) wi->info;
+  gimple stmt = gsi_stmt (*gsi_p);
+
+  if (gimple_code (stmt) == GIMPLE_OMP_FOR
+      && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_CILKFOR)
+      /* For nested _Cilk_for statments, just look into the
+	 outer-most one.  */
+      && cf_info->found == false)
+    {
+      cf_info->found = true;
+      cf_info->induction_var = gimple_cilk_for_induction_var (stmt);
+    }
+  return NULL_TREE;
+}
+
+/* Returns true if STMT contains a CILK_FOR statement.  If found then
+   set *IND_VAR with induction variable.  Otherwise these values remain 
+   untouched.  IND_VAR can be NULL and if so then it is left untouched.  */
+
+static bool
+is_cilk_for_stmt (gimple stmt, tree *ind_var)
+{
+  if (!flag_enable_cilkplus)
+    return false;
+    
+  gimple_seq body = stmt;
+  struct walk_stmt_info wi;
+  cilk_for_info cf_info;
+  memset (&cf_info, 0, sizeof (cilk_for_info));
+  memset (&wi, 0, sizeof (wi));
+  wi.info = &cf_info;
+  walk_gimple_seq (body, find_cilk_for_stmt, NULL, &wi);
+  if (cf_info.found)
+    {
+      if (ind_var)
+	*ind_var = cf_info.induction_var;
+      return true;
+    }
+    
+  return false;
+}
+
+/* Returns the type of the induction variable for the child function for
+   _Cilk_for and the types for _high and _low variables based on TYPE.  */
+
+static tree
+cilk_for_check_loop_diff_type (tree type)
+{
+  if (type == integer_type_node)
+    return type;
+  else if (TYPE_PRECISION (type) <= TYPE_PRECISION (uint32_type_node))
+    { 
+      if (TYPE_UNSIGNED (type)) 
+	return uint32_type_node;
+      else
+	return integer_type_node;
+    }
+  else
+    {
+      if (TYPE_UNSIGNED (type)) 
+	return uint64_type_node;
+      else
+	return long_long_integer_type_node;
+    }
+  gcc_unreachable ();
+}
+
 /* Build a decl for the omp child function.  It'll not contain a body
    yet, just the bare decl.  */
 
 static void
-create_omp_child_function (omp_context *ctx, bool task_copy)
+create_omp_child_function (omp_context *ctx, bool task_copy,
+			   bool is_cilk_for, tree cilk_var_type)
 {
   tree decl, type, name, t;
-
-  name = create_omp_child_function_name (task_copy);
+ 
+  name = create_omp_child_function_name (task_copy, is_cilk_for);
   if (task_copy)
     type = build_function_type_list (void_type_node, ptr_type_node,
 				     ptr_type_node, NULL_TREE);
+  else if (is_cilk_for)
+    type = build_function_type_list (void_type_node, ptr_type_node,
+				     cilk_var_type, cilk_var_type, NULL_TREE);
   else
     type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
 
@@ -1888,6 +2009,33 @@ create_omp_child_function (omp_context *ctx, bool task_copy)
   DECL_CONTEXT (t) = decl;
   DECL_RESULT (decl) = t;
 
+  /* _Cilk_for's child function requires two extra parameters called 
+     __low and __high that are set the by Cilk runtime when it calls this 
+     function.  */
+  if (is_cilk_for)
+    {
+      t = build_decl (DECL_SOURCE_LOCATION (decl),
+		      PARM_DECL, get_identifier ("__high"), cilk_var_type);
+      DECL_ARTIFICIAL (t) = 1;
+      DECL_NAMELESS (t) = 1;
+      DECL_ARG_TYPE (t) = ptr_type_node;
+      DECL_CONTEXT (t) = current_function_decl;
+      TREE_USED (t) = 1;
+      TREE_ADDRESSABLE (t) = 1;
+      DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
+      DECL_ARGUMENTS (decl) = t;
+
+      t = build_decl (DECL_SOURCE_LOCATION (decl),
+		      PARM_DECL, get_identifier ("__low"), cilk_var_type);
+      DECL_ARTIFICIAL (t) = 1;
+      DECL_NAMELESS (t) = 1;
+      DECL_ARG_TYPE (t) = ptr_type_node;
+      DECL_CONTEXT (t) = current_function_decl;
+      TREE_USED (t) = 1;
+      TREE_ADDRESSABLE (t) = 1;
+      DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
+      DECL_ARGUMENTS (decl) = t;
+    }
   t = build_decl (DECL_SOURCE_LOCATION (decl),
 		  PARM_DECL, get_identifier (".omp_data_i"), ptr_type_node);
   DECL_ARTIFICIAL (t) = 1;
@@ -1895,6 +2043,8 @@ create_omp_child_function (omp_context *ctx, bool task_copy)
   DECL_ARG_TYPE (t) = ptr_type_node;
   DECL_CONTEXT (t) = current_function_decl;
   TREE_USED (t) = 1;
+  if (is_cilk_for)
+    DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
   DECL_ARGUMENTS (decl) = t;
   if (!task_copy)
     ctx->receiver_decl = t;
@@ -2016,7 +2166,15 @@ scan_omp_parallel (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
   DECL_ARTIFICIAL (name) = 1;
   DECL_NAMELESS (name) = 1;
   TYPE_NAME (ctx->record_type) = name;
-  create_omp_child_function (ctx, false);
+
+  tree ind_var = NULL_TREE;
+  bool is_cilk_for = (flag_enable_cilkplus && outer_ctx
+		      && is_cilk_for_stmt (outer_ctx->stmt, &ind_var));
+  tree cilk_var_type =
+    (is_cilk_for ? cilk_for_check_loop_diff_type (TREE_TYPE (ind_var))
+     : NULL_TREE);
+
+  create_omp_child_function (ctx, false, is_cilk_for, cilk_var_type);
   gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn);
 
   scan_sharing_clauses (gimple_omp_parallel_clauses (stmt), ctx);
@@ -2061,7 +2219,7 @@ scan_omp_task (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
   DECL_ARTIFICIAL (name) = 1;
   DECL_NAMELESS (name) = 1;
   TYPE_NAME (ctx->record_type) = name;
-  create_omp_child_function (ctx, false);
+  create_omp_child_function (ctx, false, false, NULL_TREE);
   gimple_omp_task_set_child_fn (stmt, ctx->cb.dst_fn);
 
   scan_sharing_clauses (gimple_omp_task_clauses (stmt), ctx);
@@ -2074,7 +2232,7 @@ scan_omp_task (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
       DECL_ARTIFICIAL (name) = 1;
       DECL_NAMELESS (name) = 1;
       TYPE_NAME (ctx->srecord_type) = name;
-      create_omp_child_function (ctx, true);
+      create_omp_child_function (ctx, true, false, NULL_TREE);
     }
 
   scan_omp (gimple_omp_body_ptr (stmt), ctx);
@@ -2199,7 +2357,7 @@ scan_omp_target (gimple stmt, omp_context *outer_ctx)
   TYPE_NAME (ctx->record_type) = name;
   if (kind == GF_OMP_TARGET_KIND_REGION)
     {
-      create_omp_child_function (ctx, false);
+      create_omp_child_function (ctx, false, false, NULL_TREE);
       gimple_omp_target_set_child_fn (stmt, ctx->cb.dst_fn);
     }
 
@@ -2993,6 +3151,15 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, int &max_vf,
   return true;
 }
 
+/* Returns true if the variable name in DECL matches *NAME.  */
+
+static inline bool
+is_cilk_loop_var (tree decl, const char *name)
+{
+  return (DECL_NAME (decl) && !strncmp (IDENTIFIER_POINTER (DECL_NAME (decl)), 
+					name, strlen (name))); 
+}
+
 /* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN,
    from the receiver (aka child) side and initializers for REFERENCE_TYPE
    private variables.  Initialization statements go in ILIST, while calls
@@ -3245,6 +3412,18 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
 	      SET_DECL_VALUE_EXPR (new_var, x);
 	      DECL_HAS_VALUE_EXPR_P (new_var) = 1;
 
+	      /* In _Cilk_for, the increment, start and final values
+		 are stored in the clause inserted by gimplify_omp_for.  
+		 This value is used by the child function to find the 
+		 appropriate induction value function based on the 
+		 high and low parameters of the child function.  
+		 Now, we need to store the decl value expressions here so 
+		 that we can easily access them.  */
+	      if (flag_enable_cilkplus 
+		  && (is_cilk_loop_var (var, "__cilk_init") 
+		      || is_cilk_loop_var (var, "__cilk_cond")
+		      || is_cilk_loop_var (var, "__cilk_incr"))) 
+		SET_DECL_VALUE_EXPR (var, x);
 	      /* ??? If VAR is not passed by reference, and the variable
 		 hasn't been initialized yet, then we'll get a warning for
 		 the store into the omp_data_s structure.  Ideally, we'd be
@@ -4628,6 +4807,250 @@ expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from)
     }
 }
 
+/* Returns true if T is a tree whose code is COMPONENT_REF and its field
+   matches D_F_NAME and the data argument matches D_ARG_NAME.  */
+
+static bool
+cilk_find_field_value (tree t, tree d_arg_name, tree d_f_name)
+{
+  if (TREE_CODE (t) == COMPONENT_REF)
+    {
+      tree arg = TREE_OPERAND (t, 0);
+      tree field = TREE_OPERAND (t, 1);
+      if (TREE_CODE (arg) == ADDR_EXPR || TREE_CODE (arg) == MEM_REF)
+	arg = TREE_OPERAND (arg, 0);
+      if (DECL_NAME (arg) && DECL_NAME (field)
+	  && !strcmp (IDENTIFIER_POINTER (d_arg_name),
+		      IDENTIFIER_POINTER (DECL_NAME (arg)))
+	  && !strcmp (IDENTIFIER_POINTER (d_f_name),
+		      IDENTIFIER_POINTER (DECL_NAME (field)))) 
+	return true;
+    }
+  return false;
+}
+
+/* Find the COMPONENT_REF in all the basic blocks in REGION whose 
+   data-argument is DATA_ARG and field is FIELD and then replace that 
+   COMPONENT_REF value with NEW_VALUE, a VAR_DECL.  */
+
+static void
+cilk_for_find_component_expr (struct omp_region *region, tree data_arg,
+			      tree field, tree new_value)
+{
+  vec<basic_block> bbs;
+  basic_block bb;
+  unsigned ii;
+  tree new_val = NULL_TREE;
+  bbs.create (0);
+  gather_blocks_in_sese_region (region->entry, region->exit, &bbs);
+  /* No need to push the entry bb into BBS since it doesn't get inserted
+     into the child function.  */
+  
+  tree da_name = DECL_NAME (data_arg);
+  tree df_name = DECL_NAME (field);
+  FOR_EACH_VEC_ELT (bbs, ii, bb)    
+    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+	 gsi_next (&gsi))
+      {
+	gimple stmt = gsi_stmt (gsi);
+	if (gimple_code (stmt) == GIMPLE_ASSIGN)
+	  for (unsigned jj = 1; jj < gimple_num_ops (stmt); jj++)
+	    {
+	      tree *op = gimple_op_ptr (stmt, jj);
+	      if (TREE_CODE (*op) == COMPONENT_REF
+		  && cilk_find_field_value (*op, da_name, df_name))
+		{    
+		  if (TREE_TYPE (*op) == TREE_TYPE (new_value))
+		    new_val = new_value;
+		  else
+		    {
+		      tree t = fold_convert (TREE_TYPE (*op), new_value);
+		      new_val =
+			force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+						  true, GSI_NEW_STMT);
+		    }
+		  gsi_insert_before (&gsi, gimple_build_assign (*op, new_val), 
+				     GSI_NEW_STMT);
+		  *op = new_val;
+		}
+	    }
+      }
+}
+
+/* Find the initial, final and increment values in BODY_STMT's clause
+   and store them in *INIT, *FINAL and *INCR parameters respectively.  */
+
+static void
+find_cilk_for_vars (gimple body_stmt, tree *init, tree *final, tree *incr)
+{
+  /* Initial, final and increment value all start with __cilk_init,
+     __cilk_cond and __cilk_incr, respectively.  These values are defined
+     in shared clause.  Thus, we search for those.  */
+  for (tree cc = gimple_omp_parallel_clauses (body_stmt); cc; 
+       cc = OMP_CLAUSE_CHAIN (cc))
+    if (OMP_CLAUSE_CODE (cc) == OMP_CLAUSE_SHARED)
+      {
+	tree decl = OMP_CLAUSE_DECL (cc);
+	if (is_cilk_loop_var (decl, "__cilk_incr"))
+	  { 
+	    *incr = decl;
+	    if (DECL_VALUE_EXPR (*incr))
+	      *incr = DECL_VALUE_EXPR (*incr);
+	  } 
+	else if (is_cilk_loop_var (decl, "__cilk_init"))
+	  { 
+	    *init = decl;
+	    if (DECL_VALUE_EXPR (*init))
+	      *init = DECL_VALUE_EXPR (*init);
+	  }
+	else if (is_cilk_loop_var (decl, "__cilk_cond"))
+	  { 
+	    *final = decl;
+	    if (DECL_VALUE_EXPR (*final))
+	      *final = DECL_VALUE_EXPR (*final);
+	  }
+      }
+}
+ 
+/* Expand the _Cilk_for body starting at REGION.  DATA_ARG, HIGH and LOW 
+   indicates data-argument, __high and __low parameters of the child 
+   function.  */
+
+static void
+expand_cilk_for_body (struct omp_region *region, tree data_arg,
+		      tree low, tree high)
+{
+  struct omp_for_data fd;
+  struct omp_for_data_loop *loops;
+  loops
+    = (struct omp_for_data_loop *)
+      alloca (gimple_omp_for_collapse (last_stmt (region->outer->entry))
+	      * sizeof (struct omp_for_data_loop));
+  extract_omp_for_data (last_stmt (region->outer->entry), &fd, loops);
+  region->sched_kind = fd.sched_kind;
+  basic_block entry_bb = region->entry;
+  
+  /* This is where the body is and the location where we must insert
+     the modification to the induction variable.  */
+  basic_block body_bb = single_succ (region->entry);
+  gimple entry_stmt = last_stmt (region->entry);
+  
+  /* Split the first basic block into two and put the initializer values
+     in the top one.  */
+  gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
+  basic_block l1_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
+  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
+
+  gsi = gsi_last_bb (l1_bb);
+  tree type = cilk_for_check_loop_diff_type (TREE_TYPE (fd.loop.v));
+  tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
+  tree t = fold_convert (type, low);
+  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
+				GSI_NEW_STMT);
+  gimple stmt = gimple_build_assign (ind_var, fold_convert (type, t));
+  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+  vec_alloc (region->ws_args, 2);
+  tree t1 = null_pointer_node;
+  tree t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
+  if (data_arg)
+    {
+      t1 = build_fold_addr_expr (gimple_omp_parallel_data_arg (entry_stmt));
+      gsi = gsi_start_bb (body_bb);
+      tree init = NULL_TREE, final_val = NULL_TREE, incr = NULL_TREE;
+      find_cilk_for_vars (entry_stmt, &init, &final_val, &incr);
+
+      tree step = fd.loop.step;
+      if (TREE_CODE (fd.loop.step) != INTEGER_CST)
+	step = incr;      
+      step = fold_convert (type, step);
+      if (TREE_CODE (step) == INTEGER_CST && tree_int_cst_sgn (step) < 1)
+	step = fold_build1_loc (UNKNOWN_LOCATION, NEGATE_EXPR, type, step);
+      
+      tree tmp = create_tmp_reg (type, NULL);
+      gsi_insert_before (&gsi, gimple_build_assign (tmp, step),
+			 GSI_NEW_STMT);
+      t = build2 (MULT_EXPR, type, ind_var, tmp);
+      tree tmp2 = create_tmp_reg (type, NULL);
+      gsi_insert_after (&gsi, gimple_build_assign (tmp2, t), GSI_NEW_STMT);
+
+      tmp = create_tmp_reg (type, NULL);
+      init = fold_convert (type, init);
+      tree init_tmp = force_gimple_operand_gsi
+	(&gsi, init, true, NULL_TREE, false, GSI_CONTINUE_LINKING); 
+
+      gsi_insert_after (&gsi, gimple_build_assign (tmp, init_tmp), 
+			GSI_NEW_STMT);
+      if (fd.loop.cond_code == GE_EXPR || fd.loop.cond_code == GT_EXPR) 
+	t = fold_build2 (MINUS_EXPR, type, tmp, tmp2);
+      else 
+	t = fold_build2 (PLUS_EXPR, type, tmp, tmp2);
+
+      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, 
+				    GSI_CONTINUE_LINKING);
+      tree tmp3 = create_tmp_reg (type, NULL);
+      gimple stmt = gimple_build_assign (tmp3, t);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+      cilk_for_find_component_expr (region, data_arg, fd.loop.v, tmp3);
+    }
+  region->ws_args->quick_push (t1);
+  region->ws_args->quick_push (t2);
+  
+  gsi = gsi_last_bb (l1_bb);
+  basic_block cond_bb = split_block (l1_bb, gsi_stmt (gsi))->dest;
+  single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
+
+  gsi = gsi_last_bb (cond_bb);
+  t = fold_convert (type, high);
+  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
+				GSI_CONTINUE_LINKING);
+  t = build2 (LT_EXPR, boolean_type_node, ind_var, t);
+  stmt = gimple_build_cond_empty (t);
+  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+  if (walk_tree (gimple_cond_lhs_ptr (stmt), expand_omp_regimplify_p,
+		 NULL, NULL)
+      || walk_tree (gimple_cond_rhs_ptr (stmt), expand_omp_regimplify_p,
+		    NULL, NULL))
+    {
+      gsi = gsi_for_stmt (stmt);
+      gimple_regimplify_operands (stmt, &gsi);
+    }
+
+  /* Insert incrementing of induction variable.  */
+  gsi = gsi_last_bb (body_bb);
+  t = build2 (PLUS_EXPR, type, ind_var, build_one_cst (type));
+  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
+				GSI_CONTINUE_LINKING);
+  gsi_insert_after (&gsi, gimple_build_assign (ind_var, t),
+		    GSI_CONTINUE_LINKING);
+  
+  basic_block exit_bb = region->exit;
+
+  gsi = gsi_last_bb (exit_bb);
+  basic_block last_bb = split_block (exit_bb, gsi_stmt (gsi))->dest;
+  
+  /* Remove the #pragma omp return.  */
+  gsi = gsi_last_bb (exit_bb);
+  gsi_remove (&gsi, true);
+  
+  gsi = gsi_last_bb (last_bb);
+  gsi_insert_before (&gsi, gimple_build_return (NULL), GSI_SAME_STMT);
+  
+  /* Now connect all the basic-blocks.  */
+  edge e = make_edge (cond_bb, last_bb, EDGE_FALSE_VALUE);
+  e->probability = REG_BR_PROB_BASE / 4;
+
+  edge e3 = find_edge (cond_bb, body_bb);
+  e3->probability = REG_BR_PROB_BASE * 3 / 4;
+  e3->flags = EDGE_TRUE_VALUE;
+  
+  edge e2 = find_edge (exit_bb, last_bb);
+  remove_edge (e2);
+  e2 = make_edge (exit_bb, cond_bb, EDGE_FALLTHRU);
+  e2->probability = 1;
+  region->exit = last_bb;
+}
+
 /* Expand the OpenMP parallel or task directive starting at REGION.  */
 
 static void
@@ -4640,6 +5063,7 @@ expand_omp_taskreg (struct omp_region *region)
   gimple entry_stmt, stmt;
   edge e;
   vec<tree, va_gc> *ws_args;
+  gimple parcopy_stmt = NULL;
 
   entry_stmt = last_stmt (region->entry);
   child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
@@ -4648,6 +5072,16 @@ expand_omp_taskreg (struct omp_region *region)
   entry_bb = region->entry;
   exit_bb = region->exit;
 
+  /* The way _Cilk_for is constructed in the compiler is like making
+     the _Cilk_for statment a #pragma OMP for and the body of it is
+     enclosed in #pragma omp parallel.  In this routine, we handle
+     inserting the body into the child function and putting a loop around
+     it to go from low to high.  NOTE: Even though this is how the 
+     compiler breaks them, they do NOT function the same way.  */
+  bool is_cilk_for =
+    (flag_enable_cilkplus && region->outer
+     && is_cilk_for_stmt (last_stmt (region->outer->entry), NULL));
+    
   if (is_combined_parallel (region))
     ws_args = region->ws_args;
   else
@@ -4698,7 +5132,6 @@ expand_omp_taskreg (struct omp_region *region)
 	  basic_block entry_succ_bb = single_succ (entry_bb);
 	  gimple_stmt_iterator gsi;
 	  tree arg, narg;
-	  gimple parcopy_stmt = NULL;
 
 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
 	    {
@@ -4755,6 +5188,29 @@ expand_omp_taskreg (struct omp_region *region)
 	    }
 	}
 
+      /* Extract the __high and __low parameter from the function.  */
+      tree high_arg = NULL_TREE, low_arg = NULL_TREE;
+      if (is_cilk_for)
+	{
+	  for (tree ii_arg = DECL_ARGUMENTS (child_fn); ii_arg != NULL_TREE;
+	       ii_arg = TREE_CHAIN (ii_arg))
+	    {
+	      if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (ii_arg)),
+			   "__high"))
+		high_arg = ii_arg;
+	      if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (ii_arg)), "__low"))
+		low_arg = ii_arg;
+	    }
+	  gcc_assert (high_arg);
+	  gcc_assert (low_arg);
+	  expand_cilk_for_body (region, gimple_get_lhs (parcopy_stmt),
+				low_arg, high_arg);
+
+	  /* A new BB is added to the end of EXIT_BB and thus it needs to be
+	     updated.  */
+	  exit_bb = region->exit;
+	}
+
       /* Declare local variables needed in CHILD_CFUN.  */
       block = DECL_INITIAL (child_fn);
       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
@@ -4787,7 +5243,7 @@ expand_omp_taskreg (struct omp_region *region)
       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
 
       /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
-      if (exit_bb)
+      if (exit_bb && !is_cilk_for)
 	{
 	  gsi = gsi_last_bb (exit_bb);
 	  gcc_assert (!gsi_end_p (gsi)
@@ -4861,11 +5317,16 @@ expand_omp_taskreg (struct omp_region *region)
       pop_cfun ();
     }
 
-  /* Emit a library call to launch the children threads.  */
-  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
-    expand_parallel_call (region, new_bb, entry_stmt, ws_args);
-  else
-    expand_task_call (new_bb, entry_stmt);
+  /* In _Cilk_for, the call to the runtime function is inserted by
+     expand_omp_for.  */
+  if (!is_cilk_for)
+    {
+      /* Emit a library call to launch the children threads.  */
+      if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
+	expand_parallel_call (region, new_bb, entry_stmt, ws_args);
+      else
+	expand_task_call (new_bb, entry_stmt);
+    }
   if (gimple_in_ssa_p (cfun))
     update_ssa (TODO_update_ssa_only_virtuals);
 }
@@ -6540,6 +7001,122 @@ expand_omp_for_static_chunk (struct omp_region *region,
     }
 }
 
+/* A subroutine of expand_omp_for.  Insert the function call to the
+   cilk library function-call: __cilkrts_cilk_for_64/32 into the end of
+   REGION.  Loop information is calculated using step, n1 and n2 from FD.  */
+
+static void
+insert_cilk_for_fn_call (struct omp_region *region, struct omp_for_data *fd)
+{
+  tree type = cilk_for_check_loop_diff_type (TREE_TYPE (fd->loop.v));
+  basic_block entry_bb = region->entry;
+  bool broken_loop = region->cont == NULL;
+  basic_block cont_bb = region->cont;
+  gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
+  tree diff_type = cilk_for_check_loop_diff_type (TREE_TYPE (fd->loop.v));
+  tree grain = gimple_cilk_for_grain (fd->for_stmt);
+  
+  /* Convert n2 and n1 to the type we need.  */
+  tree n1 = fold_convert (diff_type, fd->loop.n1);
+  tree n2 = fold_convert (diff_type, fd->loop.n2);
+
+  n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, true,
+				 GSI_SAME_STMT);
+  n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, true,
+				 GSI_SAME_STMT);
+  tree diff_val = fold_build2 (MINUS_EXPR, diff_type, n2, n1);
+
+  diff_val = force_gimple_operand_gsi (&gsi, diff_val, true, NULL_TREE,
+					    true, GSI_SAME_STMT);
+  tree step = fd->loop.step;
+  tree step_convert = force_gimple_operand_gsi (&gsi,
+						fold_convert (diff_type, step),
+						true, NULL_TREE, true,
+						GSI_SAME_STMT);
+  tree count = fold_build2 (TRUNC_DIV_EXPR, diff_type, diff_val, step_convert);
+  count = force_gimple_operand_gsi (&gsi, count, true, NULL_TREE, true,
+				    GSI_SAME_STMT);
+
+  tree data_arg_ptr = (*region->ws_args)[0];
+  tree child_fn = (*region->ws_args)[1];
+
+  tree lib_fun = NULL_TREE;
+  if (TYPE_PRECISION (type) == 32)
+    lib_fun = cilk_for_32_fndecl;
+  else if (TYPE_PRECISION (type) == 64)
+    lib_fun = cilk_for_64_fndecl;
+  else
+    gcc_unreachable ();
+
+  vec<tree, va_gc> *args;
+  vec_alloc (args, 4);
+  args->quick_push (child_fn);
+  args->quick_push (data_arg_ptr);
+  args->quick_push (count);
+  args->quick_push (grain);
+  tree t = build_call_expr_loc_vec (UNKNOWN_LOCATION, lib_fun, args);
+  gsi_remove (&gsi, true);
+
+  if (!broken_loop)
+    {
+      /* Code to control the increment goes in the CONT_BB.  */
+      gsi = gsi_last_bb (cont_bb);
+      gimple stmt = gsi_stmt (gsi);
+      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
+      gsi_remove (&gsi, true);
+      
+      /* remove the edge to OMP continue block.  */
+      unsigned int ii = 0;
+      while (EDGE_COUNT (cont_bb->succs) > 1)
+	{
+	  edge ee = EDGE_SUCC (cont_bb, ii);
+	  if (!(ee->flags & EDGE_FALLTHRU))
+	    remove_edge (ee);
+	  ii++;
+	}      
+      gsi = gsi_start_bb (cont_bb);
+      gsi_remove (&gsi, true);
+      force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
+				GSI_CONTINUE_LINKING);
+    }
+
+  /* Remove GIMPLE_OMP_RETURN.  */
+  gsi = gsi_last_bb (region->exit);
+  gimple stmt = gsi_stmt (gsi);
+  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_RETURN);
+  gsi_remove (&gsi, true);
+
+  gsi = gsi_last_bb (region->entry);
+  t = fold_build2 (fd->loop.cond_code, boolean_type_node, n1, n2);
+  stmt = gimple_build_cond_empty (t);
+  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+  /* In here we are replacing a _Cilk_for statement with something
+     like this:
+
+     if (n1 <cond_code> n2)
+       goto bb1
+     else
+       goto bb2
+     
+     bb1:
+       .omp_data.o.__cilk_incr = __cilk_incr;
+       ...
+       __cilkrts_cilk_for_{32/64} (func_name, &omp_data_0, <count>, <grain>);
+
+     bb2:
+     clobber all values and go out.  */  
+  unsigned int ii = 0;
+  while (ii < EDGE_COUNT (region->entry->succs))
+    {
+      edge ee = EDGE_SUCC (region->entry, ii);
+      if (ee->flags & EDGE_FALLTHRU)
+	ee->flags = EDGE_TRUE_VALUE;
+      else
+	ee->flags = EDGE_FALSE_VALUE;
+      ii++;
+    }
+}
 
 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
    loop.  Given parameters:
@@ -6880,6 +7457,12 @@ expand_omp_for (struct omp_region *region, gimple inner_stmt)
 
   if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_KIND_SIMD)
     expand_omp_simd (region, &fd);
+  else if (flag_enable_cilkplus 
+	   && (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_KIND_CILKFOR))
+    {
+      region->ws_args = region->inner->ws_args;
+      insert_cilk_for_fn_call (region, &fd);
+    }
   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
 	   && !fd.have_ordered)
     {
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk-fors.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk-fors.c
new file mode 100644
index 0000000..a80f413
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk-fors.c
@@ -0,0 +1,100 @@
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99"  { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#if HAVE_IO
+#include <stdio.h>
+#endif
+
+static void check (int *Array, int start, int end, int incr, int value)
+{
+  int ii = 0;
+  for (ii = start;  ii < end; ii = ii + incr)
+    if (Array[ii] != value)
+      __builtin_abort ();
+#if HAVE_IO
+  printf ("Passed\n");
+#endif
+}
+
+static void check_reverse (int *Array, int start, int end, int incr, int value)
+{
+  int ii = 0;
+  for (ii = start; ii >= end; ii = ii - incr)
+    if (Array[ii] != value)
+      __builtin_abort ();
+#if HAVE_IO
+  printf ("Passed\n");
+#endif
+}
+
+
+int main (void)
+{
+  int Array[10];
+  int x = 9, y = 0, z = 3;
+
+
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array[ii] = 1133;
+  check (Array, 0, 10, 1, 1133);
+
+  _Cilk_for (int ii = 0; ii < 10; ++ii)
+    Array[ii] = 3311;
+  check (Array, 0, 10, 1, 3311);
+
+  _Cilk_for (int ii = 9; ii > -1; ii--)
+    Array[ii] = 4433;
+  check_reverse (Array, 9, 0, 1, 4433);
+
+  _Cilk_for (int ii = 9; ii > -1; --ii)
+    Array[ii] = 9988;
+  check_reverse (Array, 9, 0, 1, 9988);
+
+  _Cilk_for (int ii = 0; ii < 10; ++ii)
+    Array[ii] = 3311;
+  check (Array, 0, 10, 1, 3311);
+
+  _Cilk_for (int ii = 0; ii < 10; ii += 2)
+    Array[ii] = 1328;
+  check (Array, 0, 10, 2, 1328);
+
+  _Cilk_for (int ii = 9; ii >= 0; ii -= 2)
+    Array[ii] = 1738;
+  check_reverse (Array, 9, 0, 2, 1738);
+
+
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    {
+      if (ii % 2)
+	Array[ii] = 1343;
+      else
+	Array[ii] = 3413;
+    }
+
+  check (Array, 1, 10, 2, 1343); 
+  check (Array, 0, 10, 2, 3413); 
+
+  _Cilk_for (short cc = 0; cc < 10; cc++) 
+    Array[cc] = 1343;
+  check (Array, 0, 10,  1,1343);
+
+  _Cilk_for (short cc = 9; cc >= 0; cc--)
+    Array[cc] = 1348;
+  check_reverse (Array, 9, 0, 1, 1348);
+
+
+
+  /* Loop with polynomials in _Cilk_for.  */
+  _Cilk_for (int ii = z - 3; ii <= z * 3; ii += 2)
+    { 
+      Array[ii] = 3233;
+    }
+
+  for (int ii = z-3; ii <= z*3; ii += 2)
+    if (Array[ii] != 3233)
+      __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c
new file mode 100644
index 0000000..0ebc09a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=c99" { target c } } */
+
+
+int main (void)
+{
+  int q = 0, ii = 0, jj = 0;
+
+  _Cilk_for (int ii; ii < 10; ii++) /* { dg-error "is not initialized" } */
+    q = 5;
+
+  _Cilk_for (; ii < 10; ii++) /* { dg-error "expected iteration declaration" } */
+    q = 2;
+
+  _Cilk_for (int ii = 0; ; ii++) /* { dg-error "missing controlling predicate" } */
+    q = 2;
+
+  _Cilk_for (int ii = 0; ii < 10, jj < 10; ii++)  /* { dg-error "expected ';' before ',' token" } */
+    q = 5;
+
+  _Cilk_for (int ii = 0; ii < 10; ) /* { dg-error "missing increment" } */
+    q = 5;
+
+  _Cilk_for (int ii = 0, jj = 0; ii < 10; ii++) /* { dg-error "expected iteration declaration" } */
+    q = 5;
+
+  _Cilk_for (volatile int vii = 0; vii < 10; vii++) /* { dg-error "iteration variable cannot be volatile" } */
+    q = 5;
+
+  _Cilk_for (static int sii = 0; sii < 10; sii++) /* { dg-error "static variable" } */
+    q = 5;
+
+  _Cilk_for (float fii = 3.47; fii < 5.23; fii++) /* { dg-error "invalid type for iteration variable" } */
+    q = 5;
+
+  _Cilk_for (int ii = 0; 10 > jj; ii++) /* { dg-error "invalid controlling predicate" } */
+    q = 5;
+
+  _Cilk_for (int ii = 0; ii < 10; ii >> 1) /* { dg-error "invalid increment expression" } */
+    q = 5;
+
+  _Cilk_for (int ii = 10; ii >= 0; ii--) /* This is OK!  */
+    q = 5;
+
+  _Cilk_for (int ii; ii < 10; ii++) /* { dg-error "is not initialized" } */
+    q = 5;
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain.c
new file mode 100644
index 0000000..6cb9b03
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain.c
@@ -0,0 +1,35 @@
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99"  { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+
+int grain_value = 2;
+int main (void)
+{
+  int Array1[200], Array1_Serial[200];
+
+  for (int ii = 0; ii < 200; ii++)
+    {
+      Array1_Serial[ii] = 2;
+      Array1[ii] = 1;
+    }
+
+#pragma cilk grainsize = 2
+  _Cilk_for (int ii = 0; ii < 200; ii++)
+    Array1[ii] = 2;
+
+  for (int ii = 0; ii < 200; ii++)
+    if (Array1[ii] != Array1_Serial[ii])
+      return (ii+1);
+
+#pragma cilk grainsize = grain_value
+  _Cilk_for (int ii = 0; ii < 200; ii++) 
+    Array1[ii] = 2;
+
+  for (int ii = 0; ii < 200; ii++)
+    if (Array1[ii] != Array1_Serial[ii])
+      return (ii+1);
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c
new file mode 100644
index 0000000..ff8bc0a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-fcilkplus -Wunknown-pragmas" } */
+/* { dg-additional-options "-std=c99" { target c } } */
+
+
+char Array1[26];
+
+#pragma cilk grainsize = 2 /* { dg-error "must be inside a function" } */
+
+int main(int argc, char **argv)
+{
+/* This is OK.  */
+#pragma cilk grainsize = 2
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array1[ii] = 0;
+
+#pragma cilk grainsize 2 /* { dg-error "expected '=' before numeric constant" } */
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array1[ii] = 0;
+
+#pragma cilk grainsiz = 2 /* { dg-warning "ignoring #pragma cilk grainsiz" } */
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array1[ii] = 0;
+
+
+/* This is OK, it will do a type conversion to long int.  */
+#pragma cilk grainsize = 0.5 
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array1[ii] = 0;
+
+#pragma cilk grainsize = 1 
+  while (Array1[5] != 0) /* { dg-warning "grainsize pragma is not followed" } */
+    {
+    /* Blah */
+    }
+
+#pragma cilk grainsize = 1 
+  int q = 0; /* { dg-warning "grainsize pragma is not followed" } */
+  _Cilk_for (q = 0; q < 10; q++)
+    Array1[q]  = 5;
+
+  while (Array1[5] != 0)
+    {
+    /* Blah */
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_ptr_iter.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_ptr_iter.c
new file mode 100644
index 0000000..7a779f7
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_ptr_iter.c
@@ -0,0 +1,41 @@
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99"  { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+
+
+/* <feature> loop control variable must have integer, pointer or class type
+   </feature>
+*/
+
+#define ARRAY_SIZE 10000
+int a[ARRAY_SIZE];
+
+int main(void)
+{ 
+  int ii = 0;
+
+#if 1
+  for (ii =0; ii < ARRAY_SIZE; ii++)
+    a[ii] = 5;
+#endif
+  _Cilk_for(int *aa = a; aa < a + ARRAY_SIZE; aa++) 
+    *aa = 0;
+#if 1
+  for (ii = 0; ii < ARRAY_SIZE; ii++) 
+    if (a[ii] != 0) 
+      __builtin_abort ();
+#endif
+
+  _Cilk_for (int *aa = a; aa < a + ARRAY_SIZE; aa = aa + 2)
+    *aa = 4;
+
+#if 1
+  for (ii = 0; ii < ARRAY_SIZE; ii = ii + 2) 
+    if (a[ii] != 4) 
+      __builtin_abort ();
+#endif
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/nested_cilk_for.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/nested_cilk_for.c
new file mode 100644
index 0000000..cffe17e
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/nested_cilk_for.c
@@ -0,0 +1,79 @@
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99"  { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#if HAVE_IO
+#include <stdio.h>
+#endif
+
+int main (void)
+{
+  int Array[10][10];
+
+
+  for (int ii = 0; ii < 10; ii++)
+    for (int jj = 0; jj < 10; jj++)
+	{
+	  Array[ii][jj] = 0;
+	}
+
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    _Cilk_for (int jj = 0; jj < 5; jj++)
+      Array[ii][jj] = 5;
+
+  for (int ii = 0; ii < 10; ii++)
+    for (int jj = 0; jj < 5; jj++)
+      if (Array[ii][jj] != 5)
+#if HAVE_IO
+	printf("Array[%d][%d] = %d\n", ii, jj, Array[ii][jj]);
+#else
+	__builtin_abort ();
+#endif
+
+
+  /* One goes up and one goes down.  */
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    _Cilk_for (int jj = 9; jj >= 0; jj--)
+      Array[ii][jj] = 7;
+
+  for (int ii = 0; ii < 10; ii++)
+    for (int jj = 9; jj >= 0; jj--)
+      if (Array[ii][jj] != 7)
+#if HAVE_IO
+	printf("Array[%d][%d] = %d\n", ii, jj, Array[ii][jj]);
+#else
+	__builtin_abort ();
+#endif
+
+  /* different step sizes.  */
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    _Cilk_for (int jj = 0; jj < 10; jj += 2)
+      Array[ii][jj] = 9;
+  
+  for (int ii = 0; ii < 10; ii++)
+    for (int jj = 0; jj < 10; jj += 2)
+      if (Array[ii][jj] != 9)
+#if HAVE_IO
+	printf("Array[%d][%d] = %d\n", ii, jj, Array[ii][jj]);
+#else
+	__builtin_abort ();
+#endif
+
+  /* different step sizes.  */
+  _Cilk_for (int ii = 0; ii < 10; ii += 2)
+    _Cilk_for (int jj = 5; jj < 9; jj++)
+      Array[ii][jj] = 11; 
+  
+  for (int ii = 0; ii < 10; ii += 2)
+    for (int jj = 5; jj < 9; jj++)
+      if (Array[ii][jj] != 11)
+#if HAVE_IO
+	printf("Array[%d][%d] = %d\n", ii, jj, Array[ii][jj]);
+#else
+	__builtin_abort ();
+#endif
+
+  return 0;
+}
+
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index 0a41b86..988408a 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -351,6 +351,7 @@ enum omp_clause_schedule_kind {
   OMP_CLAUSE_SCHEDULE_GUIDED,
   OMP_CLAUSE_SCHEDULE_AUTO,
   OMP_CLAUSE_SCHEDULE_RUNTIME,
+  OMP_CLAUSE_SCHEDULE_CILKFOR,
   OMP_CLAUSE_SCHEDULE_LAST
 };
 
diff --git a/gcc/tree.def b/gcc/tree.def
index f8d6444..558d7c8 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1051,6 +1051,10 @@ DEFTREECODE (OMP_SIMD, "omp_simd", tcc_statement, 6)
    Operands like for OMP_FOR.  */
 DEFTREECODE (CILK_SIMD, "cilk_simd", tcc_statement, 6)
 
+/* Cilk Plus - _Cilk_for (..)
+   Operands like for OMP_FOR.  */
+DEFTREECODE (CILK_FOR, "cilk_for", tcc_statement, 6)
+
 /* OpenMP - #pragma omp distribute [clause1 ... clauseN]
    Operands like for OMP_FOR.  */
 DEFTREECODE (OMP_DISTRIBUTE, "omp_distribute", tcc_statement, 6)

Comments

Jakub Jelinek Jan. 8, 2014, 5:31 p.m. UTC | #1
On Tue, Jan 07, 2014 at 10:11:59PM +0000, Iyer, Balaji V wrote:
> 	I used a similar existing one (safelen). Attached, please find 2
> fixed patches for C and C++ along with their changelogs.

But safelen is something completely different, while if I skim
the _Cilk_for docs, the grain is really a chunk size, where the runtime
library performs the scheduling of grain sized chunks, so using
OMP_CLAUSE_SCHEDULE clause with
OMP_CLAUSE_SCHEDULE_KIND (c) = OMP_CLAUSE_SCHEDULE_RUNTIME;
OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (c) = grain_expr;
sounds like what should be used.  OMP_CLAUSE_SAFELEN says what is the
minimal vectorization factor the compiler can assume is safe for
a simd loop.

	Jakub
diff mbox

Patch

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index c99c1fc..6ad35d0
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -237,8 +237,8 @@  static void cp_parser_initial_pragma
 static tree cp_literal_operator_id
   (const char *);
 
-static void cp_parser_cilk_simd
-  (cp_parser *, cp_token *);
+static tree cp_parser_cilk_simd
+  (cp_parser *, cp_token *, tree);
 static bool cp_parser_omp_declare_reduction_exprs
   (tree, cp_parser *);
 
@@ -9364,6 +9364,18 @@  cp_parser_statement (cp_parser* parser, tree in_statement_expr,
 	  statement = cp_parser_iteration_statement (parser, false);
 	  break;
 
+	case RID_CILK_FOR:
+	  if (!flag_enable_cilkplus)
+	    {
+	      error_at (cp_lexer_peek_token (parser->lexer)->location,
+			"-fcilkplus must be enabled to use %<_Cilk_for%>");
+	      cp_lexer_consume_token (parser->lexer);
+	      statement = error_mark_node;
+	    }
+	  else
+	    statement = cp_parser_cilk_simd (parser, NULL, integer_zero_node);
+	  break;
+
 	case RID_BREAK:
 	case RID_CONTINUE:
 	case RID_RETURN:
@@ -28694,7 +28706,7 @@  cp_parser_omp_for_cond (cp_parser *parser, tree decl, enum tree_code code)
     case LE_EXPR:
       break;
     case NE_EXPR:
-      if (code == CILK_SIMD)
+      if (code == CILK_SIMD || code == CILK_FOR)
 	break;
       /* Fall through: OpenMP disallows NE_EXPR.  */
     default:
@@ -29019,11 +29031,18 @@  cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
       bool add_private_clause = false;
       location_t loc;
 
-      if (!cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR))
+      if (code == CILK_SIMD
+	  && !cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR))
 	{
 	  cp_parser_error (parser, "for statement expected");
 	  return NULL;
 	}
+      if (code == CILK_FOR
+	  && !cp_lexer_next_token_is_keyword (parser->lexer, RID_CILK_FOR))
+	{
+	  cp_parser_error (parser, "_Cilk_for statement expected");
+	  return NULL;
+	}
       loc = cp_lexer_consume_token (parser->lexer)->location;
 
       if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
@@ -29032,13 +29051,26 @@  cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
       init = decl = real_decl = NULL;
       this_pre_body = push_stmt_list ();
 
+      if (code == CILK_FOR
+	  && cp_lexer_next_token_is_keyword (parser->lexer, RID_STATIC))
+	{
+	  error_at (cp_lexer_peek_token (parser->lexer)->location,
+		    "induction variable cannot be static");
+	  cp_lexer_consume_token (parser->lexer);
+	}
       add_private_clause
 	|= cp_parser_omp_for_loop_init (parser,
-					/*parsing_openmp=*/code != CILK_SIMD,
+					/*parsing_openmp=*/
+					(code != CILK_SIMD && code != CILK_FOR),
 					this_pre_body, for_block,
 					init, decl, real_decl);
 
-      cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
+      if (!cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON)
+	  && code == CILK_FOR)
+	{
+	  cp_parser_skip_to_end_of_statement (parser);
+	  cp_parser_consume_semicolon_at_end_of_statement (parser);
+	}
       if (this_pre_body)
 	{
 	  this_pre_body = pop_stmt_list (this_pre_body);
@@ -29196,17 +29228,30 @@  cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
 
   /* Note that we saved the original contents of this flag when we entered
      the structured block, and so we don't need to re-save it here.  */
-  if (code == CILK_SIMD)
+  if (code == CILK_SIMD || code == CILK_FOR)
     parser->in_statement = IN_CILK_SIMD_FOR;
   else
     parser->in_statement = IN_OMP_FOR;
 
+  tree top_body = NULL_TREE, top_level_body = NULL_TREE;
+  if (code == CILK_FOR)
+    {
+      top_level_body = push_stmt_list ();
+      top_body = begin_omp_parallel ();
+    }
+
   /* Note that the grammar doesn't call for a structured block here,
      though the loop as a whole is a structured block.  */
   body = push_stmt_list ();
   cp_parser_statement (parser, NULL_TREE, false, NULL);
   body = pop_stmt_list (body);
 
+  if (code == CILK_FOR)
+    {
+      body = add_stmt (body);
+      body = finish_omp_parallel (NULL_TREE, top_body);
+      body = pop_stmt_list (top_level_body);
+    }
   if (declv == NULL_TREE)
     ret = NULL_TREE;
   else
@@ -31084,6 +31129,38 @@  cp_parser_initial_pragma (cp_token *first_token)
   cp_lexer_get_preprocessor_token (NULL, first_token);
 }
 
+/* Parses the grainsize pragma for the _Cilk_for statement.
+   Syntax:
+   #pragma cilk grainsize = <VALUE>.  */
+
+static void
+cp_parser_cilk_grainsize (cp_parser *parser, cp_token *pragma_tok)
+{
+  if (cp_parser_require (parser, CPP_EQ, RT_EQ))
+    {
+      tree exp = cp_parser_binary_expression (parser, false, false,
+                                              PREC_NOT_OPERATOR, NULL);
+      cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+      if (!exp || exp == error_mark_node)
+        {
+          error_at (pragma_tok->location, "invalid grainsize for _Cilk_for");
+          return;
+        }
+      cp_token *n_tok = cp_lexer_peek_token (parser->lexer);
+
+      /* Make sure the next token is _Cilk_for, it is invalid otherwise.  */
+      if (n_tok && n_tok->type == CPP_KEYWORD
+	  && n_tok->keyword == RID_CILK_FOR)
+	cp_parser_cilk_simd (parser, NULL, exp);
+      else
+	warning_at (cp_lexer_peek_token (parser->lexer)->location, 0,
+		    "%<#pragma cilk grainsize%> is not followed by "
+		    "%<_Cilk_for%>");
+      return;
+    }
+  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+}
+
 /* Normal parsing of a pragma token.  Here we can (and must) use the
    regular lexer.  */
 
@@ -31263,9 +31340,30 @@  cp_parser_pragma (cp_parser *parser, enum pragma_context context)
 		    "%<#pragma simd%> must be inside a function");
 	  break;
 	}
-      cp_parser_cilk_simd (parser, pragma_tok);
+      cp_parser_cilk_simd (parser, pragma_tok, NULL_TREE);
       return true;
 
+    case PRAGMA_CILK_GRAINSIZE:
+      if (context == pragma_external)
+        {
+          error_at (pragma_tok->location,
+                    "%<#pragma cilk grainsize%> must be inside a function");
+          break;
+        }
+
+      /* Ignore the pragma if Cilk Plus is not enabled.  */
+      if (flag_enable_cilkplus)
+        {
+          cp_parser_cilk_grainsize (parser, pragma_tok);
+          return true;
+        }
+      else
+        {
+          error_at (pragma_tok->location, "-fcilkplus must be enabled to use "
+                    "%<#pragma cilk grainsize%>");
+          break;
+	}
+      
     default:
       gcc_assert (id >= PRAGMA_FIRST_EXTERNAL);
       c_invoke_pragma_handler (id);
@@ -31555,31 +31653,63 @@  cp_parser_cilk_simd_all_clauses (cp_parser *parser, cp_token *pragma_token)
     return c_finish_cilk_clauses (clauses);
 }
 
-/* Main entry-point for parsing Cilk Plus <#pragma simd> for loops.  */
+/* Main entry-point for parsing Cilk Plus <#pragma simd> for and _Cilk_for
+   loops.  This function returns NULL_TREE whenever it is parsing the
+   #pragma simd's for because the caller does not check the return value.
+   _Cilk_for's caller checks this value and thus return error_mark_node
+   when errors happen and a valid value when things go as expected.  */
 
-static void
-cp_parser_cilk_simd (cp_parser *parser, cp_token *pragma_token)
+static tree
+cp_parser_cilk_simd (cp_parser *parser, cp_token *pragma_token, tree grain)
 {
-  tree clauses = cp_parser_cilk_simd_all_clauses (parser, pragma_token);
+  bool is_cilk_for = !pragma_token ? true: false;
+  tree clauses = NULL_TREE;
+
+  if (!is_cilk_for)
+    clauses = cp_parser_cilk_simd_all_clauses (parser, pragma_token);
 
   if (clauses == error_mark_node)
-    return;
-  
-  if (cp_lexer_next_token_is_not_keyword (parser->lexer, RID_FOR))
+    return NULL_TREE;
+
+  if (!is_cilk_for
+      && cp_lexer_next_token_is_not_keyword (parser->lexer, RID_FOR))
     {
       error_at (cp_lexer_peek_token (parser->lexer)->location,
 		"for statement expected");
-      return;
+      return NULL_TREE;
+    }
+  if (is_cilk_for
+      && cp_lexer_next_token_is_not_keyword (parser->lexer, RID_CILK_FOR))
+    {
+      error_at (cp_lexer_peek_token (parser->lexer)->location,
+		"_Cilk_for statement expected");
+      return error_mark_node;
     }
 
   tree sb = begin_omp_structured_block ();
   int save = cp_parser_begin_omp_structured_block (parser);
-  tree ret = cp_parser_omp_for_loop (parser, CILK_SIMD, clauses, NULL);
+  enum tree_code code = is_cilk_for ? CILK_FOR : CILK_SIMD;
+  tree ret = cp_parser_omp_for_loop (parser, code, clauses, NULL);
   if (ret)
     cpp_validate_cilk_plus_loop (OMP_FOR_BODY (ret));
+
+  /* For _Cilk_for statements, the grain value is stored in the same
+     location as clauses for OMP for.  */
+  if (is_cilk_for && ret)
+    { 
+      tree l = build_omp_clause (EXPR_LOCATION (grain),
+				 OMP_CLAUSE_SAFELEN);
+      OMP_CLAUSE_SAFELEN_EXPR (l) = grain;
+      OMP_CLAUSE_CHAIN (l) = NULL_TREE;
+      OMP_FOR_CLAUSES (ret) = l;
+    }
+
   cp_parser_end_omp_structured_block (parser, save);
-  add_stmt (finish_omp_structured_block (sb));
-  return;
+  tree stmt = finish_omp_structured_block (sb);
+  add_stmt (stmt);
+  if (is_cilk_for) 
+    return stmt;
+  return NULL_TREE;
 }
 
 /* Create an identifier for a generic parameter type (a synthesized
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 98d7365..99d092b 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -13575,6 +13575,7 @@  tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
     case OMP_FOR:
     case OMP_SIMD:
     case CILK_SIMD:
+    case CILK_FOR:
     case OMP_DISTRIBUTE:
       {
 	tree clauses, body, pre_body;
@@ -13582,8 +13583,22 @@  tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
 	tree incrv = NULL_TREE;
 	int i;
 
-	clauses = tsubst_omp_clauses (OMP_FOR_CLAUSES (t), false,
-				      args, complain, in_decl);
+	/* We cannot use the tsubst_omp_clauses since it will try to
+	   do checking such as whether a certain clause can be used
+	   with a certain for-loop.  We are just use safelen clause here 
+	   as a holder to hold the grain value.  */
+	if (TREE_CODE (t) == CILK_FOR)
+	  {
+	    tree l = OMP_FOR_CLAUSES (t);
+	    l = RECUR (OMP_CLAUSE_SAFELEN_EXPR (l));
+	    clauses = build_omp_clause (EXPR_LOCATION (l),
+					OMP_CLAUSE_SAFELEN);
+	    OMP_CLAUSE_SAFELEN_EXPR (clauses) = l;
+	    OMP_CLAUSE_CHAIN (clauses) = NULL_TREE;
+	  } 
+	else
+	  clauses = tsubst_omp_clauses (OMP_FOR_CLAUSES (t), false,
+					args, complain, in_decl);
 	if (OMP_FOR_INIT (t) != NULL_TREE)
 	  {
 	    declv = make_tree_vec (TREE_VEC_LENGTH (OMP_FOR_INIT (t)));
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 0bb64c7..cc1a013 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -5965,7 +5965,8 @@  finish_omp_task (tree clauses, tree body)
 static bool
 handle_omp_for_class_iterator (int i, location_t locus, tree declv, tree initv,
 			       tree condv, tree incrv, tree *body,
-			       tree *pre_body, tree clauses)
+			       tree *pre_body, tree clauses,
+			       bool is_cilk_for)
 {
   tree diff, iter_init, iter_incr = NULL, last;
   tree incr_var = NULL, orig_pre_body, orig_body, c;
@@ -5985,6 +5986,7 @@  handle_omp_for_class_iterator (int i, location_t locus, tree declv, tree initv,
     case GE_EXPR:
     case LT_EXPR:
     case LE_EXPR:
+    case NE_EXPR:
       if (TREE_OPERAND (cond, 1) == iter)
 	cond = build2 (swap_tree_comparison (TREE_CODE (cond)),
 		       TREE_TYPE (cond), iter, TREE_OPERAND (cond, 0));
@@ -6128,6 +6130,11 @@  handle_omp_for_class_iterator (int i, location_t locus, tree declv, tree initv,
       break;
 
   decl = create_temporary_var (TREE_TYPE (diff));
+  /* In _Cilk_for we must know the induction variable name since it is
+     read by expand_cilk_for_body in omp-low.c to set the induction
+     variable in the child function correctly.  */
+  if (is_cilk_for)
+    DECL_NAME (decl) = make_anon_name ();
   pushdecl (decl);
   add_decl_expr (decl);
   last = create_temporary_var (TREE_TYPE (diff));
@@ -6343,8 +6350,24 @@  finish_omp_for (location_t locus, enum tree_code code, tree declv, tree initv,
 				"iteration variable %qE", decl);
 	      return NULL;
 	    }
-	  if (handle_omp_for_class_iterator (i, locus, declv, initv, condv,
-					     incrv, &body, &pre_body, clauses))
+
+	  /* In _Cilk_for, all the iterator mapping code should be
+	     inserted in the OMP_PARALLEL_BODY.  */
+	  if (code == CILK_FOR)
+	    {
+	      tree the_body = OMP_PARALLEL_BODY (body);
+	      if (TREE_CODE (the_body) == BIND_EXPR)
+		the_body = BIND_EXPR_BODY (the_body);
+	      if (handle_omp_for_class_iterator (i, locus, declv, initv,
+						 condv, incrv, &the_body,
+						 &pre_body, clauses, true))
+		return NULL;
+	      else
+		BIND_EXPR_BODY (OMP_PARALLEL_BODY (body)) = the_body;
+	    }
+	  else if (handle_omp_for_class_iterator (i, locus, declv, initv,
+						  condv, incrv, &body,
+						  &pre_body, clauses, false))
 	    return NULL;
 	  continue;
 	}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c
index 0ebc09a..ed73c34 100644
--- a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c
@@ -7,7 +7,8 @@  int main (void)
 {
   int q = 0, ii = 0, jj = 0;
 
-  _Cilk_for (int ii; ii < 10; ii++) /* { dg-error "is not initialized" } */
+  _Cilk_for (int ii; ii < 10; ii++) /* { dg-error "is not initialized" "" { target c } } */
+    /* { dg-error "expected" "" { target c++ } 10 } */
     q = 5;
 
   _Cilk_for (; ii < 10; ii++) /* { dg-error "expected iteration declaration" } */
@@ -16,24 +17,30 @@  int main (void)
   _Cilk_for (int ii = 0; ; ii++) /* { dg-error "missing controlling predicate" } */
     q = 2;
 
-  _Cilk_for (int ii = 0; ii < 10, jj < 10; ii++)  /* { dg-error "expected ';' before ',' token" } */
+  _Cilk_for (int ii = 0; ii < 10, jj < 10; ii++)  /* { dg-error "expected ';' before ',' token" "" { target c } } */
+    /* { dg-error "invalid controlling predicate" "" { target c++ }  20 } */
     q = 5;
 
   _Cilk_for (int ii = 0; ii < 10; ) /* { dg-error "missing increment" } */
     q = 5;
 
-  _Cilk_for (int ii = 0, jj = 0; ii < 10; ii++) /* { dg-error "expected iteration declaration" } */
+
+  _Cilk_for (int ii = 0, jj = 0; ii < 10; ii++) /* { dg-error "expected" } */ 
     q = 5;
 
   _Cilk_for (volatile int vii = 0; vii < 10; vii++) /* { dg-error "iteration variable cannot be volatile" } */
     q = 5;
 
-  _Cilk_for (static int sii = 0; sii < 10; sii++) /* { dg-error "static variable" } */
+ 
+  _Cilk_for (static int sii = 0; sii < 10; sii++) /* { dg-error "static" } */
+
     q = 5;
 
+
   _Cilk_for (float fii = 3.47; fii < 5.23; fii++) /* { dg-error "invalid type for iteration variable" } */
     q = 5;
 
+
   _Cilk_for (int ii = 0; 10 > jj; ii++) /* { dg-error "invalid controlling predicate" } */
     q = 5;
 
@@ -43,7 +50,9 @@  int main (void)
   _Cilk_for (int ii = 10; ii >= 0; ii--) /* This is OK!  */
     q = 5;
 
-  _Cilk_for (int ii; ii < 10; ii++) /* { dg-error "is not initialized" } */
+  _Cilk_for (int ii; ii < 10; ii++) /* { dg-error "is not initialized" "" { target c } } */ 
+    /* { dg-error "expected" "" { target c++ }  53 } */
     q = 5;
+
   return 0;
 }
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c
index ff8bc0a..e1e3217 100644
--- a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c
@@ -29,13 +29,13 @@  int main(int argc, char **argv)
     Array1[ii] = 0;
 
 #pragma cilk grainsize = 1 
-  while (Array1[5] != 0) /* { dg-warning "grainsize pragma is not followed" } */
+  while (Array1[5] != 0) /* { dg-warning "is not followed by" } */
     {
     /* Blah */
     }
 
 #pragma cilk grainsize = 1 
-  int q = 0; /* { dg-warning "grainsize pragma is not followed" } */
+  int q = 0; /* { dg-warning "is not followed by" } */
   _Cilk_for (q = 0; q < 10; q++)
     Array1[q]  = 5;
 
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/cilk-for-tplt.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/cilk-for-tplt.cc
new file mode 100644
index 0000000..8221371
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/cilk-for-tplt.cc
@@ -0,0 +1,25 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#define SIZE 100
+#define CHECK_VALUE 5
+
+template <class T>
+int func (T start, T end)
+{
+  int Array[SIZE];
+  _Cilk_for (T ii = 0; ii < end; ii++)
+    Array[ii] = CHECK_VALUE;
+  
+  for (T ii = 0; ii < end; ii++)
+    if (Array[ii] != CHECK_VALUE)
+      __builtin_abort ();
+
+  return 0;
+}
+
+int main (void)
+{
+  return func <int> (0, 100) + func <long> (0, 100);
+}
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/stl_iter.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_iter.cc
new file mode 100644
index 0000000..2ac8c72
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_iter.cc
@@ -0,0 +1,52 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#include <vector>
+#include <cstdio>
+#include <iostream>
+#include <algorithm>
+
+using namespace std;
+
+
+int main(void)
+{
+vector <int> array;
+vector <int> array_serial;
+
+#if 1
+for (int ii = -1; ii < 10; ii++)
+{   
+  array.push_back(ii);
+  array_serial.push_back (ii);
+}
+#endif
+_Cilk_for (vector<int>::iterator iter = array.begin(); iter != array.end();
+          iter++)
+{
+   if (*iter  == 6) 
+     *iter = 13;
+}
+for (vector<int>::iterator iter = array_serial.begin(); 
+     iter != array_serial.end(); iter++)
+{
+   if (*iter  == 6) 
+     *iter = 13;
+}
+sort (array.begin(), array.end());
+sort (array_serial.begin(), array_serial.end());
+
+vector <int>::iterator iter = array.begin ();
+vector <int>::iterator iter_serial = array_serial.begin ();
+
+while (iter != array.end () && iter_serial != array_serial.end ())
+{
+  if (*iter != *iter_serial)
+    __builtin_abort ();
+  iter++;
+  iter_serial++;
+}
+
+return 0;
+}   
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/stl_rev_iter.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_rev_iter.cc
new file mode 100644
index 0000000..1cf3301
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_rev_iter.cc
@@ -0,0 +1,72 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+
+#include <vector>
+#include <cstdio>
+#include <iostream>
+#include <algorithm>
+
+using namespace std;
+
+
+int main(void)
+{
+vector <int> array,array_serial;
+
+#if 1
+for (int ii = -1; ii < 10; ii++)
+{   
+  array.push_back(ii);
+  array_serial.push_back(ii);
+}
+#endif
+_Cilk_for (vector<int>::reverse_iterator iter4 = array.rbegin(); 
+	   iter4 != array.rend(); iter4++)
+{
+  if (*iter4 == 0x8) {
+    *iter4 = 9;
+  }
+}
+
+_Cilk_for (vector<int>::reverse_iterator iter4 = array_serial.rbegin(); 
+	   iter4 != array_serial.rend(); iter4++)
+{
+  if (*iter4 == 0x8) {
+    *iter4 = 9;
+  }
+}
+_Cilk_for (vector<int>::reverse_iterator iter2 = array.rbegin(); 
+	   iter2 != array.rend();
+          iter2 += 1) 
+{
+   if ((*iter2 == 0x4) || (*iter2 == 0x7)) {
+    *iter2 = 0x3;
+   }
+}
+for (vector<int>::reverse_iterator iter2 = array_serial.rbegin(); 
+     iter2 != array_serial.rend();
+          iter2 += 1) 
+{
+   if ((*iter2 == 0x4) || (*iter2 == 0x7)) {
+    *iter2 = 0x3;
+   }
+}
+sort (array.begin(), array.end());
+sort (array_serial.begin(), array_serial.end());
+
+vector <int>::iterator iter = array.begin ();
+vector <int>::iterator iter_serial = array_serial.begin ();
+while (iter != array.end () && iter_serial != array_serial.end ())
+{
+  if (*iter != *iter_serial)
+    __builtin_abort ();
+  iter++;
+  iter_serial++;
+}
+
+return 0;
+}   
+
+
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/stl_test.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_test.cc
new file mode 100644
index 0000000..3e350a1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_test.cc
@@ -0,0 +1,50 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+
+#include <iostream>
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+#include <algorithm>
+#include <list>
+
+using namespace std;
+
+
+int main(int argc, char **argv)
+{
+  vector <int> number_list, number_list_serial;
+  int new_number = 0;
+  int no_elements = 0;
+  
+  if (argc != 2)
+  {
+    no_elements = 10000;
+  }
+
+
+  number_list.clear();
+  number_list_serial.clear();
+  for (int ii = 0; ii < no_elements; ii++)
+  {
+    number_list.push_back(new_number);
+    number_list_serial.push_back(new_number);
+  }
+
+  _Cilk_for (int jj = 0; jj < no_elements; jj++)
+  {
+    number_list[jj] = jj + no_elements;
+  }
+  for (int jj = 0; jj < no_elements; jj++)
+  {
+    number_list_serial[jj] = jj + no_elements;
+  }
+
+  for (int jj = 0; jj < no_elements; jj++)
+    if (number_list_serial[jj] != number_list[jj])
+      abort ();
+
+  return 0;
+}