Patchwork [3/4] Code generation for OpenCL.

login
register
mail settings
Submitter Sebastian Pop
Date Dec. 25, 2010, 6:26 a.m.
Message ID <1293258413-29902-4-git-send-email-sebpop@gmail.com>
Download mbox | patch
Permalink /patch/76665/
State New
Headers show

Comments

Sebastian Pop - Dec. 25, 2010, 6:26 a.m.
2010-12-25  Alexey Kravets  <kayrick@ispras.ru>

	* Makefile.in (OBJS-common): Add graphite-opencl-codegen.o,
	graphite-opencl-meta-clast.o and graphite-opencl.o.
	(graphite-opencl-codegen.o): New.
	(graphite-opencl-meta-clast.o): New.
	(graphite-opencl.o): New.
	(GTFILES): Add graphite-opencl.c.
	* common.opt (fgraphite-opencl): New.
	(fgraphite-opencl-cpu): New.
	(fgraphite-opencl-ignore-depth-heuristic): New.
	(fgraphite-opencl-ignore-mem-heuristic): New.
	(fgraphite-opencl-ignore-dep): New.
	(fgraphite-opencl-ignore-types): New.
	(fgraphite-opencl-depth-base): New.
	(fgraphite-opencl-debug): New.
	* graphite-opencl-codegen.c: New file.
	* graphite-opencl-meta-clast.c: New file.
	* graphite-opencl.c: New file.
	* graphite-opencl.h: New file.
	* graphite.c (graphite_transform_loops): Call graphite_opencl_init and
	graphite_opencl_finalize.
	* tree-ssa-loop.c (gate_graphite_transforms): Handle
	flag_graphite_opencl.
	* graphite-clast-to-gimple.c (gloog): Call opencl_transform_clast.
	* dbgcnt.def (opencl_scop_cnt): New counter.
---
 gcc/ChangeLog.graphite           |   27 +
 gcc/Makefile.in                  |   28 +
 gcc/common.opt                   |   32 +
 gcc/dbgcnt.def                   |    1 +
 gcc/graphite-clast-to-gimple.c   |   14 +-
 gcc/graphite-clast-to-gimple.h   |    6 +
 gcc/graphite-opencl-codegen.c    | 1916 +++++++++++++++++++++++++
 gcc/graphite-opencl-meta-clast.c |  784 ++++++++++
 gcc/graphite-opencl.c            | 2913 ++++++++++++++++++++++++++++++++++++++
 gcc/graphite-opencl.h            |  254 ++++
 gcc/graphite.c                   |    6 +
 gcc/tree-ssa-loop.c              |    3 +-
 12 files changed, 5979 insertions(+), 5 deletions(-)
 create mode 100644 gcc/graphite-opencl-codegen.c
 create mode 100644 gcc/graphite-opencl-meta-clast.c
 create mode 100644 gcc/graphite-opencl.c
 create mode 100644 gcc/graphite-opencl.h
Joseph S. Myers - Dec. 25, 2010, 8:52 a.m.
On Sat, 25 Dec 2010, Sebastian Pop wrote:

> +#include "tm.h"

Why does this file need tm.h?

> +#include "rtl.h"

Or, why does it need rtl.h, since rtl.h needs tm.h?  I'd have thought 
graphite-* files should be higher level than that.

> +#include <sys/time.h>

Please don't include random system headers in individual source files 
unless there is a good reason.  system.h already includes sys/time.h under 
appropriate conditions based on configure tests.

> +#include "tree.h"

This is the second include of tree.h in this new file.  I think you need 
to review all the includes carefully and work out which are actually 
needed - and of course update makefile dependencies after fixing 
unnecessary includes.

The same comments apply to at least two files added by this patch.

Patch

diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index d667086..b9afdb8 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,3 +1,30 @@ 
+2010-12-25  Alexey Kravets  <kayrick@ispras.ru>
+
+	* Makefile.in (OBJS-common): Add graphite-opencl-codegen.o,
+	graphite-opencl-meta-clast.o and graphite-opencl.o.
+	(graphite-opencl-codegen.o): New.
+	(graphite-opencl-meta-clast.o): New.
+	(graphite-opencl.o): New.
+	(GTFILES): Add graphite-opencl.c.
+	* common.opt (fgraphite-opencl): New.
+	(fgraphite-opencl-cpu): New.
+	(fgraphite-opencl-ignore-depth-heuristic): New.
+	(fgraphite-opencl-ignore-mem-heuristic): New.
+	(fgraphite-opencl-ignore-dep): New.
+	(fgraphite-opencl-ignore-types): New.
+	(fgraphite-opencl-depth-base): New.
+	(fgraphite-opencl-debug): New.
+	* graphite-opencl-codegen.c: New file.
+	* graphite-opencl-meta-clast.c: New file.
+	* graphite-opencl.c: New file.
+	* graphite-opencl.h: New file.
+	* graphite.c (graphite_transform_loops): Call graphite_opencl_init and
+	graphite_opencl_finalize.
+	* tree-ssa-loop.c (gate_graphite_transforms): Handle
+	flag_graphite_opencl.
+	* graphite-clast-to-gimple.c (gloog): Call opencl_transform_clast.
+	* dbgcnt.def (opencl_scop_cnt): New counter.
+
 2010-12-25  Alexander Monakov  <amonakov@ispras.ru>
 
 	* graphite-dependences.c (graphite_outer_subscript_bound): New.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 74fe0e8..6106331 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1251,6 +1251,9 @@  OBJS-common = \
 	graph.o \
 	graphds.o \
 	graphite.o \
+	graphite-opencl-codegen.o \
+	graphite-opencl-meta-clast.o \
+	graphite-opencl.o \
 	graphite-blocking.o \
 	graphite-clast-to-gimple.o \
 	graphite-cloog-util.o \
@@ -2692,6 +2695,30 @@  graphite-blocking.o: graphite-blocking.c $(CONFIG_H) $(SYSTEM_H) \
    $(DIAGNOSTIC_CORE_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
    $(GIMPLE_H) $(TREE_DATA_REF_H) tree-pass.h domwalk.h value-prof.h \
    graphite.h graphite-poly.h graphite-ppl.h
+graphite-opencl-codegen.o: graphite-opencl-codegen.c $(CONFIG_H) \
+   $(SYSTEM_H) coretypes.h $(TM_H) \
+   $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) \
+   $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) \
+   $(TREE_DATA_REF_H) tree-pass.h graphite.h graphite-opencl.h\
+   pointer-set.h value-prof.h graphite-ppl.h sese.h \
+   graphite-scop-detection.h graphite-clast-to-gimple.h graphite-poly.h \
+   graphite-dependences.h
+graphite-opencl-meta-clast.o: graphite-opencl-meta-clast.c $(CONFIG_H) \
+   $(SYSTEM_H) coretypes.h $(TM_H) \
+   $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) \
+   $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) \
+   $(TREE_DATA_REF_H) tree-pass.h graphite.h graphite-opencl.h\
+   pointer-set.h value-prof.h graphite-ppl.h sese.h \
+   graphite-scop-detection.h graphite-clast-to-gimple.h graphite-poly.h \
+   graphite-dependences.h
+graphite-opencl.o: graphite-opencl.c $(CONFIG_H) \
+   $(SYSTEM_H) coretypes.h $(TM_H) \
+   $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) \
+   $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) \
+   $(TREE_DATA_REF_H) tree-pass.h graphite.h graphite-opencl.h\
+   pointer-set.h value-prof.h graphite-ppl.h sese.h \
+   graphite-scop-detection.h graphite-clast-to-gimple.h graphite-poly.h \
+   graphite-dependences.h
 graphite-clast-to-gimple.o: graphite-clast-to-gimple.c $(CONFIG_H) \
    $(SYSTEM_H) coretypes.h $(TM_H) langhooks.h \
    $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(DIAGNOSTIC_CORE_H) \
@@ -3803,6 +3830,7 @@  GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
   $(srcdir)/lto-symtab.c \
   $(srcdir)/tree-ssa-alias.h \
   $(srcdir)/ipa-prop.h \
+  $(srcdir)/graphite-opencl.c \
   $(srcdir)/lto-streamer.h \
   $(srcdir)/target-globals.h \
   @all_gtfiles@
diff --git a/gcc/common.opt b/gcc/common.opt
index 8ccbca3..68cafb4 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1119,6 +1119,38 @@  floop-parallelize-all
 Common Report Var(flag_loop_parallelize_all) Optimization
 Mark all loops as parallel
 
+fgraphite-opencl
+Common Report Var(flag_graphite_opencl) Optimization
+Export OpenCL from graphite
+
+fgraphite-opencl-cpu
+Common Report Var(flag_graphite_opencl_cpu) Optimization
+Generate CPU oriented OpenCL code.
+
+fgraphite-opencl-ignore-depth-heuristic
+Common Report Var(flag_graphite_opencl_no_depth_check)
+Ignore depth heuristic in graphite-opencl pass.
+
+fgraphite-opencl-ignore-mem-heuristic
+Common Report Var(flag_graphite_opencl_no_memory_transfer_check)
+Ignore mem transfer heuristic in graphite-opencl pass.
+
+fgraphite-opencl-ignore-dep
+Common Report Var(flag_graphite_opencl_no_dep_check)
+Ignore dependency checking in graphite-opencl pass.
+
+fgraphite-opencl-ignore-types
+Common Report Var(flag_graphite_opencl_no_types_check)
+Ignore supported type checking in graphite-opencl pass.
+
+fgraphite-opencl-depth-base=
+Common RejectNegative Joined UInteger Init(0) Var(opencl_base_depth_const)
+Value for depth heuristic in graphite-opencl pass.
+
+fgraphite-opencl-debug
+Common Report Var(flag_graphite_opencl_debug)
+Add checks for opencl calls return values.
+
 floop-strip-mine
 Common Report Var(flag_loop_strip_mine) Optimization
 Enable Loop Strip Mining transformation
diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
index 0492d66..c150710 100644
--- a/gcc/dbgcnt.def
+++ b/gcc/dbgcnt.def
@@ -184,3 +184,4 @@  DEBUG_COUNTER (sms_sched_loop)
 DEBUG_COUNTER (store_motion)
 DEBUG_COUNTER (split_for_sched2)
 DEBUG_COUNTER (tail_call)
+DEBUG_COUNTER (opencl_scop_cnt)
diff --git a/gcc/graphite-clast-to-gimple.c b/gcc/graphite-clast-to-gimple.c
index 9c732aa..2d95144 100644
--- a/gcc/graphite-clast-to-gimple.c
+++ b/gcc/graphite-clast-to-gimple.c
@@ -41,6 +41,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "gimple.h"
 #include "langhooks.h"
 #include "sese.h"
+#include "dbgcnt.h"
 
 #ifdef HAVE_cloog
 #include "cloog/cloog.h"
@@ -1517,10 +1518,15 @@  gloog (scop_p scop, htab_t bb_pbb_mapping)
 
   create_params_index (params_index, pc.prog);
 
-  translate_clast (region, context_loop, pc.stmt,
-		   if_region->true_region->entry,
-		   &newivs, newivs_index,
-		   bb_pbb_mapping, 1, params_index);
+  if (flag_graphite_opencl && dbg_cnt (opencl_scop_cnt))
+    opencl_transform_clast (pc.stmt, region, if_region->true_region->entry,
+                            scop, params_index);
+  else
+    translate_clast (region, context_loop, pc.stmt,
+                     if_region->true_region->entry,
+                     &newivs, newivs_index,
+                     bb_pbb_mapping, 1, params_index);
+
   graphite_verify ();
   scev_reset ();
   recompute_all_dominators ();
diff --git a/gcc/graphite-clast-to-gimple.h b/gcc/graphite-clast-to-gimple.h
index 20c486c..c072918 100644
--- a/gcc/graphite-clast-to-gimple.h
+++ b/gcc/graphite-clast-to-gimple.h
@@ -37,6 +37,12 @@  typedef struct bb_pbb_def
   poly_bb_p pbb;
 }bb_pbb_def;
 
+/* From graphite-opencl.c  */
+extern void opencl_transform_clast (struct clast_stmt *, sese, edge,
+                                    scop_p, htab_t);
+extern void graphite_opencl_finalize (edge);
+extern void graphite_opencl_init (void);
+
 /* From graphite-clast-to-gimple.c  */
 extern bool gloog (scop_p, htab_t);
 extern cloog_prog_clast scop_to_clast (scop_p, CloogState *);
diff --git a/gcc/graphite-opencl-codegen.c b/gcc/graphite-opencl-codegen.c
new file mode 100644
index 0000000..2f07fe1
--- /dev/null
+++ b/gcc/graphite-opencl-codegen.c
@@ -0,0 +1,1916 @@ 
+/* OpencCL code generation for GRAPHITE-OpenCL.
+   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* OpenCL code genration for GRAPHITE-OpenCL.  This file implements
+   OpenCL kernel code generation from CLAST structures.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "ggc.h"
+#include "tree.h"
+#include "rtl.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "toplev.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pass.h"
+#include "domwalk.h"
+#include "value-prof.h"
+#include "pointer-set.h"
+#include "gimple.h"
+#include "sese.h"
+#include "output.h"
+#include <sys/time.h>
+#include "hashtab.h"
+#include "gimple-pretty-print.h"
+#include "tree.h"
+
+#ifdef HAVE_cloog
+#include "cloog/cloog.h"
+#include "ppl_c.h"
+#include "graphite-ppl.h"
+#include "graphite.h"
+#include "graphite-poly.h"
+#include "graphite-scop-detection.h"
+#include "graphite-clast-to-gimple.h"
+#include "graphite-dependences.h"
+#include "dyn-string.h"
+#include "graphite-opencl.h"
+
+
+/* These functions implement code generation from different clast
+   structures.  */
+static void opencl_print_stmt_list (struct clast_stmt *, opencl_main, int);
+static void opencl_print_for (struct clast_for *, opencl_main, int);
+static void opencl_print_guard (struct clast_guard *, opencl_main, int);
+static void opencl_print_equation (struct clast_equation *, opencl_main);
+static void opencl_print_expr (struct clast_expr *, opencl_main);
+static void opencl_add_variable (const char *, tree,  opencl_main);
+static void opencl_print_term (struct clast_term *, opencl_main);
+static void opencl_print_reduction (struct clast_reduction *, opencl_main);
+static void opencl_print_sum (struct clast_reduction *, opencl_main);
+static void opencl_print_binary (struct clast_binary *, opencl_main);
+static void opencl_print_minmax_c (struct clast_reduction *, opencl_main);
+
+/* These function implement code generation from different gimple
+   objects.  */
+static void opencl_print_bb (basic_block, opencl_main);
+static void opencl_print_gimple_assign_operation (gimple, opencl_main);
+static void opencl_print_gimple_assign (gimple, opencl_main);
+static void opencl_print_gimple (gimple, opencl_main);
+static int opencl_print_operand (tree, bool, opencl_main);
+
+
+static void opencl_print_local_vars (const char *, const char *, const char *,
+                                     opencl_main);
+static void opencl_try_variable (opencl_main, tree);
+static const char *opencl_get_var_name (tree);
+static void opencl_build_defines (tree, opencl_main);
+static void opencl_expand_scalar_vars (opencl_main, gimple);
+static void opencl_add_function_arg (opencl_main, tree, const char *);
+static void opencl_add_data_refs_pbb (poly_bb_p, opencl_main);
+static void opencl_add_non_scalar_type_decl (tree, dyn_string_t, const char *);
+static const char *opencl_print_function_arg_with_type (const char *, tree);
+static bool check_and_mark_arg (opencl_main, const char *, bool);
+
+
+/* Compare two clast names based on their indexes.  */
+
+static int
+opencl_cmp_scat (const char *scat1, const char *scat2)
+{
+  int len_1 = strlen (scat1);
+  int len_2 = strlen (scat2);
+
+  if (len_1 > len_2)
+    return 1;
+
+  if (len_1 < len_2)
+    return -1;
+
+  return strcmp (scat1, scat2);
+}
+
+/* This function implements !strcmp (STR1, STR2) call.  */
+
+static int
+opencl_cmp_str (const void *str1, const void *str2)
+{
+  const char *c_str1 = (const char *) str1;
+  const char *c_str2 = (const char *) str2;
+  return !strcmp (c_str1, c_str2);
+}
+
+/* Get body of generating kernel function from CODE_GEN.  */
+
+static dyn_string_t
+opencl_get_current_body (opencl_main code_gen)
+{
+  return code_gen->current_body->body;
+}
+
+/* Get header of generating kernel function from CODE_GEN.  */
+
+static dyn_string_t
+opencl_get_current_header (opencl_main code_gen)
+{
+  return code_gen->current_body->header;
+}
+
+/* Appends a string STR to the header of the generating kernel
+   function from CODE_GEN.  */
+
+static void
+opencl_append_string_to_header (const char *str, opencl_main code_gen)
+{
+  dyn_string_t tmp = opencl_get_current_header (code_gen);
+  dyn_string_append_cstr (tmp, str);
+}
+
+/* Appends a string STR to the body of the generating kernel function
+   from CODE_GEN.  */
+
+static void
+opencl_append_string_to_body (const char *str, opencl_main code_gen)
+{
+  dyn_string_t tmp = opencl_get_current_body (code_gen);
+  dyn_string_append_cstr (tmp, str);
+}
+
+/* Appends an integer NUM to the string STR following FORMAT.  */
+
+static void
+opencl_append_int_to_str (dyn_string_t str, long num, const char *format)
+{
+  char tmp[100];
+  sprintf (tmp, format, num);
+  dyn_string_append_cstr (str, tmp);
+}
+
+/* Appends an integer NUM to the header of the generating kernel
+   function from CODE_GEN following FORMAT.  */
+
+static void
+opencl_append_num_to_header (opencl_main code_gen, long num,
+			     const char *format)
+{
+  dyn_string_t tmp = opencl_get_current_header (code_gen);
+  opencl_append_int_to_str (tmp, num, format);
+}
+
+/* Appends an integer NUM to the body of the generating kernel
+   function from CODE_GEN following FORMAT.  */
+
+static void
+opencl_append_num_to_body (opencl_main code_gen, long num, const char *format)
+{
+  dyn_string_t tmp = opencl_get_current_body (code_gen);
+  opencl_append_int_to_str (tmp, num, format);
+}
+
+/* Get TYPE scalar (base) part.  */
+
+static tree
+opencl_get_main_type (tree type)
+{
+  while (TREE_CODE (type) == ARRAY_TYPE
+	 || TREE_CODE (type) == POINTER_TYPE)
+    type = TREE_TYPE (type);
+  return build_pointer_type (type);
+}
+
+
+/* Create the base part of FUNCTION declaration, similar to this:
+   "__global void __opencl_function_0".  */
+
+static void
+opencl_create_function_code (opencl_body function)
+{
+  static int opencl_function_counter = 0;
+  dyn_string_t dest = function->header;
+  dyn_string_append_cstr (dest, "__kernel void");
+  dyn_string_append_cstr (dest, " ");
+  dyn_string_append_cstr (dest, "opencl_auto_function_");
+  opencl_append_int_to_str (dest, opencl_function_counter, "%ld");
+  dyn_string_append_cstr (dest, " (");
+  sprintf (function->name, "%s%d","opencl_auto_function_",
+           opencl_function_counter++);
+}
+
+/* Create new instance of opencl_body.  */
+
+static opencl_body
+opencl_body_create (void)
+{
+  opencl_body tmp = XNEW (struct graphite_opencl_kernel_body);
+
+  tmp->body = dyn_string_new (OPENCL_INIT_BUFF_SIZE);
+  tmp->pre_header = dyn_string_new (OPENCL_INIT_BUFF_SIZE);
+  tmp->header = dyn_string_new (OPENCL_INIT_BUFF_SIZE);
+  tmp->non_scalar_args = dyn_string_new (OPENCL_INIT_BUFF_SIZE);
+
+  tmp->num_of_data_writes = 0;
+  tmp->function_args = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE);
+  tmp->function_args_to_pass = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE);
+  tmp->data_refs = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
+  opencl_create_function_code (tmp);
+
+  return tmp;
+}
+
+/* Check whether clast expression EXPT is constant in current loop nest.
+   FIRST_SCAT is the iterator of outermost loop in current loop nest.  */
+
+static bool
+opencl_constant_expression_p (struct clast_expr *expr, const char *first_scat)
+{
+  switch (expr->type)
+    {
+    case expr_term:
+      {
+	struct clast_term *term = (struct clast_term *) expr;
+	if (!(term->var))
+	  return true;
+	{
+	  const char *name = term->var;
+	  if (strstr (name, "scat_") != name)
+            return true;
+
+          return (opencl_cmp_scat (first_scat, name) == 1);
+	}
+      }
+    case expr_red:
+      {
+	struct clast_reduction *red = (struct clast_reduction *) expr;
+	int i;
+	for (i = 0; i < red->n; i++)
+          if (!opencl_constant_expression_p (red->elts [i], first_scat))
+            return false;
+
+	return true;
+      }
+    case expr_bin:
+      {
+	struct clast_binary *bin = (struct clast_binary *) expr;
+	return opencl_constant_expression_p (bin->LHS, first_scat);
+      }
+    default:
+      gcc_unreachable ();
+      return false;
+    }
+}
+
+/* Check whether the clast_for LOOP has constant bounds.  FIRST_SCAT
+   is the iterator of outermost loop in current loop nest.  */
+
+static bool
+opencl_constant_loop_bound_p (struct clast_for *loop, const char *first_scat)
+{
+  return opencl_constant_expression_p (loop->UB, first_scat)
+    && opencl_constant_expression_p (loop->LB, first_scat);
+}
+
+/* If clast loop PARENT has only one child and it's a loop too, return
+   this child.  Otherwise return NULL.  */
+
+static struct clast_for *
+opencl_get_single_loop_child (struct clast_for *parent)
+{
+  struct clast_stmt *body = parent->body;
+
+  if (body->next
+      || !CLAST_STMT_IS_A (body, stmt_for))
+    return NULL;
+
+  return (struct clast_for *) body;
+}
+
+/* Calculate the maximal depth of a perfect nested loop nest with LOOP
+   as outermost loop.  META holds meta information for loop LOOP,
+   DEPTH is the depth of LOOP in current loop nest, FIRST_SCAT is the
+   iterator of outermost loop in current loop nest.  CODE_GEN holds
+   information related to OpenCL code generation.  */
+
+static int
+opencl_get_perfect_nested_loop_depth (opencl_main code_gen,
+                                      opencl_clast_meta meta,
+                                      struct clast_for *loop,
+                                      int depth, const char *first_scat)
+{
+  struct clast_for *child;
+  if (dependency_in_clast_loop_p (code_gen, meta, loop, depth))
+    return 0;
+
+  child = opencl_get_single_loop_child (loop);
+
+  if (!child
+      || !opencl_constant_loop_bound_p (child, first_scat))
+    return 1;
+
+  return 1 + opencl_get_perfect_nested_loop_depth (code_gen, meta->body, child,
+                                                   depth + 1, first_scat);
+}
+
+/* Get the type of the loop iterator for loop, represented by STMT.
+   LEVEL is the depth of this loop in current loop nest.  CODE_GEN
+   holds information related to OpenCL code generation.  */
+
+static tree
+opencl_get_loop_iter_type (struct clast_for *stmt, opencl_main code_gen,
+                           int level)
+{
+  tree lb_type = gcc_type_for_clast_expr (stmt->LB, code_gen->region,
+                                          code_gen->newivs,
+					  code_gen->newivs_index,
+                                          code_gen->params_index);
+  tree ub_type = gcc_type_for_clast_expr (stmt->UB, code_gen->region,
+                                          code_gen->newivs,
+					  code_gen->newivs_index,
+                                          code_gen->params_index);
+  tree type = gcc_type_for_iv_of_clast_loop (stmt, level, lb_type, ub_type);
+
+  return type;
+}
+
+static const char *data_type;
+
+/* Simplified version of C-style type printing from c-aux-info.c.  */
+
+static const char *
+gen_type_1 (const char *ret_val, tree t)
+{
+  switch (TREE_CODE (t))
+    {
+    case POINTER_TYPE:
+      if (TYPE_READONLY (t))
+	ret_val = concat ("const ", ret_val, NULL);
+      if (TYPE_VOLATILE (t))
+	ret_val = concat ("volatile ", ret_val, NULL);
+
+      ret_val = concat ("*", ret_val, NULL);
+
+      if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE
+	  || TREE_CODE (TREE_TYPE (t)) == FUNCTION_TYPE)
+	ret_val = concat ("(", ret_val, ")", NULL);
+
+      ret_val = gen_type_1 (ret_val, TREE_TYPE (t));
+
+      return ret_val;
+
+    case ARRAY_TYPE:
+      if (!COMPLETE_TYPE_P (t) || TREE_CODE (TYPE_SIZE (t)) != INTEGER_CST)
+	ret_val = gen_type_1 (concat (ret_val, "[]", NULL), TREE_TYPE (t));
+      else if (int_size_in_bytes (t) == 0)
+	ret_val = gen_type_1 (concat (ret_val, "[0]", NULL), TREE_TYPE (t));
+      else
+	{
+	  int size = int_size_in_bytes (t) / int_size_in_bytes (TREE_TYPE (t));
+	  char buff[10];
+	  sprintf (buff, "[%d]", size);
+	  ret_val = gen_type_1 (concat (ret_val, buff, NULL), TREE_TYPE (t));
+	}
+      break;
+
+    case IDENTIFIER_NODE:
+      data_type = IDENTIFIER_POINTER (t);
+      break;
+
+    case TYPE_DECL:
+      data_type = IDENTIFIER_POINTER (DECL_NAME (t));
+      break;
+
+    case INTEGER_TYPE:
+    case FIXED_POINT_TYPE:
+      switch (TYPE_PRECISION (t))
+	{
+	case 8: data_type = "char"; break;
+	case 16: data_type = "short"; break;
+	case 32: data_type = "int"; break;
+	case 64: data_type = "long"; break;
+	default: gcc_unreachable ();
+	}
+      if (TYPE_UNSIGNED (t))
+	data_type = concat ("unsigned ", data_type, NULL);
+      break;
+
+    case REAL_TYPE:
+      switch (TYPE_PRECISION (t))
+	{
+	case 32: data_type = "float"; break;
+	case 64: data_type = "double"; break;
+	default: gcc_unreachable ();
+	}
+      break;
+
+    case VOID_TYPE:
+      data_type = "void";
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  if (TYPE_READONLY (t))
+    ret_val = concat ("const ", ret_val, NULL);
+  if (TYPE_VOLATILE (t))
+    ret_val = concat ("volatile ", ret_val, NULL);
+  if (TYPE_RESTRICT (t))
+    ret_val = concat ("restrict ", ret_val, NULL);
+  return ret_val;
+}
+
+/* Generate a string representation of a declaration of varable named
+   NAME with type T.  */
+
+static const char *
+gen_type_with_name (const char *name, tree t)
+{
+  const char *type_part = gen_type_1 (name, t);
+  return concat (data_type, " ", type_part, NULL);
+}
+
+/* Replace perfect nested loop nest represented by F with opencl kernel.
+   For example, loop nest like this
+
+   | for (scat_1 = 0; scat_1 < M; i ++)
+   |   for (scat_2 = 0; scat_2 < N; j ++)
+   |     for (scat_3 = 0; scat_3 < L; k ++)
+   |       stmt (i, j, k)
+
+   will be replased by  kernel, where scat_1, scat_2, scat_3
+   depends on thread global id.  Number of threads for this kernel
+   will be M * N * L.  DEPTH is the depth of F in current loop nest.
+   CODE_GEN holds information related to OpenCL code generation.  BODY holds
+   information current OpenCL kernel.  */
+
+static void
+opencl_perfect_nested_to_kernel (opencl_main code_gen, struct clast_for *f,
+				 opencl_body body, int depth)
+{
+  VEC (tree, heap) *mod = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE);
+
+  VEC (tree, heap) *function_args = body->function_args;
+  const int perfect_depth
+    = opencl_get_perfect_nested_loop_depth (code_gen, code_gen->curr_meta, f,
+					    depth, f->iterator);
+  struct clast_for *curr = f;
+  int counter = perfect_depth;
+  tree curr_base = integer_one_node;
+  basic_block calc_block = opencl_create_bb (code_gen);
+  opencl_append_string_to_body
+    ("size_t opencl_global_id = get_global_id (0);\n", code_gen);
+
+  body->first_iter = curr->iterator;
+
+  /* Iterate through all loops, which form perfect loop nest.  */
+  while (counter--)
+    {
+      tree iv;
+      sese region = code_gen->region;
+      VEC (tree, heap) *newivs = code_gen->newivs;
+      htab_t newivs_index = code_gen->newivs_index;
+      htab_t params_index = code_gen->params_index;
+      const char *tmp;
+      const char *decl;
+
+      tree type = opencl_get_loop_iter_type (curr, code_gen, depth);
+
+      const char *type_str = gen_type_with_name (" ", type);
+
+      tree low_bound = clast_to_gcc_expression (type, curr->LB, region,
+                                                newivs, newivs_index,
+                                                params_index);
+
+      tree up_bound = clast_to_gcc_expression (type, curr->UB, region,
+                                               newivs, newivs_index,
+                                               params_index);
+      long stride = 1;
+      tree t_stride;
+      tree curr_loop_size;
+      int num = perfect_depth - counter - 1;
+
+      gcc_assert (curr->LB);
+      gcc_assert (curr->UB);
+
+      body->last_iter = curr->iterator;
+
+      low_bound = opencl_tree_to_var (calc_block, low_bound);
+      up_bound = opencl_tree_to_var (calc_block, up_bound);
+
+      if (mpz_cmp_si (curr->stride, 1) > 0)
+	stride = mpz_get_si (curr->stride);
+
+      t_stride = build_int_cst (NULL_TREE, stride);
+
+      curr_loop_size = build2 (MINUS_EXPR, type,
+			       up_bound, low_bound);
+
+      curr_loop_size = build2 (PLUS_EXPR, type,
+			       curr_loop_size,
+			       fold_convert (type, integer_one_node));
+
+      if (stride != 1)
+	curr_loop_size
+	  = build2 (TRUNC_DIV_EXPR, type,
+		    curr_loop_size, t_stride);
+
+      curr_loop_size = opencl_tree_to_var (calc_block, curr_loop_size);
+
+      VEC_safe_push (tree, heap, mod, curr_loop_size);
+
+      iv = create_tmp_var (type, "scat_tmp_iter");
+
+      /* Declare loop iterator as local variable.  */
+      opencl_append_string_to_body (type_str, code_gen);
+      opencl_append_string_to_body (curr->iterator, code_gen);
+
+      /* Calculate the value of the iterator of current loop based of the
+	 number of current thread and numbers of iterators of inner loops.  */
+      opencl_append_string_to_body (" = ((opencl_global_id / ", code_gen);
+      opencl_append_num_to_body (code_gen, num, "opencl_base_%d");
+      opencl_append_string_to_body (") % ", code_gen);
+      opencl_append_num_to_body (code_gen, num, "opencl_mod_%d");
+      opencl_append_string_to_body (") * ", code_gen);
+      opencl_append_num_to_body (code_gen, stride, "%d");
+      opencl_append_num_to_body (code_gen, num, " + opencl_first_%d;\n");
+
+      opencl_append_string_to_header (type_str, code_gen);
+      opencl_append_num_to_header (code_gen, num, "opencl_mod_%d, ");
+      opencl_append_string_to_header (type_str, code_gen);
+      opencl_append_num_to_header (code_gen, num, "opencl_first_%d, ");
+
+      /* Declare old loop iterator.  */
+      tmp = opencl_get_var_name (iv);
+      check_and_mark_arg (code_gen, tmp, false);
+      decl = opencl_print_function_arg_with_type (tmp, type);
+      opencl_append_string_to_body (decl, code_gen);
+      opencl_append_string_to_body (" = ", code_gen);
+      opencl_append_string_to_body (curr->iterator, code_gen);
+      opencl_append_string_to_body (";\n", code_gen);
+
+      save_clast_name_index (code_gen->newivs_index, curr->iterator,
+                             VEC_length (tree, code_gen->newivs));
+      VEC_safe_push (tree, heap, code_gen->newivs, iv);
+
+      /* Save number of iterations for loop.  */
+      VEC_safe_push (tree, heap, function_args, curr_loop_size);
+      VEC_safe_push (tree, heap, function_args, low_bound);
+
+      body->clast_body = curr->body;
+      curr = opencl_get_single_loop_child (curr);
+      depth ++;
+    }
+
+  counter = perfect_depth;
+
+  /* Store number of iteration of inner loops for each loop in perfect
+     nest.  */
+  while (counter --)
+    {
+      tree type = TREE_TYPE (curr_base);
+      const char *type_str = gen_type_with_name (" ", type);
+      tree curr = VEC_index (tree, mod, counter);
+
+      opencl_append_string_to_header (type_str, code_gen);
+      opencl_append_num_to_header (code_gen, counter, "opencl_base_%d, ");
+
+      VEC_safe_push (tree, heap, function_args, curr_base);
+
+      curr_base = build2 (MULT_EXPR, type, curr_base,
+                          build1 (CONVERT_EXPR, type, curr));
+      curr_base = opencl_tree_to_var (calc_block, curr_base);
+    }
+
+  body->num_of_exec = fold_convert (integer_type_node, curr_base);
+  VEC_free (tree, heap, mod);
+}
+
+/* Generate code for loop statement F.  DEPTH is the depth of F in
+   current loop nest.  CODE_GEN holds information related to OpenCL
+   code generation.  */
+
+static opencl_body
+opencl_print_loop (struct clast_for *f, opencl_main code_gen, int depth)
+{
+  opencl_body current_body = code_gen->current_body;
+
+  code_gen->global_defined_vars
+    = htab_create (10, htab_hash_string, opencl_cmp_str, NULL);
+
+  opencl_perfect_nested_to_kernel (code_gen, f, current_body, depth);
+
+  /* Define local loop iterators.  */
+  opencl_print_local_vars (current_body->first_iter,
+			   current_body->last_iter,
+			   "unsigned int", code_gen);
+
+  /* Generate code for kernel body.  */
+  opencl_print_stmt_list (current_body->clast_body, code_gen, depth + 1);
+  opencl_append_string_to_body ("}\n", code_gen);
+
+  if (current_body->num_of_data_writes)
+    {
+      dyn_string_t header = current_body->header;
+      int offset;
+
+      dyn_string_append (header, current_body->non_scalar_args);
+      offset = dyn_string_length (header) - 2;
+
+      if (*(dyn_string_buf (header) + offset) == ',')
+        *(dyn_string_buf (header) + offset) = ' ';
+
+      opencl_append_string_to_header (")\n{\n", code_gen);
+    }
+
+  return current_body;
+}
+
+/* Generate OpenCL code for clast_assignment A.
+   CODE_GEN holds information related to OpenCL code generation.  */
+
+static void
+opencl_print_assignment (struct clast_assignment *a, opencl_main code_gen)
+{
+  /* Real assignment.  */
+  if (a->LHS)
+    {
+      opencl_append_string_to_body (a->LHS, code_gen);
+      opencl_append_string_to_body (" = ", code_gen);
+    }
+
+  /* Just expression.  */
+  opencl_print_expr (a->RHS, code_gen);
+}
+
+/* Return tree with variable, corresponging to given clast name NAME.
+   CODE_GEN holds information related to OpenCL code generation.  */
+
+static tree
+opencl_clast_name_to_tree (opencl_main code_gen, const char *name)
+{
+  return clast_name_to_gcc (name, code_gen->region, code_gen->newivs,
+                            code_gen->newivs_index, code_gen->params_index);
+}
+
+/* For a given clast name return that name, if it's local name in
+   kernel body or, otherwise, name of gimple variable created for this
+   scat_i in gimple.  CODE_GEN holds information related to OpenCL
+   code generation.  */
+
+static const char *
+opencl_get_scat_real_name (opencl_main code_gen, const char *name)
+{
+  /* NAME > FIRST_ITER */
+  if (opencl_cmp_scat (name, code_gen->current_body->first_iter) >= 0)
+    return name;
+
+  return
+    opencl_get_var_name (opencl_clast_name_to_tree (code_gen, name));
+}
+
+/* Add clast variable (scat_i) as kernel argument.  NAME is a new name
+   of loop iterator (scat_*), REAL_NAME is an old (origin) name of
+   loop iterator.  CODE_GEN holds information related to OpenCL code
+   generation.  */
+
+static void
+opencl_add_scat_as_arg (opencl_main code_gen, const char *name,
+			const char *real_name)
+{
+  tree var;
+  if (!check_and_mark_arg (code_gen, real_name, false))
+    return;
+  var = opencl_clast_name_to_tree (code_gen, name);
+  opencl_add_function_arg (code_gen, var, real_name);
+}
+
+/* Generate OpenCL code for user statement U.  Code will be generated
+   from basic block, related to U.  Also induction variables mapping
+   to old variables must be calculated to process basic block.
+   CODE_GEN holds information related to OpenCL code generation.  */
+
+static void
+opencl_print_user_stmt (struct clast_user_stmt *u, opencl_main code_gen)
+{
+  CloogStatement * cs;
+  poly_bb_p pbb;
+  gimple_bb_p gbbp;
+  basic_block bb;
+  int i;
+  int nb_loops = number_of_loops ();
+  code_gen->iv_map = VEC_alloc (tree, heap, nb_loops);
+
+  for (i = 0; i < nb_loops; i++)
+    VEC_safe_push (tree, heap, code_gen->iv_map, NULL_TREE);
+  build_iv_mapping (code_gen->iv_map, code_gen->region,
+                    code_gen->newivs,
+                    code_gen->newivs_index, u,
+                    code_gen->params_index);
+
+  code_gen->defined_vars
+    = htab_create (10, htab_hash_string, opencl_cmp_str, NULL);
+  opencl_append_string_to_body ("{\n", code_gen);
+
+  cs = u->statement;
+  pbb = (poly_bb_p) cloog_statement_usr (cs);
+  gbbp = PBB_BLACK_BOX (pbb);
+  bb = GBB_BB (gbbp);
+  code_gen->context_loop = bb->loop_father;
+
+  opencl_add_data_refs_pbb (pbb, code_gen);
+  opencl_print_bb (bb, code_gen);
+  opencl_append_string_to_body ("}\n", code_gen);
+  htab_delete (code_gen->defined_vars);
+  code_gen->defined_vars = NULL;
+  VEC_free (tree, heap, code_gen->iv_map);
+}
+
+/* If tree node NODE defined in current sese build and insert define
+   statements for it, otherwise mark node as external (parameter for
+   kernel).  If tree defined in current sese, also recursively build
+   defines for all trees in definition expression.  */
+
+static void
+opencl_build_defines (tree node, opencl_main code_gen)
+{
+  enum tree_code code = TREE_CODE (node);
+  switch (code)
+    {
+    case SSA_NAME:
+      {
+	const char *tmp = opencl_get_var_name (node);
+	gimple def_stmt;
+
+	/* If name defined in other sese it is kernel's parameter.  */
+	if (!defined_in_sese_p (node, code_gen->region))
+          return;
+
+	/*  Bail out if this name was defined earlier either in this
+            or other region.  */
+        if (*(const char **)htab_find_slot (code_gen->defined_vars,
+                                            tmp, INSERT))
+          return;
+
+        /* Get definition statement.  */
+	def_stmt = SSA_NAME_DEF_STMT (node);
+	opencl_expand_scalar_vars (code_gen, def_stmt);
+	opencl_print_gimple (def_stmt, code_gen);
+	return;
+      }
+    case ARRAY_REF:
+      {
+	tree arr = TREE_OPERAND (node, 0);
+	tree offset = TREE_OPERAND (node, 1);
+	opencl_build_defines (arr, code_gen);
+	opencl_build_defines (offset, code_gen);
+	return;
+      }
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* For a given gimple statement STMT build definition for all names,
+   used in this stament.  If name has been defined in other sese, mark
+   it as kernel parameter.  CODE_GEN holds information related to
+   OpenCL code generation.  */
+
+static void
+opencl_expand_scalar_vars (opencl_main code_gen, gimple stmt)
+{
+  ssa_op_iter iter;
+  use_operand_p use_p;
+  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
+    {
+      tree use = USE_FROM_PTR (use_p);
+      if (!is_gimple_reg (use))
+	continue;
+      opencl_build_defines (use, code_gen);
+    }
+}
+
+/* Generate code for a single basic block BB.  CODE_GEN holds
+   information related to OpenCL code generation.  */
+
+static void
+opencl_print_bb (basic_block bb, opencl_main code_gen)
+{
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple stmt = gsi_stmt (gsi);
+      opencl_expand_scalar_vars (code_gen, stmt);
+      opencl_print_gimple (stmt, code_gen);
+    }
+}
+
+/* Print operation simbol (`+' `-' `*') for assignment operation GMA.
+   CODE_GEN holds information related to OpenCL code generation.  */
+
+static void
+opencl_print_gimple_assign_operation (gimple gmp, opencl_main code_gen)
+{
+  opencl_append_string_to_body
+    (op_symbol_code (gimple_assign_rhs_code (gmp)), code_gen);
+}
+
+/* Print pointer expression represented by EXPR.  TYPE_SIZE represents
+   size of the base type for EXPR.  CODE_GEN holds information related
+   to OpenCL code generation.  */
+
+static void
+opencl_print_addr_operand (tree expr, tree type_size, opencl_main code_gen)
+{
+  if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
+    {
+      opencl_append_string_to_body ("(", code_gen);
+      opencl_print_operand (expr, false, code_gen);
+      opencl_append_string_to_body ("/", code_gen);
+      opencl_print_operand (type_size, false, code_gen);
+      opencl_append_string_to_body (")", code_gen);
+    }
+  else
+    opencl_print_operand (expr, false, code_gen);
+
+}
+
+/* Print unary gimple operation GMP.  CODE_GEN holds information
+   related to OpenCL code generation.  */
+
+static void
+opencl_print_unary (gimple gmp, opencl_main code_gen)
+{
+  switch (gimple_assign_rhs_code (gmp))
+    {
+    case BIT_NOT_EXPR:
+      opencl_append_string_to_body ("~", code_gen);
+      return;
+    case TRUTH_NOT_EXPR:
+      opencl_append_string_to_body ("!", code_gen);
+      return;
+    case NEGATE_EXPR:
+      opencl_append_string_to_body ("-", code_gen);
+      return;
+    case MODIFY_EXPR:
+    default:
+      return;
+    }
+}
+
+/* Generate code for min or max gimple operand GMP.  CODE_GEN holds
+   information related to OpenCL code generation.  */
+
+static void
+opencl_print_max_min_assign (gimple gmp, opencl_main code_gen)
+{
+  tree lhs = gimple_assign_lhs (gmp);
+  tree rhs1 = gimple_assign_rhs1 (gmp);
+  tree rhs2 = gimple_assign_rhs2 (gmp);
+  bool max = gimple_assign_rhs_code (gmp) == MAX_EXPR;
+
+  opencl_print_operand (lhs, true, code_gen);
+  opencl_append_string_to_body (max?" = fmax (":"= fmin (", code_gen);
+  opencl_print_operand (rhs1, false, code_gen);
+  opencl_append_string_to_body (",", code_gen);
+  opencl_print_operand (rhs2, false, code_gen);
+  opencl_append_string_to_body (");\n", code_gen);
+
+}
+
+/* Generate code for gimple assignment statement GMP.  CODE_GEN holds
+   information related to OpenCL code generation.  */
+
+static void
+opencl_print_gimple_assign (gimple gmp, opencl_main code_gen)
+{
+  int num_of_ops = gimple_num_ops (gmp);
+  tree lhs;
+  tree rhs1;
+  tree rhs2;
+  bool addr_expr;
+  int result;
+  tree result_size = NULL;
+
+  if (gimple_assign_rhs_code (gmp) == MAX_EXPR
+      || gimple_assign_rhs_code (gmp) == MIN_EXPR)
+    {
+      opencl_print_max_min_assign (gmp, code_gen);
+      return;
+    }
+  gcc_assert (num_of_ops == 2 || num_of_ops == 3);
+  lhs = gimple_assign_lhs (gmp);
+
+  addr_expr = (TREE_CODE (TREE_TYPE (lhs)) == POINTER_TYPE);
+  if (addr_expr)
+    result_size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs)));
+
+  rhs1 = gimple_assign_rhs1 (gmp);
+  rhs2 = gimple_assign_rhs2 (gmp);
+  result = opencl_print_operand (lhs, true, code_gen);
+  if (result != 0)
+    return;
+  opencl_append_string_to_body (" = ", code_gen);
+
+  if (addr_expr)
+    opencl_print_addr_operand (rhs1, result_size, code_gen);
+  else
+    {
+      if (rhs2 == NULL)
+        opencl_print_unary (gmp, code_gen);
+      opencl_print_operand (rhs1, false, code_gen);
+    }
+  if (rhs2 != NULL_TREE)
+    {
+      opencl_print_gimple_assign_operation (gmp, code_gen);
+      if (addr_expr)
+	opencl_print_addr_operand (rhs2, result_size, code_gen);
+      else
+	opencl_print_operand (rhs2, false, code_gen);
+    }
+  opencl_append_string_to_body (";\n",code_gen);
+}
+
+/* Generate code for arguments for gimple call statement GMP.
+   CODE_GEN hold information related to OpenCL code generation.  */
+
+static void
+opencl_print_gimple_call_args (opencl_main code_gen, gimple gmp)
+{
+  size_t len = gimple_call_num_args (gmp);
+  size_t i;
+  opencl_append_string_to_body (" (",code_gen);
+  for (i = 0; i < len; i++)
+    {
+      opencl_print_operand (gimple_call_arg (gmp, i), false, code_gen);
+      if (i < len - 1)
+	opencl_append_string_to_body (", ",code_gen);
+    }
+  opencl_append_string_to_body (")",code_gen);
+}
+
+/* Replace some function names.  */
+
+static const char *
+opencl_get_function_name (tree function)
+{
+  const char *gimple_name = IDENTIFIER_POINTER (DECL_NAME (function));
+  if (!strcmp (gimple_name, "__builtin_powf"))
+    return "pow";
+  return gimple_name;
+}
+
+/* Generate code for gimple call statement GMP.  CODE_GEN holds information
+   related to OpenCL code generation.  */
+
+static void
+opencl_print_gimple_call (opencl_main code_gen, gimple gmp)
+{
+  tree lhs = gimple_call_lhs (gmp);
+  tree function = gimple_call_fn (gmp);
+  opencl_print_operand (lhs, true, code_gen);
+  opencl_append_string_to_body (" = ", code_gen);
+
+  while (TREE_CODE (function) == ADDR_EXPR
+	 || TREE_CODE (function) == INDIRECT_REF)
+    function = TREE_OPERAND (function, 0);
+  opencl_append_string_to_body (opencl_get_function_name (function), code_gen);
+  opencl_print_gimple_call_args (code_gen, gmp);
+  opencl_append_string_to_body (";\n",code_gen);
+}
+
+/* Generate code for gimple statment SMP.  Now only assignment
+   operation are supported, but it seems enough for clast translation.
+   GIMPLE_COND statements are loop bound conditions and can be safely
+   ignored.  CODE_GEN holds information related to OpenCL code
+   generation.  */
+
+static void
+opencl_print_gimple (gimple gmp, opencl_main code_gen)
+{
+  if (!gmp)
+    return;
+
+  switch (gimple_code (gmp))
+    {
+    case GIMPLE_ASSIGN:
+      opencl_print_gimple_assign (gmp, code_gen);
+      break;
+    case GIMPLE_COND:
+      break;
+    case GIMPLE_PHI:
+      break;
+    case GIMPLE_CALL:
+      opencl_print_gimple_call (code_gen, gmp);
+      break;
+    case GIMPLE_DEBUG:
+      break;
+    case GIMPLE_LABEL:
+      {
+	tree label = gimple_label_label (gmp);
+	opencl_print_operand (label, false, code_gen);
+	opencl_append_string_to_body (": ", code_gen);
+      }
+      break;
+    default:
+      debug_gimple_stmt (gmp);
+      gcc_unreachable ();
+    }
+}
+
+/* Get name of the variable, represented by tree NODE.  If variable is
+   temporary, generate name for it.  */
+
+static const char *
+opencl_get_var_name (tree node)
+{
+  bool ssa_name = TREE_CODE (node) == SSA_NAME;
+  tree name;
+  int num = 0;
+  if (ssa_name)
+    {
+      num = SSA_NAME_VERSION (node);
+      node = SSA_NAME_VAR (node);
+    }
+  name = DECL_NAME (node);
+  if (name)
+    {
+      if (!ssa_name)
+	return identifier_to_locale (IDENTIFIER_POINTER (name));
+      else
+	{
+	  const char *base = identifier_to_locale (IDENTIFIER_POINTER (name));
+	  char *buff = XNEWVEC (char, strlen (base) + 5);
+	  sprintf (buff, "%s_%d", base, num);
+	  return buff;
+	}
+    }
+  else
+    {
+      int tmp_var_uid = DECL_UID (node);
+      char *tmp = XNEWVEC (char, 30);
+      sprintf (tmp, "opencl_var_%d_%d", tmp_var_uid, num);
+      return tmp;
+    }
+}
+
+/* Append variable name NAME to function body.  Differs from appending
+   string by replacing `.' by `_'. CODE_GEN holds information related
+   to OpenCL code generation.  */
+
+static void
+opencl_append_var_name (const char *name, opencl_main code_gen)
+{
+  int len = strlen (name);
+  char *tmp = XNEWVEC (char, len + 1);
+  int i;
+  for (i = 0; i <= len; i++)
+    {
+      char tt = name[i];
+      if (tt == '.')
+	tt = '_';
+      tmp[i] = tt;
+    }
+  opencl_append_string_to_body (tmp, code_gen);
+  free (tmp);
+}
+
+/* If variable VAR_DECL is not defined and it is not marked as a
+   parameter, mark it as a parameter and add it to parameters list.
+   CODE_GEN holds information related to OpenCL code generation.  */
+
+static void
+opencl_try_variable (opencl_main code_gen, tree var_decl)
+{
+  const char *name = opencl_get_var_name (var_decl);
+  gcc_assert (code_gen->defined_vars);
+
+  if (check_and_mark_arg (code_gen, name, false))
+    opencl_add_function_arg (code_gen, var_decl, name);
+}
+
+/* Define non scalar variable, represented be DATA as either local
+   variable or kernel argument.  CODE_GEN holds information related to
+   OpenCL code generation.  */
+
+static void
+opencl_add_non_scalar_function_arg (opencl_main code_gen,
+                                    opencl_data data)
+{
+  const char *decl;
+  static int counter = 0;
+  opencl_body body = code_gen->current_body;
+  tree var = data->exact_object;
+  const char *name = opencl_get_var_name (var);
+  tree type = TREE_TYPE (var);
+
+  /* Check whether given variable can be privatized.  */
+  if (data->privatized)
+    {
+      /* Define variable as local variable.  */
+      gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
+      decl = opencl_print_function_arg_with_type (name, type);
+      dyn_string_append_cstr (body->pre_header, decl);
+      dyn_string_append_cstr (body->pre_header, ";\n");
+      return;
+    }
+  else
+    {
+      /* Define variable as kernel argument.  */
+      char decl_name [30];
+      tree main_type = opencl_get_main_type (type);
+      sprintf (decl_name, "oclFTmpArg%d", counter++);
+      decl = opencl_print_function_arg_with_type (decl_name, main_type);
+      dyn_string_append_cstr (body->non_scalar_args, "__global ");
+      opencl_add_non_scalar_type_decl (var, body->pre_header, decl_name);
+      dyn_string_append_cstr (body->non_scalar_args, decl);
+      dyn_string_append_cstr (body->non_scalar_args, ", ");
+      VEC_safe_push (opencl_data, heap, body->data_refs, data);
+    }
+}
+
+/* Register data reference REF to variable DATA.  Do nothing, if it
+   has already been registered.  CODE_GEN holds information related to
+   OpenCL code generation.  */
+
+static void
+opencl_try_data_ref (opencl_main code_gen, data_reference_p ref,
+                     opencl_data data)
+{
+  tree var = dr_outermost_base_object (ref);
+  const char *name = opencl_get_var_name (var);
+  const char ** slot;
+  gcc_assert (code_gen->defined_vars);
+
+  slot = (const char **)htab_find_slot (code_gen->global_defined_vars,
+                                        name, INSERT);
+  if (*slot)
+    return;
+  *slot = name;
+  opencl_add_non_scalar_function_arg (code_gen, data);
+}
+
+/* Register data reference D_REF in current kernel.  CODE_GEN hold
+   information related to OpenCL code generation.  */
+
+static void
+opencl_add_data_ref (opencl_main code_gen, data_reference_p d_ref)
+{
+  opencl_data tmp = opencl_get_data_by_data_ref (code_gen, d_ref);
+
+  gcc_assert (tmp);
+  if (!DR_IS_READ (d_ref))
+    {
+      bitmap_set_bit (code_gen->curr_meta->modified_on_device, tmp->id);
+      tmp->written_in_current_body = true;
+      tmp->ever_written_on_device = true;
+      code_gen->current_body->num_of_data_writes ++;
+    }
+  else
+    {
+      tmp->read_in_current_body = true;
+      tmp->ever_read_on_device = true;
+    }
+  if (!tmp->privatized)
+    tmp->used_on_device = true;
+
+  opencl_try_data_ref (code_gen, d_ref, tmp);
+}
+
+/* Add base objects of all data references in PBB as arguments to
+   current kernel.  CODE_GEN holds information related to OpenCL code
+   generation.  */
+
+static void
+opencl_add_data_refs_pbb (poly_bb_p pbb, opencl_main code_gen)
+{
+  VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb);
+  int i;
+  poly_dr_p curr;
+
+  for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++)
+    {
+      data_reference_p d_ref = (data_reference_p) PDR_CDR (curr);
+      opencl_add_data_ref (code_gen, d_ref);
+    }
+}
+
+/* Generate operand for tree node NODE.  If LSH is true, generated
+   operand must be lvalue, otherwise it's rvalue.  CODE_GEN holds
+   information related to OpenCL code generation.  Also generate
+   definitions for variables if necessary.  Variable definition is not
+   necessary if variable has already been defined or if it has been
+   defined in other sese.  */
+
+static int
+opencl_print_operand (tree node, bool lhs, opencl_main code_gen)
+{
+  tree scev = scalar_evolution_in_region (code_gen->region,
+                                          code_gen->context_loop,
+                                          node);
+  tree new_node = chrec_apply_map (scev, code_gen->iv_map);
+
+  if (TREE_CODE (new_node) != SCEV_NOT_KNOWN)
+    node = new_node;
+
+  switch (TREE_CODE (node))
+    {
+    case NOP_EXPR:
+      return opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen);
+    case PLUS_EXPR:
+      {
+        if (lhs)
+          return -1;
+        opencl_append_string_to_body ("(", code_gen);
+        opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen);
+        opencl_append_string_to_body (" + ", code_gen);
+        opencl_print_operand (TREE_OPERAND (node, 1), false, code_gen);
+        opencl_append_string_to_body (")", code_gen);
+        return 0;
+      }
+    case MULT_EXPR:
+      {
+        if (lhs)
+          return -1;
+        opencl_append_string_to_body ("(", code_gen);
+        opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen);
+        opencl_append_string_to_body (" * ", code_gen);
+        opencl_print_operand (TREE_OPERAND (node, 1), false, code_gen);
+        opencl_append_string_to_body (")", code_gen);
+        return 0;
+      }
+
+    case SSA_NAME:
+      {
+	/* If rhs just add variable name.  Otherwise
+           it may be necessary to add variable definition.  */
+	const char *tmp = opencl_get_var_name (node);
+	if (lhs)
+          opencl_add_variable (tmp, node, code_gen);
+	else
+          opencl_append_var_name (tmp, code_gen);
+
+	/* This call adds variable declaration as formal
+	   parameter in kernel header if it is necessary.  */
+	opencl_try_variable (code_gen, node);
+	return 0;
+      }
+    case ARRAY_REF:
+      {
+	/* <operand>[<operand>].  */
+	tree arr = TREE_OPERAND (node, 0);
+	tree offset = TREE_OPERAND (node, 1);
+	opencl_print_operand (arr, false, code_gen);
+
+        opencl_append_string_to_body ("[", code_gen);
+        opencl_print_operand (offset, false, code_gen);
+        opencl_append_string_to_body ("]", code_gen);
+	return 0;
+      }
+    case INTEGER_CST:
+      {
+	/* Just print integer constant.  */
+	unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (node);
+        if (lhs)
+          return -1;
+	if (host_integerp (node, 0))
+          opencl_append_num_to_body (code_gen, (long)low, "%ld");
+	else
+	  {
+	    HOST_WIDE_INT high = TREE_INT_CST_HIGH (node);
+	    char buff[100];
+	    buff[0] = ' ';
+	    if (tree_int_cst_sgn (node) < 0)
+	      {
+		buff[0] = '-';
+		high = ~high + !low;
+		low = -low;
+	      }
+	    sprintf (buff + 1, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
+		     (unsigned HOST_WIDE_INT) high, low);
+	    opencl_append_string_to_body (buff, code_gen);
+	  }
+	return 0;
+      }
+    case REAL_CST:
+      {
+	char buff[100];
+	REAL_VALUE_TYPE tmp = TREE_REAL_CST (node);
+        if (lhs)
+          return -1;
+	real_to_decimal (buff, &tmp, sizeof (buff), 0, 1);
+	opencl_append_string_to_body (buff, code_gen);
+	return 0;
+      }
+    case FIXED_CST:
+      {
+	char buff[100];
+        if (lhs)
+          return -1;
+	fixed_to_decimal (buff, TREE_FIXED_CST_PTR (node), sizeof (buff));
+	opencl_append_string_to_body (buff, code_gen);
+	return 0;
+      }
+    case STRING_CST:
+      {
+	opencl_append_string_to_body ("\"", code_gen);
+	opencl_append_string_to_body (TREE_STRING_POINTER (node), code_gen);
+	opencl_append_string_to_body ("\"", code_gen);
+	return 0;
+      }
+    case VAR_DECL:
+    case PARM_DECL:
+      {
+	tree decl_name = DECL_NAME (node);
+	const char *tmp;
+	gcc_assert (decl_name);
+	tmp = IDENTIFIER_POINTER (decl_name);
+
+	opencl_append_var_name (tmp, code_gen);
+	opencl_try_variable (code_gen, node);
+	return 0;
+      }
+    case FIELD_DECL:
+      {
+	tree decl_name = DECL_NAME (node);
+	const char *tmp;
+	gcc_assert (decl_name);
+	tmp = IDENTIFIER_POINTER (decl_name);
+	opencl_append_var_name (tmp, code_gen);
+        return 0;
+      }
+    case LABEL_DECL:
+      {
+	tree decl_name = DECL_NAME (node);
+	if (decl_name)
+	  {
+	    const char *tmp = IDENTIFIER_POINTER (decl_name);
+	    opencl_append_var_name (tmp, code_gen);
+	    return 0;
+	  }
+
+	if (LABEL_DECL_UID (node) != -1)
+	  {
+	    opencl_append_num_to_body (code_gen, (int) LABEL_DECL_UID (node),
+				       "L%d");
+	    return 0;
+	  }
+	opencl_append_num_to_body (code_gen, (int) DECL_UID (node),
+				   "D_%u");
+	return 0;
+      }
+    case INDIRECT_REF:
+      {
+	opencl_append_string_to_body ("(*", code_gen);
+	opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen);
+	opencl_append_string_to_body (")", code_gen);
+	return 0;
+      }
+    case ADDR_EXPR:
+      {
+	opencl_append_string_to_body ("&", code_gen);
+	opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen);
+	return 0;
+      }
+    case COMPONENT_REF:
+      {
+	tree op1 = TREE_OPERAND (node, 0);
+	tree op2 = TREE_OPERAND (node, 1);
+	opencl_print_operand (op1, false, code_gen);
+
+	if (op1 && TREE_CODE (op1) == INDIRECT_REF)
+	  opencl_append_string_to_body ("->", code_gen);
+	else
+	  opencl_append_string_to_body (".", code_gen);
+
+	opencl_print_operand (op2, false, code_gen);
+	return 0;
+      }
+    default:
+      debug_tree (node);
+      gcc_unreachable ();
+    }
+
+  return 0;
+}
+
+/* Append variable VAR with name VAR_NAME to current function body.
+   If variable has been defined in current scope, but definition for
+   it has not been generated - then generate it's definition and mark
+   variable as defined.  CODE_GEN holds information related to OpenCL
+   code generation.  */
+
+static void
+opencl_add_variable (const char *var_name, tree var, opencl_main code_gen)
+{
+  const char ** slot;
+  if (htab_find (code_gen->global_defined_vars, var_name))
+    {
+      opencl_append_var_name (var_name, code_gen);
+      return;
+    }
+
+  slot = (const char **) htab_find_slot
+    (code_gen->defined_vars, var_name, INSERT);
+
+  if (! (*slot) && defined_in_sese_p (var, code_gen->region))
+    {
+      const char *decl;
+      tree type = TREE_TYPE (var);
+      *slot = var_name;
+      if (TREE_CODE (type) == POINTER_TYPE
+          || TREE_CODE (type) == ARRAY_TYPE)
+        {
+          opencl_add_non_scalar_type_decl (var, code_gen->current_body->body,
+                                           NULL);
+        }
+      else
+        {
+          var = SSA_NAME_VAR (var);
+          decl = opencl_print_function_arg_with_type (var_name, type);
+          opencl_append_string_to_body (decl, code_gen);
+        }
+      return;
+    }
+  opencl_append_var_name (var_name, code_gen);
+}
+
+/* Append list of names of loop iterators from CODE_GEN with same type
+   TYPE to current kernel.  FIRST and LAST define outermost and
+   innermost iterators to append respectively.  */
+
+static void
+opencl_print_local_vars (const char *fist, const char *last,
+			 const char *type, opencl_main code_gen)
+{
+  char ** names = code_gen->root_names->_scattering;
+  int len = code_gen->root_names->_nb_scattering;
+  int i;
+  for (i = 0; i < len; i++)
+    {
+      const char *tmp = names[i];
+      if (opencl_cmp_scat (fist, tmp) <= 0
+	  && opencl_cmp_scat (last, tmp) >= 0)
+	{
+	  const char ** slot =
+	    (const char **) htab_find_slot (code_gen->global_defined_vars,
+					    tmp, INSERT);
+	  *slot = tmp;
+	  continue;
+	}
+
+      if (opencl_cmp_scat (fist, tmp) > 0)
+	continue;
+
+      opencl_append_string_to_body (type, code_gen);
+      opencl_append_string_to_body (" ", code_gen);
+      opencl_append_string_to_body (tmp, code_gen);
+      opencl_append_string_to_body (";\n", code_gen);
+      *((const char **)htab_find_slot (code_gen->global_defined_vars,
+                                       tmp, INSERT)) = tmp;
+    }
+}
+
+/*  Replace all dots to underscores in string pointed to by P.  Return P.  */
+
+static char *
+filter_dots (char *p)
+{
+  char *s;
+  for (s = p; *s; s++)
+    if (*s == '.')
+      *s = '_';
+  return p;
+}
+
+/* Return string with varibale definition.  ARG_NAME is the name of
+   the variable and TYPE is it's type.  */
+
+static const char *
+opencl_print_function_arg_with_type (const char *arg_name, tree type)
+{
+  const char *decl = gen_type_with_name (arg_name, type);
+  char *ddecl;
+  ddecl = xstrdup (decl);
+  return filter_dots (ddecl);
+}
+
+/* Generate definition for non scalar variable VAR and place it to
+   string DEST.  Use DECL_NAME as variable name.  */
+
+static void
+opencl_add_non_scalar_type_decl (tree var, dyn_string_t dest,
+                                 const char *decl_name)
+{
+  tree type = TREE_TYPE (var);
+  const char *name = opencl_get_var_name (var);
+  static int counter = 0;
+  char type_name [30];
+  char *tmp_name = xstrdup (name);
+  const char *new_type;
+  tree inner_type = TREE_TYPE (type);
+
+  filter_dots (tmp_name);
+
+  sprintf (type_name, "oclFTmpType%d", counter++);
+
+  new_type = opencl_print_function_arg_with_type (type_name, inner_type);
+
+  dyn_string_append_cstr (dest, "typedef __global ");
+  dyn_string_append_cstr (dest, new_type);
+  dyn_string_append_cstr (dest, ";\n");
+
+  dyn_string_append_cstr (dest, type_name);
+  dyn_string_append_cstr (dest, " *");
+  dyn_string_append_cstr (dest, tmp_name);
+  if (decl_name != NULL)
+    {
+      dyn_string_append_cstr (dest, " = (");
+      dyn_string_append_cstr (dest, type_name);
+      dyn_string_append_cstr (dest, "*)");
+      dyn_string_append_cstr (dest, decl_name);
+      dyn_string_append_cstr (dest, ";\n");
+    }
+  free (tmp_name);
+
+}
+
+/* Check whether variable with name NAME has been defined as global or
+   local variable and mark it as defined.  This function returns false
+   if variable has already been defined, otherwise it returns true.  */
+
+static bool
+check_and_mark_arg (opencl_main code_gen, const char *name, bool local)
+{
+  const char ** slot;
+  gcc_assert (code_gen->defined_vars || !local);
+  if (code_gen->defined_vars)
+    {
+      slot = (const char **)htab_find_slot (code_gen->defined_vars,
+                                            name, INSERT);
+      if (*slot)
+        return false;
+      if (local)
+        *slot = name;
+    }
+
+  slot = (const char **)htab_find_slot (code_gen->global_defined_vars,
+                                        name, INSERT);
+  if (*slot)
+    return false;
+  if (!local)
+    *slot = name;
+  return true;
+}
+
+/* Add variable VAR with name NAME as function argument.  Append it's
+   declaration in finction header and add it as function parameter.
+   CODE_GEN holds information related to OpenCL code generation.  */
+
+static void
+opencl_add_function_arg (opencl_main code_gen, tree var, const char *name)
+{
+  opencl_body body;
+  const char *decl;
+  tree type;
+  type = TREE_TYPE (var);
+  body = code_gen->current_body;
+  decl = opencl_print_function_arg_with_type (name, type);
+  dyn_string_append_cstr (body->header, decl);
+  dyn_string_append_cstr (body->header, ", ");
+  VEC_safe_push (tree, heap, body->function_args, var);
+}
+
+/* Generate kernel function code for clast for statement F, located on
+   depth DEPTH.  CODE_GEN holds information related to OpenCL code
+   generation.  */
+
+opencl_body
+opencl_clast_to_kernel (struct clast_for * f, opencl_main code_gen,
+                        int depth)
+{
+  opencl_body tmp = opencl_body_create ();
+  code_gen->current_body = tmp;
+  return opencl_print_loop (f, code_gen, depth);
+}
+
+/* Generate code for clast statement S, located on depth DEPTH.
+   CODE_GEN holds information related to OpenCL code generation.  */
+
+static void
+opencl_print_stmt_list (struct clast_stmt *s, opencl_main code_gen, int depth)
+{
+  for ( ; s; s = s->next) {
+    gcc_assert (!CLAST_STMT_IS_A (s, stmt_root));
+    if (CLAST_STMT_IS_A (s, stmt_ass))
+      {
+	opencl_print_assignment ((struct clast_assignment *) s, code_gen);
+	opencl_append_string_to_body (";\n", code_gen);
+      }
+    else if (CLAST_STMT_IS_A (s, stmt_user))
+      opencl_print_user_stmt ((struct clast_user_stmt *) s, code_gen);
+    else if (CLAST_STMT_IS_A (s, stmt_for))
+      opencl_print_for ((struct clast_for *) s, code_gen, depth);
+    else if (CLAST_STMT_IS_A (s, stmt_guard))
+      opencl_print_guard ((struct clast_guard *) s, code_gen, depth);
+    else if (CLAST_STMT_IS_A (s, stmt_block))
+      {
+	opencl_append_string_to_body ("{\n", code_gen);
+	opencl_print_stmt_list (((struct clast_block *)s)->body, code_gen,
+				depth);
+	opencl_append_string_to_body ("}\n", code_gen);
+      }
+    else
+      gcc_unreachable ();
+  }
+}
+
+/* Generate code for clast for statement F, locate on depth LEVEL.
+   CODE_GEN holds information related to OpenCL code generation.  */
+
+static void
+opencl_print_for (struct clast_for *f, opencl_main code_gen, int level)
+{
+  tree iv;
+  tree iv_type;
+  const char *tmp;
+  const char *decl;
+  opencl_append_string_to_body ("for (", code_gen);
+  if (f->LB)
+    {
+      opencl_append_string_to_body (f->iterator, code_gen);
+      opencl_append_string_to_body ("=", code_gen);
+      opencl_print_expr (f->LB, code_gen);
+    }
+  opencl_append_string_to_body (";", code_gen);
+
+  if (f->UB)
+    {
+      opencl_append_string_to_body (f->iterator, code_gen);
+      opencl_append_string_to_body ("<=", code_gen);
+      opencl_print_expr (f->UB, code_gen);
+    }
+  opencl_append_string_to_body (";", code_gen);
+
+  if (value_gt_si (f->stride, 1))
+    {
+      opencl_append_string_to_body (f->iterator, code_gen);
+      opencl_append_string_to_body ("+=", code_gen);
+      opencl_append_num_to_body (code_gen, mpz_get_si (f->stride), "%d)\n{\n");
+    }
+  else
+    {
+      opencl_append_string_to_body (f->iterator, code_gen);
+      opencl_append_string_to_body ("++", code_gen);
+      opencl_append_string_to_body (")\n{\n", code_gen);
+    }
+  iv_type = opencl_get_loop_iter_type (f, code_gen, level);
+  iv = create_tmp_var (iv_type, "scat_tmp_iter");
+
+  tmp = opencl_get_var_name (iv);
+  check_and_mark_arg (code_gen, tmp, false);
+  decl = opencl_print_function_arg_with_type (tmp, iv_type);
+  opencl_append_string_to_body (decl, code_gen);
+
+  opencl_append_string_to_body (" = ", code_gen);
+  opencl_append_string_to_body (f->iterator, code_gen);
+  opencl_append_string_to_body (";\n", code_gen);
+
+  save_clast_name_index (code_gen->newivs_index, f->iterator,
+			 VEC_length (tree, code_gen->newivs));
+  VEC_safe_push (tree, heap, code_gen->newivs, iv);
+
+  opencl_print_stmt_list (f->body, code_gen, level + 1);
+  opencl_append_string_to_body ("}\n", code_gen);
+}
+
+/* Generate code for clast conditional statement G, locate on depth DEPTH.
+   CODE_GEN holds information related to OpenCL code generation.  */
+
+static void
+opencl_print_guard (struct clast_guard *g, opencl_main code_gen, int depth)
+{
+  int k;
+  opencl_append_string_to_body ("if ", code_gen);
+  if (g->n > 1)
+    opencl_append_string_to_body ("(", code_gen);
+  for (k = 0; k < g->n; ++k)
+    {
+      if (k > 0)
+        opencl_append_string_to_body (" && ", code_gen);
+      opencl_append_string_to_body ("(", code_gen);
+      opencl_print_equation (&g->eq[k], code_gen);
+      opencl_append_string_to_body (")", code_gen);
+    }
+  if (g->n > 1)
+    opencl_append_string_to_body (")", code_gen);
+  opencl_append_string_to_body (" {\n", code_gen);
+  opencl_print_stmt_list (g->then, code_gen, depth);
+  opencl_append_string_to_body ("}\n", code_gen);
+}
+
+
+/* Generate code for clast equation EQ.  CODE_GEN holds information
+   related to OpenCL code generation.  */
+
+static void
+opencl_print_equation (struct clast_equation *eq, opencl_main code_gen)
+{
+  opencl_print_expr (eq->LHS, code_gen);
+  if (eq->sign == 0)
+    opencl_append_string_to_body (" == ", code_gen);
+  else if (eq->sign > 0)
+    opencl_append_string_to_body (" >= ", code_gen);
+  else
+    opencl_append_string_to_body (" <= ", code_gen);
+  opencl_print_expr (eq->RHS, code_gen);
+}
+
+/* Generate code for clast expression E.  CODE_GEN holds information
+   related to OpenCL code generation.  */
+
+static void
+opencl_print_expr (struct clast_expr *e, opencl_main code_gen)
+{
+  if (!e)
+    return;
+  switch (e->type)
+    {
+    case expr_term:
+      opencl_print_term ((struct clast_term*) e, code_gen);
+      break;
+    case expr_red:
+      opencl_print_reduction ((struct clast_reduction*) e, code_gen);
+      break;
+    case expr_bin:
+      opencl_print_binary ((struct clast_binary*) e, code_gen);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Generate code for clast term T.  CODE_GEN holds information
+   related to OpenCL code generation.  */
+
+static void
+opencl_print_term (struct clast_term *t, opencl_main code_gen)
+{
+  if (t->var)
+    {
+      const char *real_name = opencl_get_scat_real_name (code_gen, t->var);
+      if (value_one_p (t->val))
+	opencl_append_var_name (real_name, code_gen);
+      else if (value_mone_p (t->val))
+	{
+	  opencl_append_string_to_body ("-", code_gen);
+	  opencl_append_var_name (real_name, code_gen);
+	}
+      else
+	{
+	  opencl_append_num_to_body (code_gen, mpz_get_si (t->val), "%d");
+	  opencl_append_string_to_body ("*", code_gen);
+	  opencl_append_var_name (real_name, code_gen);
+	}
+      opencl_add_scat_as_arg (code_gen, t->var, real_name);
+    }
+  else
+    opencl_append_num_to_body (code_gen, mpz_get_si (t->val), "%d");
+}
+
+/* Generate code for clast reduction statement R.  CODE_GEN holds
+   information related to OpenCL code generation.  */
+
+static void
+opencl_print_reduction (struct clast_reduction *r, opencl_main  code_gen)
+{
+  switch (r->type)
+    {
+    case clast_red_sum:
+      opencl_print_sum (r, code_gen);
+      break;
+    case clast_red_min:
+    case clast_red_max:
+      if (r->n == 1)
+	{
+	  opencl_print_expr (r->elts[0], code_gen);
+	  break;
+	}
+      opencl_print_minmax_c (r, code_gen);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Generate code for clast sum statement R.  CODE_GEN holds information
+   related to OpenCL code generation.  */
+
+static void
+opencl_print_sum (struct clast_reduction *r, opencl_main code_gen)
+{
+  int i;
+  struct clast_term *t;
+
+  gcc_assert (r->n >= 1 && r->elts[0]->type == expr_term);
+  t = (struct clast_term *) r->elts[0];
+  opencl_print_term (t, code_gen);
+
+  for (i = 1; i < r->n; ++i)
+    {
+      gcc_assert (r->elts[i]->type == expr_term);
+      t = (struct clast_term *) r->elts[i];
+      if (value_pos_p (t->val))
+	opencl_append_string_to_body ("+", code_gen);
+      opencl_print_term (t, code_gen);
+    }
+}
+
+/* Generate code for clast binary operation B.  CODE_GEN holds
+   information related to OpenCL code generation.  */
+
+static void
+opencl_print_binary (struct clast_binary *b, opencl_main code_gen)
+{
+  const char *s1 = NULL, *s2 = NULL, *s3 = NULL;
+  bool group = (b->LHS->type == expr_red
+		&& ((struct clast_reduction*) b->LHS)->n > 1);
+
+  switch (b->type)
+    {
+    case clast_bin_fdiv:
+      s1 = "floor ((", s2 = ")/(", s3 = "))";
+      break;
+    case clast_bin_cdiv:
+      s1 = "ceil ((", s2 = ")/(", s3 = "))";
+      break;
+    case clast_bin_div:
+      if (group)
+	s1 = "(", s2 = ")/", s3 = "";
+      else
+	s1 = "", s2 = "/", s3 = "";
+      break;
+    case clast_bin_mod:
+      if (group)
+	s1 = "(", s2 = ")%", s3 = "";
+      else
+	s1 = "", s2 = "%", s3 = "";
+      break;
+    }
+
+  opencl_append_string_to_body (s1, code_gen);
+  opencl_print_expr (b->LHS, code_gen);
+  opencl_append_string_to_body (s2, code_gen);
+  opencl_append_num_to_body (code_gen, mpz_get_si (b->RHS), "%d");
+  opencl_append_string_to_body (s3, code_gen);
+}
+
+/* Generate code for clast min/max operation R.  CODE_GEN holds
+   information related to OpenCL code generation.  */
+
+static void
+opencl_print_minmax_c ( struct clast_reduction *r, opencl_main code_gen)
+{
+  int i;
+  for (i = 1; i < r->n; ++i)
+    opencl_append_string_to_body (r->type == clast_red_max ? "max (" : "min (",
+				  code_gen);
+  if (r->n > 0)
+    {
+      opencl_append_string_to_body ("(unsigned int)(", code_gen);
+      opencl_print_expr (r->elts[0], code_gen);
+      opencl_append_string_to_body (")", code_gen);
+    }
+  for (i = 1; i < r->n; ++i)
+    {
+      opencl_append_string_to_body (",", code_gen);
+      opencl_append_string_to_body ("(unsigned int)(", code_gen);
+      opencl_print_expr (r->elts[i], code_gen);
+      opencl_append_string_to_body ("))", code_gen);
+    }
+}
+
+#endif
diff --git a/gcc/graphite-opencl-meta-clast.c b/gcc/graphite-opencl-meta-clast.c
new file mode 100644
index 0000000..4fc39a9
--- /dev/null
+++ b/gcc/graphite-opencl-meta-clast.c
@@ -0,0 +1,784 @@ 
+/* Build meta information from clast data structures for GRAPHITE-OpenCL.
+   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file implements building meta infromation about data
+   references, supported types and operations and dependencies.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "ggc.h"
+#include "tree.h"
+#include "rtl.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "toplev.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pass.h"
+#include "domwalk.h"
+#include "value-prof.h"
+#include "pointer-set.h"
+#include "gimple.h"
+#include "sese.h"
+#include "output.h"
+#include "hashtab.h"
+#include "tree.h"
+#include "gimple-pretty-print.h"
+
+#ifdef HAVE_cloog
+#include "cloog/cloog.h"
+#include "ppl_c.h"
+#include "graphite-ppl.h"
+#include "graphite.h"
+#include "graphite-poly.h"
+#include "graphite-scop-detection.h"
+#include "graphite-clast-to-gimple.h"
+#include "graphite-dependences.h"
+#include "dyn-string.h"
+#include "graphite-opencl.h"
+
+/* Something like C++ std::map<int, int>.  */
+
+struct opencl_pair_def
+{
+  int id;
+  int val;
+};
+
+typedef struct opencl_pair_def * opencl_pair;
+
+/* Hash function for opencl_pair.  */
+
+static hashval_t
+opencl_pair_to_hash (const void * data)
+{
+  const struct opencl_pair_def * obj = (const struct opencl_pair_def *)data;
+  return (hashval_t) (obj->id);
+}
+
+/* Compare function for opencl_pair.  */
+
+static int
+opencl_pair_cmp (const void * e1, const void * e2)
+{
+  const struct opencl_pair_def * obj1 = (const struct opencl_pair_def *)e1;
+  const struct opencl_pair_def * obj2 = (const struct opencl_pair_def *)e2;
+
+  return obj1->id == obj2->id;
+}
+
+/* Create new opencl_pair with NEW_ID as id and NEW_VAL as val.  */
+
+static opencl_pair
+opencl_pair_create (int new_id, int new_val)
+{
+  opencl_pair tmp = XNEW (struct opencl_pair_def);
+  tmp->id = new_id;
+  tmp->val = new_val;
+  return tmp;
+}
+
+/* Delete opencl_pair DATA.  */
+
+static void
+opencl_pair_delete (opencl_pair data)
+{
+  free (data);
+}
+
+/* Create new opencl_clast_meta structure with PARENT as parent,
+   DEPTH as out_depth.  If ACCESS_INIT is true, then init access bitmaps.  */
+
+static opencl_clast_meta
+opencl_clast_meta_create (int depth, opencl_clast_meta parent,
+                          bool access_init)
+{
+  opencl_clast_meta tmp = XNEW (struct opencl_clast_meta_def);
+  tmp->out_depth = depth;
+  tmp->in_depth = 0;
+  tmp->next = NULL;
+  tmp->body = NULL;
+  tmp->parent = parent;
+  tmp->on_device = false;
+  tmp->modified_on_host = BITMAP_ALLOC (NULL);
+  tmp->modified_on_device = BITMAP_ALLOC (NULL);
+  tmp->access_unsupported = false;
+  if (access_init)
+    {
+      tmp->can_be_private = BITMAP_ALLOC (NULL);
+      tmp->access = BITMAP_ALLOC (NULL);
+    }
+  else
+    {
+      tmp->access = NULL;
+      tmp->can_be_private = NULL;
+    }
+  return tmp;
+}
+
+/* Check whether type TYPE is supported by current graphite-opencl
+   implementation.  If PTR or ARRAY is true, then TYPE can not be pointer
+   type (because only one level of pointers is supported). Also if PTR is
+   true, TYPE can not be array (because pointers to arrays are not
+   supported yet).  */
+
+static bool
+opencl_supported_type_p (tree type, bool ptr, bool array)
+{
+  switch (TREE_CODE (type))
+    {
+    case POINTER_TYPE:
+      {
+	if (ptr || array)
+	  return false;
+	return opencl_supported_type_p (TREE_TYPE (type), true, false);
+      }
+    case ARRAY_TYPE:
+      {
+	if (ptr)
+	  return false;
+	return opencl_supported_type_p (TREE_TYPE (type), false, true);
+      }
+    case FUNCTION_DECL:
+    case FUNCTION_TYPE:
+    case COMPLEX_TYPE:
+    case RECORD_TYPE:
+    case ENUMERAL_TYPE:
+    case UNION_TYPE:
+    case QUAL_UNION_TYPE:
+    case METHOD_TYPE:
+    case REFERENCE_TYPE:
+      return false;
+
+    case BOOLEAN_TYPE:
+    case INTEGER_TYPE:
+    case REAL_TYPE:
+      return true;
+    case VOID_TYPE:
+      return true;
+    case OFFSET_TYPE:
+    case FIXED_POINT_TYPE:
+    case VECTOR_TYPE:
+    case LANG_TYPE:
+    default:
+      debug_tree (type);
+      gcc_unreachable ();
+    }
+}
+
+/* Check whether expression ARG is supported by current graphite-opencl
+   implementation.  */
+
+static bool
+opencl_supported_arg_p (opencl_main code_gen, tree arg)
+{
+  switch (TREE_CODE (arg))
+    {
+    case SSA_NAME:
+      return opencl_supported_arg_p (code_gen, SSA_NAME_VAR (arg));
+
+    case ARRAY_REF:
+    case INDIRECT_REF:
+    case ADDR_EXPR:
+      return opencl_supported_arg_p (code_gen, TREE_OPERAND (arg, 0));
+
+    case VAR_DECL:
+    case PARM_DECL:
+      {
+	tree type = TREE_TYPE (arg);
+	if (TREE_CODE (type) == POINTER_TYPE)
+	  if (!opencl_get_data_by_tree (code_gen, arg))
+	    return false;
+
+	return opencl_supported_type_p (type, false, false);
+      }
+
+    case INTEGER_CST:
+    case REAL_CST:
+    case POINTER_PLUS_EXPR:
+      return true;
+
+    case FIELD_DECL:
+    case COMPONENT_REF:
+    case MEM_REF:
+    case REALPART_EXPR:
+    case IMAGPART_EXPR:
+    case COMPLEX_EXPR:
+    case CALL_EXPR:
+    case RESULT_DECL:
+      return false;
+
+    default:
+      debug_tree (arg);
+      gcc_unreachable ();
+    }
+}
+
+/* Check whether gimple assignment statement GMP is supported by current
+   graphite-opencl implementation.  CODE_GEN holds information about non
+   scalar arguments.  */
+
+static bool
+opencl_gimple_assign_with_supported_types_p (opencl_main code_gen, gimple gmp)
+{
+  tree curr_tree;
+  int num_of_ops = gimple_num_ops (gmp);
+  gcc_assert (gimple_code (gmp) == GIMPLE_ASSIGN);
+  gcc_assert (num_of_ops == 2 || num_of_ops == 3);
+
+  curr_tree = gimple_assign_lhs (gmp);
+  if (!opencl_supported_arg_p (code_gen, curr_tree))
+    return false;
+
+  curr_tree = gimple_assign_rhs1 (gmp);
+  if (!opencl_supported_arg_p (code_gen, curr_tree))
+    return false;
+
+  if (num_of_ops == 3)
+    {
+      curr_tree = gimple_assign_rhs2 (gmp);
+      if (!opencl_supported_arg_p (code_gen, curr_tree))
+        return false;
+    }
+  return true;
+}
+
+/* Check whether all statements in basic block BB are supported by current
+   graphite-opencl implementation.  CODE_GEN holds information about non
+   scalar arguments.  */
+
+static bool
+opencl_supported_type_access_p (opencl_main code_gen, basic_block bb)
+{
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple stmt = gsi_stmt (gsi);
+      if (!stmt)
+        continue;
+      switch (gimple_code (stmt))
+        {
+        case GIMPLE_DEBUG:
+        case GIMPLE_COND:
+        case GIMPLE_PHI:
+        case GIMPLE_LABEL:
+          continue;
+        case GIMPLE_ASSIGN:
+          if (!opencl_gimple_assign_with_supported_types_p (code_gen, stmt))
+            {
+              if (dump_file && (dump_flags & TDF_DETAILS))
+                {
+                  fprintf (dump_file, "opencl_supported_type_access_p:"
+                           " bad types in assignment\n");
+                  print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);
+                }
+              return false;
+            }
+          continue;
+        case GIMPLE_CALL:
+          return false;
+        default:
+          debug_gimple_stmt (stmt);
+          gcc_unreachable ();
+        }
+    }
+  return true;
+}
+
+
+/* Mark variable, represented by tree OBJ as visited in bitmap VISITED.
+   If DEF is true and given variable can be privatized, mark it as
+   privatized in META.  CODE_GEN holds information about non
+   scalar arguments.  */
+
+static void
+opencl_def_use_data (opencl_main code_gen, tree obj, bitmap visited,
+                     opencl_clast_meta meta, bool def)
+{
+  opencl_data data;
+  if (obj == NULL)
+    return;
+  data = opencl_get_data_by_tree (code_gen,
+                                  opencl_get_base_object_by_tree (obj));
+  if (data == NULL)
+    return;
+
+  if (!data->can_be_private)
+    return;
+
+  if (!bitmap_set_bit (visited, data->id))
+    return;
+
+  if (!def)
+    return;
+
+  bitmap_set_bit (meta->can_be_private, data->id);
+}
+
+/* Mark data in META, corresponding to basic block BB, which can be
+   privatized.  CODE_GEN holds information about non
+   scalar arguments.  */
+
+static void
+opencl_calc_bb_privatization (opencl_main code_gen, basic_block bb,
+                              opencl_clast_meta meta)
+{
+  gimple_stmt_iterator gsi;
+  bitmap visited = BITMAP_ALLOC (NULL);
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple stmt = gsi_stmt (gsi);
+      if (gimple_code (stmt) != GIMPLE_ASSIGN)
+        continue;
+      opencl_def_use_data (code_gen, gimple_assign_lhs (stmt),
+                           visited, meta, true);
+
+      opencl_def_use_data (code_gen, gimple_assign_rhs1 (stmt),
+                           visited, meta, false);
+
+      opencl_def_use_data (code_gen, gimple_assign_rhs2 (stmt),
+                           visited, meta, false);
+    }
+  BITMAP_FREE (visited);
+}
+
+/* Analyse clast_user_stmt STMT and set read/write flags for each data
+   reference in this statement in clast meta corresponding to this
+   statement.  If some data references in statement are unsupported,
+   then mark META as accessing unsupported.  CODE_GEN holds
+   information about non scalar arguments.  */
+
+static void
+opencl_set_meta_rw_flags (opencl_clast_meta meta,
+                          struct clast_user_stmt * stmt,
+                          opencl_main code_gen)
+{
+  CloogStatement *cs = stmt->statement;
+  poly_bb_p pbb = (poly_bb_p) cloog_statement_usr (cs);
+  VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb);
+  gimple_bb_p gbb = PBB_BLACK_BOX (pbb);
+  basic_block bb = GBB_BB (gbb);
+  int i;
+  poly_dr_p curr;
+  if (!opencl_supported_type_access_p (code_gen, bb))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        {
+          fprintf (dump_file, "Basic block contains unsupported "
+		   "types in graphite-opencl\n");
+          dump_bb (bb, dump_file, 0);
+
+        }
+
+      meta->access_unsupported = true;
+    }
+  opencl_calc_bb_privatization (code_gen, bb, meta);
+  for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++)
+    {
+      data_reference_p d_ref = (data_reference_p) PDR_CDR (curr);
+      tree data_ref_tree = dr_outermost_base_object (d_ref);
+      opencl_data data;
+
+      if (!opencl_supported_arg_p (code_gen, data_ref_tree))
+        {
+          meta->access_unsupported = true;
+
+          if (dump_file && (dump_flags & TDF_DETAILS))
+            {
+              fprintf (dump_file,
+		       "Unsupported in graphite-opencl data reference\n");
+              dump_data_reference (dump_file, d_ref);
+
+            }
+          continue;
+        }
+
+      data = opencl_get_data_by_data_ref (code_gen, d_ref);
+      gcc_assert (data);
+
+      if (!data->supported)
+        meta->access_unsupported = true;
+
+      if (!graphite_outer_subscript_bound (curr, false))
+        {
+          meta->access_unsupported = true;
+          if (dump_file && (dump_flags & TDF_DETAILS))
+            {
+              fprintf (dump_file, "Can not determine subscript bound "
+		       "for data reference\n");
+              dump_data_reference (dump_file, d_ref);
+
+            }
+
+          continue;
+        }
+
+      if (data->size_value == NULL)
+        {
+          meta->access_unsupported = true;
+          if (dump_file && (dump_flags & TDF_DETAILS))
+            {
+              fprintf (dump_file,
+		       "Can not determine size for data reference\n");
+              dump_data_reference (dump_file, d_ref);
+            }
+        }
+      bitmap_set_bit (meta->access, data->id);
+    }
+}
+
+/* Update META access bitmap by union of access bitmaps of it's children.  */
+
+static void
+opencl_collect_definitions_info (opencl_clast_meta meta)
+{
+  opencl_clast_meta curr = meta->body->next;
+  bitmap tmp_access = BITMAP_ALLOC (NULL);
+  bitmap_copy (tmp_access, meta->body->access);
+  meta->can_be_private = BITMAP_ALLOC (NULL);
+  bitmap_copy (meta->can_be_private, meta->body->can_be_private);
+  while (curr)
+    {
+      bitmap new_defs = BITMAP_ALLOC (NULL);
+      bitmap_and_compl (new_defs, curr->can_be_private, tmp_access);
+      bitmap_ior_into (tmp_access, curr->access);
+      bitmap_ior_into (meta->can_be_private, new_defs);
+      curr = curr->next;
+      BITMAP_FREE (new_defs);
+    }
+  meta->access = tmp_access;
+}
+
+/* Build meta structure from clast structure.
+   BODY - base clast statement.
+   DEPTH - depth of BODY in whole clast structure.
+   PARENT - parent meta node.
+   CODE_GEN - data structure, which holds information
+   about non scalar arguments.  */
+
+opencl_clast_meta
+opencl_create_meta_from_clast (opencl_main code_gen,
+                               struct clast_stmt * body, int depth,
+                               opencl_clast_meta parent)
+{
+  int max_depth = 0;
+  opencl_clast_meta result = NULL;
+  opencl_clast_meta curr = NULL;
+  struct clast_stmt * curr_stmt = body;
+  for ( ; curr_stmt; curr_stmt = curr_stmt->next)
+    {
+      opencl_clast_meta tmp_result = NULL;
+      if (CLAST_STMT_IS_A (curr_stmt, stmt_root))
+        continue;
+      if (CLAST_STMT_IS_A (curr_stmt, stmt_user))
+        {
+          tmp_result = opencl_clast_meta_create (depth, parent, true);
+          opencl_set_meta_rw_flags (tmp_result,
+                                    (struct clast_user_stmt*) curr_stmt,
+                                    code_gen);
+        }
+      if (CLAST_STMT_IS_A (curr_stmt, stmt_guard))
+        {
+          struct clast_guard * if_stmt = (struct clast_guard *)curr_stmt;
+          /* For guard (if) statement create meta for it's body and just
+             append it to current list.  */
+          tmp_result = opencl_create_meta_from_clast (code_gen, if_stmt->then,
+                                                      depth, parent);
+        }
+      if (CLAST_STMT_IS_A (curr_stmt, stmt_block))
+        {
+          struct clast_block * bl_stmt = (struct clast_block *)curr_stmt;
+          tmp_result = opencl_create_meta_from_clast (code_gen, bl_stmt->body,
+                                                      depth, parent);
+        }
+      if (CLAST_STMT_IS_A (curr_stmt, stmt_for))
+        {
+          struct clast_for * for_stmt = (struct clast_for *) curr_stmt;
+          tmp_result = opencl_clast_meta_create (depth, parent, false);
+          tmp_result->body
+	    = opencl_create_meta_from_clast (code_gen, for_stmt->body,
+					     depth + 1, tmp_result);
+
+          max_depth = (max_depth > tmp_result->in_depth + 1)
+	    ? max_depth : tmp_result->in_depth + 1;
+          opencl_collect_definitions_info (tmp_result);
+        }
+      if (!result)
+        curr = result = tmp_result;
+      else
+        curr->next = tmp_result;
+      while (curr->next != NULL) curr = curr->next;
+    }
+  if (parent)
+    parent->in_depth = max_depth;
+
+  return result;
+}
+
+/* For all opencl_data referenced in META calculate depth  of innermost
+   reference.  DEPTH is the depth of the loop, represented be META
+   in current loop nest.  DATE holds intermediate resuls.
+   Function returns false iff there is access to unsupported types in
+   given META.
+   Consider an example:
+
+   |         Meta_1
+   |          / \
+   |         /   \
+   |        /     \
+   |       /       \
+   |    Meta_2      Meta_3
+   |    {D1,D2}     /  \
+   |               /    \
+   |              /      \
+   |           Meta_4   Meta_5
+   |           {D2,D4}    |
+   |                      |
+   |                      |
+   |                   Meta_6
+   |                    {D3}
+
+   In this example D1_depth = 1, D2_depth = 2, D3_depth = 3, D4_depth = 2.  */
+
+static bool
+opencl_calc_max_depth_tab (opencl_clast_meta meta, htab_t data, int depth)
+{
+  while (meta)
+    {
+      if (meta->body)
+        {
+          if (!opencl_calc_max_depth_tab (meta->body, data, depth + 1))
+            return false;
+        }
+      else
+        {
+          /* User stmt.  Analyze data access.  */
+          bitmap stmt_access = meta->access;
+          unsigned i;
+          bitmap_iterator bi;
+          if (meta->access_unsupported)
+            return false;
+          EXECUTE_IF_SET_IN_BITMAP (stmt_access, 0, i, bi)
+            {
+              opencl_pair curr_pair = opencl_pair_create (i, depth);
+              struct opencl_pair_def ** slot
+		= (struct opencl_pair_def **)htab_find_slot (data, curr_pair,
+							     INSERT);
+              if (*slot == NULL)
+                *slot = curr_pair;
+              else
+                {
+                  opencl_pair old_pair = *slot;
+                  if (old_pair->val > curr_pair->val)
+                    opencl_pair_delete (curr_pair);
+                  else
+                    {
+                      *slot = curr_pair;
+                      opencl_pair_delete (old_pair);
+                    }
+                }
+            }
+        }
+      meta = meta->next;
+    }
+  return true;
+}
+
+/* Check whether it's reasonable to pass data, represented by OBJ,
+   to device based on information from META.
+   This function helps to avoid kernels like this.
+
+   | host_use_a_and_b ();
+   | for (int i = 0; i < N; i++)
+   |   a[i] = b[i];
+
+   host_use_a_and_b ();
+
+   We have no dependency here, but memory transfer from host to devices and
+   from device to host aren't reasonable here.
+
+   We have 2 situations when memory transfer is reasonable.
+
+   1.
+
+   | for (int j = 0; j < N; j++)
+   |   {
+   |     host_use_c ();
+   |     for (int i = 0; i < N; i ++)
+   |       a[i] = b [i] + j;
+   |   }
+
+   In this case we can put memory transfer befor first loop, so
+   we will avoid situation, when all benefits from parallel execution
+   can be eliminated by memory transfer.
+
+   2.
+
+   | for (int i = 0; i < N; i ++)
+   |   for (int j = 0; j < N; j ++)
+   |     {
+   |       c[i][j] = 0;
+   |       for (int k = 0; k < N; k ++)
+   |         c[i][j] += a[i][k] * b [k][j];
+   |     }
+
+   In this case each element of a,b or c is used N time on device,
+   so memory transfer is reasonable.  */
+
+static bool
+opencl_evaluate_data_access_p (opencl_data obj, opencl_clast_meta meta)
+{
+  int depth = obj->depth;
+  int data_id = obj->id;
+  opencl_clast_meta parent = meta->parent;
+  if (obj->privatized)
+    return false;
+  if (depth < obj->data_dim)
+    return false;
+  if (parent)
+    {
+      /* We have outer loop.  */
+      bitmap curr_bitmap = parent->modified_on_host;
+      /* Memory transfer for this statement has been placed outside
+         outer loop, so for one memory transfer will be executing more
+         then one kernel (first case).  */
+      if (!bitmap_bit_p (curr_bitmap, data_id))
+        return true;
+    }
+  /* Check max depth of memory access (second case).  */
+  return (depth > obj->data_dim);
+}
+
+/* Find opencl_data object by it's ID in CODE_GEN structures.  */
+
+static opencl_data
+opencl_get_data_by_id (opencl_main code_gen, int id)
+{
+  VEC (opencl_data, heap) * main_data = code_gen->opencl_function_data;
+  opencl_data res = VEC_index (opencl_data, main_data, id);
+  gcc_assert (res->id == id);
+  return res;
+}
+
+/* Check whether memory transfer is reasonable if clast statement,
+   connected with META, will be replaced by opencl kernel launch.
+   ACCESS holds depth of innermost data references
+   for all data, references in statement, represented by META.
+   CODE_GEN holds information about non scalar arguments.  */
+
+static bool
+opencl_analyse_data_access_p (opencl_main code_gen,
+                              htab_t access,
+                              opencl_clast_meta meta)
+{
+  htab_iterator h_iter;
+  opencl_pair curr;
+  int max_dim = 1;
+  int i;
+  opencl_data curr_data;
+  VEC (opencl_data, heap) * data_objs = VEC_alloc (opencl_data, heap,
+                                                   OPENCL_INIT_BUFF_SIZE);
+
+  FOR_EACH_HTAB_ELEMENT (access, curr, opencl_pair, h_iter)
+    {
+      int id = curr->id;
+      opencl_data obj = opencl_get_data_by_id (code_gen, id);
+      VEC_safe_push (opencl_data, heap, data_objs, obj);
+      if (max_dim < obj->data_dim)
+        max_dim = obj->data_dim;
+      obj->depth = curr->val;
+    }
+
+  for (i = 0; VEC_iterate (opencl_data, data_objs, i, curr_data); i++)
+    {
+      if (curr_data->data_dim != max_dim)
+        continue;
+      if (opencl_evaluate_data_access_p (curr_data, meta))
+        return true;
+    }
+  return false;
+}
+
+/* Main predicate which checks whether statement, represented by META and
+   located on depth DEPTH, should be replaced by opencl kernel launch.
+   CODE_GEN holds information about non scalar arguments.  */
+
+bool
+opencl_should_be_parallel_p (opencl_main code_gen,
+                             opencl_clast_meta meta,
+                             int depth)
+{
+  int i_depth = meta->in_depth;
+  htab_t max_access_depth;
+  bool dump_p = dump_file && (dump_flags & TDF_DETAILS);
+
+  if (dump_p)
+    fprintf (dump_file, "opencl_should_be_parallel_p: ");
+
+  /* Avoid launching a lot of small kernels in a deep loop.  */
+  if (!flag_graphite_opencl_no_depth_check)
+    if (depth > i_depth + opencl_base_depth_const)
+      {
+        if (dump_p)
+          fprintf (dump_file, "avoiding small kernel in deep loop\n");
+        return false;
+      }
+
+  max_access_depth = htab_create (OPENCL_INIT_BUFF_SIZE,
+                                  opencl_pair_to_hash,
+                                  opencl_pair_cmp, free);
+
+  /* Can't parallelize if statements in loop contain unsupported types.  */
+  if (!flag_graphite_opencl_no_types_check)
+    if (!opencl_calc_max_depth_tab (meta, max_access_depth, 0))
+      {
+        htab_delete (max_access_depth);
+        if (dump_p)
+          fprintf (dump_file, "unsupported types\n");
+
+        return false;
+      }
+
+  /* Can't parallelize if memory transfer is not reasonable.  */
+  if (!flag_graphite_opencl_no_memory_transfer_check
+      && !flag_graphite_opencl_cpu
+      && !opencl_analyse_data_access_p (code_gen, max_access_depth, meta))
+    {
+      htab_delete (max_access_depth);
+      if (dump_p)
+	fprintf (dump_file, "avoiding large memory transfer\n");
+      return false;
+    }
+
+  htab_delete (max_access_depth);
+
+  if (dump_p)
+    fprintf (dump_file, "ok\n");
+
+  return true;
+}
+
+#endif
diff --git a/gcc/graphite-opencl.c b/gcc/graphite-opencl.c
new file mode 100644
index 0000000..9c28d41
--- /dev/null
+++ b/gcc/graphite-opencl.c
@@ -0,0 +1,2913 @@ 
+/* GRAPHITE-OpenCL pass.
+   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This pass implements transformation of perfect loop nests without
+   dependencies to OpenCL kernels.
+
+   For example, loop nest like this
+
+   | for (s_i = lb_i; s_i <= ub_i; s_i += stride_i)
+   |  for (s_{i_1} = lb_{i+1}; s_{i+1} <= ub_{i+1}; s_{i+1} += stride_{i+1})
+   |   ...
+   |    for (s_j = lb_j; s_j <= ub_j; s_j += stride_j)
+   |     {
+   |       STMT(s_i, s_{i+1}, ..., s_j);
+   |     }
+
+   will be transformed to this OpenCL kernel and all required OpenCL
+   calls will be created.
+
+   | __kernel void opencl_auto_function_N
+   |   (base_i, mod_i, step_i, first_i,
+   |    ...,
+   |    base_j, mod_j, step_i, first_j,
+   |    ...)
+   | {
+   |   unsigned int global_id = get_global_id (0);
+   |   unsigned int s_i = ((global_id / base_i) % mod_i) * step_i + first_i;
+   |   unsigned int s_j = ((global_id / base_j) % mod_j) * step_j + first_j;
+   |   STMT(s_i, s_{i+1}, ..., s_j);
+   | }
+*/
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "ggc.h"
+#include "tree.h"
+#include "rtl.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "toplev.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pass.h"
+#include "domwalk.h"
+#include "value-prof.h"
+#include "pointer-set.h"
+#include "gimple.h"
+#include "sese.h"
+#include "output.h"
+#include <sys/time.h>
+#include "hashtab.h"
+#include "tree.h"
+
+#ifdef HAVE_cloog
+#include "cloog/cloog.h"
+#include "ppl_c.h"
+#include "graphite-ppl.h"
+#include "graphite.h"
+#include "graphite-poly.h"
+#include "graphite-scop-detection.h"
+#include "graphite-clast-to-gimple.h"
+#include "graphite-dependences.h"
+#include "dyn-string.h"
+#include "graphite-opencl.h"
+
+/* Enum for all OpenCL functions used in GRAPHITE-OpenCL.  */
+
+enum OPENCL_FUNCTUONS
+  {
+    STATIC_INIT = 0,
+    CREATE_CONTEXT_FROM_TYPE = 1,
+    GET_CONTEXT_INFO = 2,
+    CREATE_COMMAND_QUEUE = 3,
+    CREATE_PROGRAM_WITH_SOURCE = 4,
+    BUILD_PROGRAM = 5,
+    CREATE_KERNEL = 6,
+    CREATE_BUFFER = 7,
+    SET_KERNEL_ARG = 8,
+    ENQUEUE_ND_RANGE_KERNEL = 9,
+    ENQUEUE_READ_BUFFER = 10,
+    ENQUEUE_WRITE_BUFFER = 11,
+    RELEASE_MEMORY_OBJ = 12,
+    RELEASE_CONTEXT = 13,
+    RELEASE_COMMAND_QUEUE = 14,
+    RELEASE_PROGRAM = 15,
+    RELEASE_KERNEL = 16,
+    GET_PLATFORM_IDS = 17,
+    WAIT_FOR_EVENTS = 18
+  };
+
+/* Constructors and destructors.  */
+static opencl_main opencl_main_create (CloogNames *, sese, edge, htab_t);
+static void opencl_main_delete (opencl_main);
+static void opencl_clast_meta_delete (opencl_clast_meta);
+static tree opencl_create_function_decl (enum OPENCL_FUNCTUONS);
+static edge opencl_create_function_call (edge);
+static void opencl_init_data (scop_p, opencl_main);
+static int opencl_get_non_scalar_type_depth (tree);
+static tree opencl_create_memory_for_pointer (opencl_data);
+static void opencl_init_basic_blocks (opencl_main);
+static edge opencl_set_context_properties (edge, tree);
+static tree opencl_create_clCreateContextFromType (tree);
+static tree opencl_create_clGetContextInfo_1 (tree);
+static void opencl_create_gimple_variables (void);
+static tree opencl_create_clCreateCommandQueue (tree);
+static tree opencl_create_malloc_call (tree);
+static edge opencl_create_init_context (edge);
+static void opencl_wait_for_event (opencl_main, tree);
+static void opencl_transform_stmt_list (struct clast_stmt *, opencl_main, int);
+static void opencl_create_gimple_for_body (opencl_body, opencl_main);
+
+/* Data structure to be used in data_reference_p to opencl_data hash
+   table.  */
+struct map_ref_to_data_def
+{
+  data_reference_p key;
+  opencl_data value;
+};
+
+typedef struct map_ref_to_data_def * map_ref_to_data;
+
+/* Calculate hash value from map_ref_to_data.  */
+
+static hashval_t
+map_ref_to_data_to_hash (const void * data)
+{
+  const struct map_ref_to_data_def * obj
+    = (const struct map_ref_to_data_def *)data;
+  return htab_hash_pointer (obj->key);
+}
+
+/* Compare to map_ref_to_data pointers.  */
+
+static int
+map_ref_to_data_cmp (const void * v1, const void * v2)
+{
+  const struct map_ref_to_data_def * obj_1
+    = (const struct map_ref_to_data_def *)v1;
+  const struct map_ref_to_data_def * obj_2
+    = (const struct map_ref_to_data_def *)v2;
+
+  return (obj_1->key == obj_2->key);
+}
+
+/* Create new map_ref_to_data with NEW_KEY as key and NEW_VALUE as value.  */
+
+static map_ref_to_data
+map_ref_to_data_create (data_reference_p new_key,
+                        opencl_data new_value)
+{
+  map_ref_to_data tmp = XNEW (struct map_ref_to_data_def);
+  tmp->key = new_key;
+  tmp->value = new_value;
+  return tmp;
+}
+
+/* Data structure to be used in tree to opencl_data hash table.  */
+
+struct map_tree_to_data_def
+{
+  tree key;
+  opencl_data value;
+};
+
+typedef struct map_tree_to_data_def * map_tree_to_data;
+
+/* Calculate hash value from map_tree_to_data.  */
+
+static hashval_t
+map_tree_to_data_to_hash (const void * data)
+{
+  const struct map_tree_to_data_def * obj
+    = (const struct map_tree_to_data_def *)data;
+  return htab_hash_pointer (obj->key);
+}
+
+/* Compare to map_tree_to_data pointers.  */
+
+static int
+map_tree_to_data_cmp (const void * v1, const void * v2)
+{
+  const struct map_tree_to_data_def * obj_1
+    = (const struct map_tree_to_data_def *)v1;
+  const struct map_tree_to_data_def * obj_2
+    = (const struct map_tree_to_data_def *)v2;
+
+  return (obj_1->key == obj_2->key);
+}
+
+/* Create new map_tree_to_data with NEW_KEY as key and NEW_VALUE as value.  */
+
+static map_tree_to_data
+map_tree_to_data_create (tree new_key,
+                         opencl_data new_value)
+{
+  map_tree_to_data tmp = XNEW (struct map_tree_to_data_def);
+  tmp->key = new_key;
+  tmp->value = new_value;
+  return tmp;
+}
+
+/* Create and init new temporary variable with name NAME and
+   type TYPE.  */
+
+static tree
+opencl_create_tmp_var (tree type, const char * name)
+{
+  tree tmp = create_tmp_var (type, name);
+  TREE_ADDRESSABLE (tmp) = 1;
+  return tmp;
+}
+
+/* Create new var in basic block DEST to store EXPR and return it.  */
+
+tree
+opencl_tree_to_var (basic_block dest, tree expr)
+{
+  tree type = TREE_TYPE (expr);
+  tree var = opencl_create_tmp_var (type, "__ocl_general_tmp_var");
+  gimple_stmt_iterator g_iter = gsi_last_bb (dest);
+
+  tree call = build2 (MODIFY_EXPR, type, var, expr);
+
+  force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
+                            GSI_CONTINUE_LINKING);
+
+  return var;
+}
+
+/* Set rw flags to false for all datas, referenced in CODE_GEN.  */
+
+static void
+opencl_fflush_rw_flags (opencl_main code_gen)
+{
+  VEC (opencl_data, heap) * datas = code_gen->opencl_function_data;
+  int i;
+  opencl_data curr;
+  for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
+    {
+      curr->written_in_current_body = false;
+      curr->read_in_current_body = false;
+      curr->privatized = false;
+    }
+}
+
+/* Create new basic block on CODE_GEN->main edge and update it.  */
+
+basic_block
+opencl_create_bb (opencl_main code_gen)
+{
+  basic_block tmp = split_edge (code_gen->main_edge);
+  code_gen->main_edge = single_succ_edge (tmp);
+  return tmp;
+}
+
+/* All kernels of current function.  */
+static VEC(tree, heap) *opencl_function_kernels;
+
+/* OpenCL code for all kernels of current function.  */
+static dyn_string_t main_program_src;
+
+/* Delete clast meta DATA.  */
+
+static void
+opencl_clast_meta_delete (opencl_clast_meta data)
+{
+  if (!data)
+    return;
+  opencl_clast_meta_delete (data->body);
+  opencl_clast_meta_delete (data->next);
+  BITMAP_FREE (data->modified_on_device);
+  BITMAP_FREE (data->modified_on_host);
+  if (data->access != NULL)
+    BITMAP_FREE (data->access);
+  if (data->can_be_private)
+    BITMAP_FREE (data->can_be_private);
+  free (data);
+}
+
+static inline void
+opencl_verify (void)
+{
+#ifdef ENABLE_CHECKING
+  verify_loop_structure ();
+  verify_dominators (CDI_DOMINATORS);
+  verify_loop_closed_ssa (true);
+#endif
+}
+
+/* OpenCL definitions.  */
+#define CL_CONTEXT_PLATFORM 0x1084
+#define CL_CONTEXT_DEVICES 0x1081
+#define CL_DEVICE_TYPE_CPU (1 << 1)
+#define CL_DEVICE_TYPE_GPU (1 << 2)
+#define CL_MEM_COPY_HOST_PTR (1 << 5)
+#define CL_MEM_USE_HOST_PTR (1 << 3)
+#define CL_MEM_READ_WRITE (1 << 0)
+#define CL_MEM_WRITE_ONLY (1 << 1)
+#define CL_MEM_READ_ONLY (1 << 2)
+#define CL_TRUE 1
+
+/* Names of all OpenCL functions, used in GRAPHITE-OpenCL.  */
+
+static const char * opencl_function_names[] =
+  {
+    "clCreateContextFromType",
+    "clGetContextInfo",
+    "clCreateCommandQueue",
+    "clCreateProgramWithSource",
+    "clBuildProgram",
+    "clCreateKernel",
+    "clCreateBuffer",
+    "clSetKernelArg",
+    "clEnqueueNDRangeKernel",
+    "clEnqueueReadBuffer",
+    "clEnqueueWriteBuffer",
+    "clReleaseMemObject",
+    "clReleaseContext",
+    "clReleaseCommandQueue",
+    "clReleaseProgram",
+    "clReleaseKernel",
+    "clGetPlatformIDs",
+    "clWaitForEvents"
+  };
+
+#endif
+/* Variable, which holds OpenCL context.  */
+static GTY(()) tree h_context;
+
+/* Variable, which holds OpenCL comman queue.  */
+static GTY(()) tree h_cmd_queue;
+
+/* Variable, which holds OpenCL program for current function.  */
+static GTY(()) tree h_program;
+
+#ifdef HAVE_cloog
+
+/* This vector holds opencl_data, which represents arrays.
+   Arrays have constant sizes, so buffers for each of them can
+   be created only once.  */
+static VEC (opencl_data, heap) * opencl_array_data;
+
+/* Hash table, which maps opencl_data, related to arrays, to
+	  trees, which represents corresponding array.  */
+static htab_t array_data_to_tree;
+
+/* Check whether VAR is a zero dimension array.  */
+
+static bool
+zero_dim_array_p (tree var)
+{
+  tree type = TREE_TYPE (var);
+  tree domain;
+  tree up_bound;
+
+  if (TREE_CODE (type) != ARRAY_TYPE)
+    return false;
+  if (TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
+    return false;
+  domain = TYPE_DOMAIN (type);
+  if (domain == NULL)
+    return false;
+  up_bound =  TYPE_MAX_VALUE (domain);
+  if (TREE_CODE (up_bound) != INTEGER_CST)
+    return false;
+  return TREE_INT_CST_LOW (up_bound) == 0;
+}
+
+/* Check whether NAME is the name of the artificial array, which can be
+   privatized.  */
+
+static bool
+opencl_private_var_name_p (const char * name)
+{
+  static const char * general_reduction = "General_Reduction";
+  static const char * close_phi = "Close_Phi";
+  static const char * cross_bb = "Cross_BB_scalar_dependence";
+  static const char * commutative = "Commutative_Associative_Reduction";
+  if (!name)
+    return false;
+  return
+    ((strstr (name, general_reduction) == name)
+     || (strstr (name, close_phi) == name)
+     || (strstr (name, commutative) == name)
+     || (strstr (name, cross_bb) == name));
+}
+
+/* Check whether VAR is an artificial array, which can be privatized.  */
+
+static bool
+graphite_artificial_array_p (tree var)
+{
+  tree name;
+  if (TREE_CODE (var) != VAR_DECL)
+    return false;
+  if (!zero_dim_array_p (var))
+    return false;
+  name = DECL_NAME (var);
+  if (!name)
+    return false;
+  return opencl_private_var_name_p (IDENTIFIER_POINTER (name));
+}
+
+/* Constructors & destructors.
+   <name>_create - creates a new object of such type and returns it.
+   <name>_delete - delete object (like destructor).  */
+
+static opencl_data
+opencl_data_create (tree var, tree size)
+{
+  opencl_data tmp = XNEW (struct opencl_data_def);
+  tree type = TREE_TYPE (var);
+
+  tmp->can_be_private = graphite_artificial_array_p (var);
+  tmp->exact_object = var;
+
+  tmp->supported = TREE_CODE (var) == VAR_DECL || TREE_CODE (var) == SSA_NAME;
+
+  if (TREE_CODE (type) == ARRAY_TYPE)
+    var = build_addr (var, current_function_decl);
+
+  tmp->data_dim = opencl_get_non_scalar_type_depth (type);
+  tmp->object = var;
+
+  tmp->size_value = size;
+  tmp->size_variable
+    = opencl_create_tmp_var (size_type_node, "__opencl_data_size");
+
+  tmp->up_to_date_on_host = true;
+  tmp->up_to_date_on_device = true;
+  tmp->used_on_device = false;
+  tmp->ever_read_on_device = false;
+  tmp->ever_written_on_device = false;
+  return tmp;
+}
+
+static void
+opencl_data_delete (opencl_data data)
+{
+  free (data);
+}
+
+static opencl_main
+opencl_main_create (CloogNames * names, sese region, edge main_edge,
+                    htab_t params_index)
+{
+  opencl_main tmp = XNEW (struct graphite_opencl_creator);
+  tmp->root_names = names;
+  tmp->defined_vars = NULL;
+  tmp->global_defined_vars = NULL;
+  tmp->region = region;
+  tmp->main_edge = main_edge;
+  tmp->main_program = dyn_string_new (OPENCL_INIT_BUFF_SIZE);
+  tmp->current_body = NULL;
+  tmp->clast_meta = NULL;
+  tmp->curr_meta = NULL;
+  tmp->params_index = params_index;
+  tmp->newivs_index = htab_create (10, clast_name_index_elt_info,
+                                   eq_clast_name_indexes, free);
+  tmp->ref_to_data = htab_create (10, map_ref_to_data_to_hash,
+                                  map_ref_to_data_cmp, free);
+  tmp->tree_to_data = htab_create (10, map_tree_to_data_to_hash,
+                                   map_tree_to_data_cmp, free);
+  tmp->newivs = VEC_alloc (tree, heap, 10);
+  tmp->context_loop = SESE_ENTRY (region)->src->loop_father;
+  tmp->opencl_function_data = VEC_alloc (opencl_data, heap,
+					 OPENCL_INIT_BUFF_SIZE);
+  return tmp;
+}
+
+static void
+opencl_main_delete (opencl_main data)
+{
+  int i;
+  opencl_data curr;
+  dyn_string_delete (data->main_program);
+  htab_delete (data->newivs_index);
+  htab_delete (data->ref_to_data);
+  htab_delete (data->tree_to_data);
+  opencl_clast_meta_delete (data->clast_meta);
+  for (i = 0; VEC_iterate (opencl_data, data->opencl_function_data, i, curr);
+       i++)
+    if (!curr->is_static)
+      opencl_data_delete (curr);
+  VEC_free (tree, heap, data->newivs);
+  VEC_free (opencl_data, heap, data->opencl_function_data);
+  free (data);
+}
+
+/* Get depth of type TYPE scalar (base) part.  */
+
+static int
+opencl_get_non_scalar_type_depth (tree type)
+{
+  int count = 0;
+  while (TREE_CODE (type) == ARRAY_TYPE
+         || TREE_CODE (type) == POINTER_TYPE)
+    {
+      count++;
+      type = TREE_TYPE (type);
+    }
+  return count;
+}
+
+/* Add function call CALL to edge SRC.  If FLAG_GRAPHITE_OPENCL_DEBUG is
+   enabled, then add the following:
+
+   | int result = call ();
+   | if (call == 0 != ZERO_RETURN)
+   |   abort ();
+
+   Otherwise just add CALL as function call.  */
+
+static edge
+opencl_add_safe_call_on_edge (tree call, bool zero_return, edge src)
+{
+  if (!flag_graphite_opencl_debug)
+    {
+      basic_block bb = split_edge (src);
+      gimple_stmt_iterator g_iter = gsi_last_bb (bb);
+      force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
+                                GSI_CONTINUE_LINKING);
+      return single_succ_edge (bb);
+    }
+  else
+    {
+      tree cmp;
+      edge result;
+      basic_block abort_bb;
+      tree abort_funtion;
+      tree abort_call;
+      gimple_stmt_iterator g_iter;
+      if (zero_return)
+        {
+          tree correct_result = build1 (CONVERT_EXPR, TREE_TYPE (call),
+                                        integer_zero_node);
+          cmp = build2 (EQ_EXPR, boolean_type_node,
+			call, correct_result);
+        }
+      else
+        {
+          tree incorrect_result = build1 (CONVERT_EXPR, TREE_TYPE (call),
+                                          null_pointer_node);
+
+          cmp = build2 (NE_EXPR, boolean_type_node,
+			call, incorrect_result);
+        }
+      result = create_empty_if_region_on_edge (src, cmp);
+      abort_bb =  get_false_edge_from_guard_bb (src->dest)->dest;
+      abort_funtion = build_fn_decl ("abort", build_function_type_list
+				     (void_type_node, NULL_TREE));
+
+      abort_call = build_call_expr (abort_funtion, 0);
+
+      g_iter = gsi_last_bb (abort_bb);
+      force_gimple_operand_gsi (&g_iter, abort_call, true, NULL, false,
+                                GSI_CONTINUE_LINKING);
+      return result;
+    }
+}
+
+/* Add call CALL to the current edge from CODE_GEN.  ZERO_RETURN indicates
+   whether NULL or 0 is the success return value for CALL.  */
+
+static void
+opencl_add_safe_call (opencl_main code_gen, tree call, bool zero_return)
+{
+  code_gen->main_edge
+    = opencl_add_safe_call_on_edge (call, zero_return, code_gen->main_edge);
+}
+
+/* Get base object for OBJ.  */
+
+tree
+opencl_get_base_object_by_tree (tree obj)
+{
+  while (TREE_CODE (obj) == INDIRECT_REF
+         || TREE_CODE (obj) == ARRAY_REF)
+    obj = TREE_OPERAND (obj, 0);
+  return obj;
+}
+
+/* Get base object from data reference DR.  */
+
+tree
+dr_outermost_base_object (data_reference_p dr)
+{
+  tree addr = DR_BASE_ADDRESS (dr);
+  if (!addr)
+    {
+      /* In case, we don't know base object.  For example:
+
+         | void foo (int * a)
+         | {
+         |   int * b = a + 1;
+         |   *b = 0;
+         | }
+
+         Just return the innermost object when the base address is unknown.  */
+      tree ref = DR_REF (dr);
+      return opencl_get_base_object_by_tree (ref);
+    }
+
+  if (TREE_CODE (addr) == ADDR_EXPR)
+    addr = TREE_OPERAND (addr, 0);
+  return addr;
+}
+
+/* Get correct basic block for data with DATA_ID transfer.  If DEVICE
+   is true, then it's host to device transfer, otherwise it's device
+   to host transfer.  CODE_GEN holds information related to code
+   generation.  */
+
+static edge
+opencl_get_edge_for_init (opencl_main code_gen, int data_id, bool device)
+{
+  opencl_clast_meta curr = code_gen->curr_meta;
+  if (!curr)
+    return NULL;
+
+  while (curr->parent)
+    {
+      opencl_clast_meta parent = curr->parent;
+      bitmap curr_bitmap
+	= device ? parent->modified_on_host : parent->modified_on_device;
+      if (bitmap_bit_p (curr_bitmap, data_id))
+        break;
+      curr = curr->parent;
+    }
+  return curr->init_edge;
+}
+
+/* Add host to device memory transfer.  DATA - data, which must be
+   transfered to device.  CODE_GEN holds information related to code
+   generation.  */
+
+static tree
+opencl_pass_to_device (opencl_main code_gen, opencl_data data)
+{
+  edge init_edge;
+  tree function;
+  tree call;
+
+  tree index_type = build_index_type (build_int_cst (NULL_TREE, 2));
+  tree array_type = build_array_type (ptr_type_node, index_type);
+  tree var = opencl_create_tmp_var (array_type, "wait_event");
+  tree event_call;
+
+  TREE_STATIC (var) = 1;
+  assemble_variable (var, 1, 0, 1);
+
+  /* If data is wrutten in device, mark it is not up to date on host.  */
+  if (data->written_in_current_body)
+    data->up_to_date_on_host = false;
+
+  /* If data is up to date on device, but it was initialized befor
+     current loop, then mark it as initialized in current loop and
+     store it.
+
+     Consider an example: D - device, H - host, W - write, R - read.
+
+     | HW(1) -- LOOP
+     |          /\
+     |         /  \
+     |        /    \
+     |      HR(2)  DW(3)
+
+     While analyzing statement (2), data will be up to date on host
+     because of statement (1), but while executing after (3) in loop,
+     (2) will read incorrect data.
+
+     So, we have to add device to host memory transfer after statement (3).
+  */
+  if (flag_graphite_opencl_cpu)
+    return data->device_object;
+
+  if (data->up_to_date_on_device)
+    {
+      if (!data->inited_in_current_loop_on_device
+	  && code_gen && code_gen->curr_meta
+	  && code_gen->curr_meta->parent)
+	VEC_safe_push (opencl_data, heap,
+		       code_gen->curr_meta->parent->post_pass_to_device,
+		       data);
+
+      data->inited_in_current_loop_on_device = true;
+      return data->device_object;
+    }
+
+  data->inited_in_current_loop_on_device = true;
+  init_edge = opencl_get_edge_for_init (code_gen, data->id, true);
+
+  /* Add gimple.  */
+  function = opencl_create_function_decl (ENQUEUE_WRITE_BUFFER);
+
+  event_call = build4 (ARRAY_REF, ptr_type_node, var,
+                       integer_zero_node, NULL_TREE, NULL_TREE);
+  event_call = build_addr (event_call, current_function_decl);
+  call = build_call_expr (function, 9,
+                          h_cmd_queue,
+                          data->device_object,
+                          build_int_cst (NULL_TREE, CL_TRUE),
+                          integer_zero_node,
+                          data->size_variable,
+                          data->object,
+                          integer_zero_node,
+                          null_pointer_node,
+                          event_call);
+  if (init_edge)
+    opencl_add_safe_call_on_edge (call, true, init_edge);
+  else
+    opencl_add_safe_call (code_gen, call, true);
+  data->up_to_date_on_device = true;
+  opencl_wait_for_event (code_gen, event_call);
+  return data->device_object;
+}
+
+/* Add device to host memory transfer.  DATA - data, which must be
+   transfered to host.  CODE_GEN holds information related to code
+   generation.  */
+
+static void
+opencl_pass_to_host (opencl_main code_gen, opencl_data data)
+{
+  edge init_edge;
+  tree function;
+  tree curr_type;
+  tree curr;
+  tree call;
+
+  tree index_type = build_index_type (build_int_cst (NULL_TREE, 2));
+  tree array_type = build_array_type (ptr_type_node, index_type);
+  tree var = opencl_create_tmp_var (array_type, "wait_event");
+  tree event_call;
+
+  TREE_STATIC (var) = 1;
+  assemble_variable (var, 1, 0, 1);
+  if (data->written_in_current_body)
+    data->up_to_date_on_device = false;
+  if (data->up_to_date_on_host)
+    {
+      if (!data->inited_in_current_loop_on_host
+	  && code_gen && code_gen->curr_meta &&
+	  code_gen->curr_meta->parent)
+	VEC_safe_push (opencl_data, heap,
+		       code_gen->curr_meta->parent->post_pass_to_host, data);
+
+      data->inited_in_current_loop_on_host = true;
+      return;
+    }
+
+  data->inited_in_current_loop_on_host = true;
+
+  if (flag_graphite_opencl_cpu)
+    return;
+
+  if (data->privatized)
+    return;
+
+  init_edge = opencl_get_edge_for_init (code_gen, data->id, false);
+
+  function = opencl_create_function_decl (ENQUEUE_READ_BUFFER);
+  curr_type =  TREE_TYPE (data->object);
+  curr = data->object;
+
+  if (TREE_CODE (curr_type) == ARRAY_TYPE)
+    curr = build_addr (curr, current_function_decl);
+
+  event_call = build4 (ARRAY_REF, ptr_type_node, var,
+                       integer_zero_node, NULL_TREE, NULL_TREE);
+  event_call = build_addr (event_call, current_function_decl);
+
+
+  call = build_call_expr (function, 9,
+                          h_cmd_queue,
+                          data->device_object,
+                          build_int_cst (NULL_TREE, CL_TRUE),
+                          integer_zero_node,
+                          data->size_variable,
+                          curr, integer_zero_node,
+                          null_pointer_node,
+                          event_call);
+
+  if (init_edge)
+    opencl_add_safe_call_on_edge (call, true, init_edge);
+  else
+    opencl_add_safe_call (code_gen, call, true);
+  opencl_wait_for_event (code_gen, event_call);
+  data->up_to_date_on_host = true;
+}
+
+/* Pass all data from device to host.  This function must be called when
+   we need all data to be up to date on host.  CODE_GEN holds information
+   related to code generation.  */
+
+static void
+opencl_fflush_all_device_buffers_to_host (opencl_main code_gen)
+{
+  VEC (opencl_data, heap) * datas = code_gen->opencl_function_data;
+  int i;
+  opencl_data curr;
+  tree function = opencl_create_function_decl (RELEASE_MEMORY_OBJ);
+  for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
+    {
+      curr->written_in_current_body = true;
+      opencl_pass_to_host (code_gen, curr);
+    }
+  for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
+    {
+      if (curr->used_on_device && !curr->is_static)
+        {
+          tree var = curr->device_object;
+          tree call = build_call_expr (function, 1, var);
+          opencl_add_safe_call (code_gen, call, true);
+        }
+    }
+}
+
+/* Create memory buffers on host for all required host memory objects.
+   CODE_GEN holds information related to code generation.  */
+
+static void
+opencl_init_all_device_buffers (opencl_main code_gen)
+{
+  VEC (opencl_data, heap) * datas = code_gen->opencl_function_data;
+  int i;
+  opencl_data curr;
+  edge data_init_edge = single_succ_edge (code_gen->data_init_bb);
+  for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
+    {
+      tree tmp;
+      if (!curr->used_on_device || curr->is_static)
+        continue;
+
+      tmp = opencl_create_memory_for_pointer (curr);
+      tmp = build2 (MODIFY_EXPR, ptr_type_node, curr->device_object, tmp);
+      data_init_edge = opencl_add_safe_call_on_edge (tmp, false,
+                                                     data_init_edge);
+    }
+}
+
+/* Create new static void * variable with name __ocl_ + NAME.  */
+
+static tree
+opencl_create_static_ptr_variable (const char * name)
+{
+  const char * id_name = concat ("__ocl_",name,  NULL);
+  tree var =  build_decl (UNKNOWN_LOCATION, VAR_DECL,
+                          create_tmp_var_name (id_name), ptr_type_node);
+  TREE_STATIC (var) = 1;
+  TREE_PUBLIC (var) = 0;
+  DECL_ARTIFICIAL (var) = 1;
+  TREE_USED (var) = 1;
+  TREE_ADDRESSABLE (var) = 1;
+  DECL_INITIAL (var) = null_pointer_node;
+  assemble_variable (var, 1, 0, 1);
+
+  return var;
+}
+
+/* Insert several opencl calls to output program.  */
+
+/* | cl_program h_program;
+   | h_program = clCreateProgramWithSource (h_context, 1,
+   | 				          sProgramSource, 0, 0);
+
+   SRC is the program source code and DEST is the edge where
+   call must be inserted.  */
+
+static edge
+opencl_insert_create_program_with_source_call (const char * src, edge base)
+{
+  /* Get tree with function definition.  */
+  tree function = opencl_create_function_decl (CREATE_PROGRAM_WITH_SOURCE);
+  tree code_tree = build_string_literal (strlen (src) + 1, src);
+  tree call;
+  basic_block bb = split_edge (base);
+  tree tmp_var = opencl_tree_to_var (bb, code_tree);
+
+  call = build_call_expr (function, 5, h_context,
+                          integer_one_node,
+                          build_addr (tmp_var, current_function_decl),
+                          null_pointer_node,
+                          null_pointer_node);
+
+  call = build2 (MODIFY_EXPR, ptr_type_node,
+                 h_program, call);
+  return opencl_add_safe_call_on_edge (call, false, single_succ_edge (bb));
+}
+
+/* clBuildProgram (h_program, 0, 0, 0, 0, 0);
+   BASE is the edge where call must be inserted.  */
+
+static edge
+opencl_insert_build_program_call (edge base)
+{
+  tree function = opencl_create_function_decl (BUILD_PROGRAM);
+  tree call = build_call_expr (function, 6,
+                               h_program,
+                               integer_zero_node, null_pointer_node,
+                               null_pointer_node, null_pointer_node,
+                               null_pointer_node);
+  return opencl_add_safe_call_on_edge (call, true, base);
+}
+
+/* cl_kernel tmm_kernel;
+   tmp_kernel = clCreateKernel (h_program, func_name, 0);
+   FUNCTION_NAME is the name of the kernel function,
+   CODE_GEN holds information related to code generation.  */
+
+static tree
+opencl_insert_create_kernel_call (opencl_main code_gen,
+				  const char *function_name)
+{
+  tree new_kernel_var;
+  basic_block bb;
+  gimple_stmt_iterator g_iter;
+  tree function;
+  tree kernel_name;
+  tree call;
+  tree tmp_tree;
+
+  new_kernel_var = opencl_create_tmp_var (ptr_type_node, function_name);
+  bb = split_edge (code_gen->kernel_edge);
+  g_iter = gsi_last_bb (bb);
+  function = opencl_create_function_decl (CREATE_KERNEL);
+  kernel_name = build_string_literal (strlen (function_name) + 1,
+                                      function_name);
+  call = build_call_expr (function, 3, h_program, kernel_name,
+                          null_pointer_node);
+  tmp_tree = build2 (MODIFY_EXPR, ptr_type_node,
+                     new_kernel_var, call);
+
+  code_gen->kernel_edge = single_succ_edge (bb);
+  force_gimple_operand_gsi (&g_iter, tmp_tree, true, NULL, false,
+                            GSI_CONTINUE_LINKING);
+  VEC_safe_push (tree, heap, opencl_function_kernels, new_kernel_var);
+  code_gen->kernel_edge
+    = opencl_add_safe_call_on_edge (new_kernel_var, false,
+				    code_gen->kernel_edge);
+  return new_kernel_var;
+}
+
+/* Init memory on device.  Only one levell of pointers are suppoted.
+   So in case of char ** only array of char * will be created.
+   Function return tree, corresponding to new pointer (pointer
+   on device).
+
+   | cl_mem clCreateBuffer (cl_context context,
+   |                        cl_mem_flags flags,
+   |                        size_t size,
+   |                        void *host_ptr,
+   |                        cl_int *errcode_ret)  */
+
+/* Calculate size of data reference, represented by REF.  PTR is a
+   base object of data reference.  */
+
+static tree
+opencl_get_indirect_size (tree ptr, poly_dr_p ref)
+{
+  ptr = TREE_TYPE (ptr);
+  switch (TREE_CODE (ptr))
+    {
+    case ARRAY_TYPE:
+      return TYPE_SIZE_UNIT (ptr);
+
+    case POINTER_TYPE:
+      {
+	tree inner_type = TREE_TYPE (ptr);
+	tree t = graphite_outer_subscript_bound (ref, false);
+	tree inner_type_size = TYPE_SIZE_UNIT (inner_type);
+	if (inner_type_size == NULL)
+	  return NULL;
+
+	if (DECL_P (inner_type_size))
+	  add_referenced_var (inner_type_size);
+
+	gcc_assert (t);
+	t = fold_build2 (TRUNC_DIV_EXPR, sizetype, t, inner_type_size);
+	t = fold_build2 (PLUS_EXPR, sizetype, t, size_one_node);
+	t = fold_build2 (MULT_EXPR, sizetype, t, inner_type_size);
+	return t;
+      }
+    default:
+      return NULL_TREE;
+    }
+  gcc_unreachable ();
+}
+
+/* Calculate correct flags for clCreateBuffer.  READ means, that
+   buffer must be readable on device, WRITE - that buffer must be
+   writable on device.  */
+
+static int
+opencl_get_mem_flags (bool read, bool write)
+{
+  int rw_flags;
+  int location_flags;
+  gcc_assert (read || write);
+  if (write && read)
+    rw_flags = CL_MEM_READ_WRITE;
+  else
+    {
+      if (read)
+        rw_flags = CL_MEM_READ_ONLY;
+      else
+        rw_flags = CL_MEM_WRITE_ONLY;
+    }
+  if (flag_graphite_opencl_cpu)
+    location_flags = CL_MEM_USE_HOST_PTR;
+  else
+    location_flags = CL_MEM_COPY_HOST_PTR;
+  return location_flags | rw_flags;
+}
+
+/* Create memory on device for DATA and init it by data from host.
+   ptr is pointer to host memory location.  Function returns tree,
+   corresponding to memory location on device.  */
+
+static tree
+opencl_create_memory_for_pointer (opencl_data data)
+{
+  tree ptr = data->object;
+  tree arr_size = data->size_variable;
+  tree function = opencl_create_function_decl (CREATE_BUFFER);
+  bool ever_read = data->ever_read_on_device;
+  bool ever_written = data->ever_written_on_device;
+  tree mem_flags = build_int_cst (NULL_TREE,
+				  opencl_get_mem_flags (ever_read,
+							ever_written));
+  if (TREE_CODE (TREE_TYPE (ptr)) == ARRAY_TYPE)
+    ptr = build_addr (ptr, current_function_decl);
+
+  if (flag_graphite_opencl_debug)
+    {
+      tree result = opencl_create_tmp_var (integer_type_node,
+                                           "__opencl_create_buffer_result");
+
+      return build_call_expr (function, 5,
+                              h_context,  mem_flags,
+                              arr_size, ptr,
+                              build1 (ADDR_EXPR,
+                                      integer_ptr_type_node,
+                                      result));
+    }
+  else
+    return build_call_expr (function, 5,
+                            h_context,  mem_flags,
+                            arr_size, ptr, null_pointer_node);
+}
+
+/* Create variables for kernel KERNEL arguments.  Each argument is
+   represented by new variable with it's value and it's size.  If arg
+   is a pointer or array, it's represented by device buffer with data
+   from host memory.  CODE_GEN holds information related to code
+   generation.  */
+
+static void
+opencl_init_local_device_memory (opencl_main code_gen, opencl_body kernel)
+{
+  VEC (tree, heap) ** args = &kernel->function_args;
+  VEC (tree, heap) ** args_to_pass = &kernel->function_args_to_pass;
+  VEC (opencl_data, heap) ** refs = &kernel->data_refs;
+  tree curr;
+  opencl_data curr_data;
+  int i;
+  basic_block bb = opencl_create_bb (code_gen);
+  basic_block kernel_bb = split_edge (code_gen->kernel_edge);
+  code_gen->kernel_edge = single_succ_edge (kernel_bb);
+  for (i = 0; VEC_iterate (tree, *args, i, curr); i ++)
+    {
+      gimple_stmt_iterator g_iter = gsi_last_bb (bb);
+      gimple_stmt_iterator kernel_g_iter = gsi_last_bb (kernel_bb);
+      tree curr_type = TREE_TYPE (curr);
+      tree new_type;
+      tree tmp_var;
+      tree mov;
+      tree curr_var = opencl_create_tmp_var (curr_type, "__ocl_iv");
+      if (TREE_CODE (curr) != PARM_DECL
+          && TREE_CODE (curr) != VAR_DECL)
+        {
+          mov = build2 (MODIFY_EXPR, curr_type, curr_var, curr);
+
+          force_gimple_operand_gsi (&g_iter, mov, false, NULL, false,
+                                    GSI_CONTINUE_LINKING);
+        }
+      else
+	force_gimple_operand_gsi (&g_iter, curr, false, curr_var, false,
+				  GSI_CONTINUE_LINKING);
+      curr = curr_var;
+
+      new_type = build_pointer_type (curr_type);
+      tmp_var = opencl_create_tmp_var (new_type, "__opencl_scalar_arg");
+      mov = build1 (ADDR_EXPR, new_type, curr);
+
+      mov = build2 (MODIFY_EXPR, new_type, tmp_var, mov);
+
+      force_gimple_operand_gsi (&kernel_g_iter, mov, false, NULL, false,
+                                GSI_CONTINUE_LINKING);
+      VEC_safe_push (tree, heap, *args_to_pass, tmp_var);
+    }
+  for (i = 0; VEC_iterate (opencl_data, *refs, i, curr_data); i++)
+    {
+      gimple_stmt_iterator kernel_g_iter = gsi_last_bb (kernel_bb);
+      tree new_type;
+      tree tmp_var;
+      tree mov;
+
+      tree curr = opencl_pass_to_device (code_gen, curr_data);
+      tree curr_type = ptr_type_node;
+
+      new_type = build_pointer_type (curr_type);
+      tmp_var = opencl_create_tmp_var (new_type, "__opencl_non_scalar_arg");
+      mov = build1 (ADDR_EXPR, new_type, curr);
+
+      mov = build2 (MODIFY_EXPR, new_type, tmp_var, mov);
+
+      force_gimple_operand_gsi (&kernel_g_iter, mov, false, NULL, false,
+                                GSI_CONTINUE_LINKING);
+      VEC_safe_push (tree, heap, *args_to_pass, tmp_var);
+
+    }
+}
+
+/* cl_int clSetKernelArg (cl_kernel kernel,
+   cl_uint arg_index,
+   size_t arg_size,
+   const void *arg_value)
+
+   Set all kernel args for OpenCL kernel, represented by KERNEL_VAR.
+   KERNEL holds all data, related to given kernel.
+   CODE_GEN holds information related to code generation.
+   All arguments are passed by pointer.  */
+
+static void
+opencl_pass_kernel_arguments (opencl_main code_gen, opencl_body kernel,
+                              tree kernel_var)
+{
+  VEC (tree, heap) * args_to_pass = kernel->function_args_to_pass;
+  tree arg;
+  int i;
+  tree function = opencl_create_function_decl (SET_KERNEL_ARG);
+  for (i = 0; VEC_iterate (tree, args_to_pass, i, arg); i++)
+    {
+      tree call
+	= build_call_expr (function, 4, kernel_var,
+			   build_int_cst (NULL_TREE, i),
+			   TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (arg))),
+			   arg);
+      opencl_add_safe_call (code_gen, call, true);
+    }
+}
+
+/* clEnqueueNDRangeKernel (h_cmd_queue, hKernel, 1, 0,
+   &cnDimension, 0, 0, 0, 0);
+
+   Execute kernel, represented by KERNEL_VAR in NUM_OF_EXEC threads.
+   Use EVENT_VAR as event variable for asynchronous call.
+   CODE_GEN holds information related to code generation.  */
+
+static void
+opencl_execute_kernel (opencl_main code_gen, tree num_of_exec,
+                       tree kernel_var, tree event_var)
+{
+  tree function = opencl_create_function_decl (ENQUEUE_ND_RANGE_KERNEL);
+  tree num_of_threads = opencl_create_tmp_var (integer_type_node,
+					       "__opencl_num_of_exec");
+  gimple_stmt_iterator g_iter = gsi_last_bb (opencl_create_bb (code_gen));
+  tree call;
+
+  TREE_STATIC (num_of_threads) = 1;
+  assemble_variable (num_of_threads, 1, 0, 1);
+
+  call = build2 (MODIFY_EXPR, integer_type_node, num_of_threads, num_of_exec);
+
+
+  force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
+                            GSI_CONTINUE_LINKING);
+
+  call = build1 (ADDR_EXPR, integer_ptr_type_node, num_of_threads);
+
+  call = build_call_expr (function, 9,
+                          h_cmd_queue,
+                          kernel_var,
+                          integer_one_node,
+                          null_pointer_node,
+                          call,
+                          null_pointer_node,
+                          integer_zero_node,
+                          null_pointer_node,
+                          event_var);
+
+  opencl_add_safe_call (code_gen, call, true);
+}
+
+/* Place building program from single source string to edge BASE.
+   Current implementation performs single build per function.
+   String contains kernels from all scops of current function.
+   Functions returns true if any kernel has been created.  */
+
+static edge
+opencl_create_function_call (edge base)
+{
+  edge new_edge;
+  const char * src;
+
+  /* Required for addressing types with size less then 4 bytes.  */
+  dyn_string_prepend_cstr
+    (main_program_src,
+     "#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n");
+  /* Required for double type.  */
+  dyn_string_prepend_cstr
+    (main_program_src, "#pragma OPENCL EXTENSION cl_khr_fp64  : enable\n ");
+  src = dyn_string_buf (main_program_src);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "\nGenerated OpenCL code: \n");
+      fprintf (dump_file, "%s", src);
+    }
+  new_edge = opencl_insert_create_program_with_source_call (src, base);
+
+  return opencl_insert_build_program_call (new_edge);
+}
+
+/* Mark privatizable data for current loop nest.  Information where
+   given data can be privatized is taken from meta information of
+   current loop nest, which is stored in CODE_GEN.  */
+
+static void
+opencl_mark_privatized_data (opencl_main code_gen)
+{
+  VEC (opencl_data, heap) * datas = code_gen->opencl_function_data;
+  int i;
+  opencl_data curr;
+  bitmap can_be_private = code_gen->curr_meta->can_be_private;
+  for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
+    curr->privatized = bitmap_bit_p (can_be_private, curr->id);
+
+}
+
+/* Store calculated sizes of all pointers or arrays to variables.
+   CODE_GEN holds information related to code generation.  */
+
+static void
+opencl_set_data_size (opencl_main code_gen)
+{
+  VEC (opencl_data, heap) * datas = code_gen->opencl_function_data;
+  int i;
+  opencl_data curr;
+  gimple_stmt_iterator g_iter = gsi_last_bb (code_gen->data_init_bb);
+  for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
+    {
+      tree call;
+      if (curr->is_static)
+        continue;
+      if (!curr->used_on_device)
+        continue;
+
+      if (curr->size_value == NULL)
+        continue;
+
+      call = build2 (MODIFY_EXPR, size_type_node,
+                     curr->size_variable, curr->size_value);
+
+      force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
+                                GSI_CONTINUE_LINKING);
+    }
+}
+
+/* Transform clast statement DATA from scop SCOP to OpenCL calls
+   in region REGION.  Place all calls to edge MAIN.  PARAM_INDEX
+   holds external scop params.  */
+
+void
+opencl_transform_clast (struct clast_stmt * data, sese region,
+                        edge main, scop_p scop, htab_t params_index)
+{
+  opencl_main code_gen;
+  /* Create main data struture for code generation.  */
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "\nGenerating OpenCL code for SCoP: \n");
+      print_scop (dump_file, scop, 0);
+    }
+
+  code_gen = opencl_main_create (((struct clast_root *)data)->names,
+                                 region, main, params_index);
+
+  opencl_init_basic_blocks (code_gen);
+  opencl_init_data (scop, code_gen);
+
+  code_gen->clast_meta = opencl_create_meta_from_clast (code_gen, data, 1,
+							NULL);
+  code_gen->curr_meta = code_gen->clast_meta;
+
+  opencl_transform_stmt_list (data, code_gen, 1);
+  if (dyn_string_length (code_gen->main_program) != 0)
+    {
+      dyn_string_append (main_program_src, code_gen->main_program);
+      opencl_set_data_size (code_gen);
+      opencl_init_all_device_buffers (code_gen);
+      opencl_fflush_all_device_buffers_to_host (code_gen);
+    }
+  recompute_all_dominators ();
+  update_ssa (TODO_update_ssa);
+  opencl_main_delete (code_gen);
+}
+
+/* Prepare memory for gimple (host) statement, represented by PBB.
+   Copy memory from device to host if it's nessesary.
+   CODE_GEN holds information related to code generation.  */
+
+static void
+opencl_prepare_memory_for_gimple_stmt (poly_bb_p pbb, opencl_main code_gen)
+{
+  VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb);
+  int i;
+  poly_dr_p curr;
+  for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++)
+    {
+      data_reference_p d_ref = (data_reference_p) PDR_CDR (curr);
+      opencl_data data;
+      bool is_read;
+
+      /* Scalar variables can be passed directly.  */
+      data = opencl_get_data_by_data_ref (code_gen, d_ref);
+      /* Private variables should not be passed from device to host.  */
+      if (data->privatized)
+        continue;
+      is_read = DR_IS_READ (d_ref);
+      gcc_assert (data);
+
+      data->read_in_current_body = is_read;
+      data->written_in_current_body = !is_read;
+      opencl_pass_to_host (code_gen, data);
+
+      if (!is_read)
+        bitmap_set_bit (code_gen->curr_meta->modified_on_host, data->id);
+    }
+}
+
+/* Add basic block from clast_user_stmt STMT to gimple.
+   CODE_GEN holds information related to code generation.  */
+
+static void
+opencl_add_gimple_for_user_stmt (struct clast_user_stmt * stmt,
+				 opencl_main code_gen)
+{
+  gimple_bb_p gbb;
+  CloogStatement *cs = stmt->statement;
+  poly_bb_p pbb = (poly_bb_p) cloog_statement_usr (cs);
+  sese region = code_gen->region;
+  int nb_loops = number_of_loops ();
+  int i;
+  VEC (tree, heap) * iv_map = VEC_alloc (tree, heap, nb_loops);
+  htab_t newivs_index = code_gen->newivs_index;
+  VEC (tree, heap) * newivs = code_gen->newivs;
+  /* Get basic block to add.  */
+  gbb = PBB_BLACK_BOX (pbb);
+
+  if (GBB_BB (gbb) == ENTRY_BLOCK_PTR)
+    return;
+
+  /*Reset flags.  */
+  opencl_fflush_rw_flags (code_gen);
+
+  /* Pass all required memory to host.  */
+  opencl_prepare_memory_for_gimple_stmt (pbb, code_gen);
+
+  for (i = 0; i < nb_loops; i++)
+    VEC_quick_push (tree, iv_map, NULL_TREE);
+
+  build_iv_mapping (iv_map, region, newivs, newivs_index,
+                    stmt, code_gen->params_index);
+  code_gen->main_edge
+    = copy_bb_and_scalar_dependences (GBB_BB (gbb), region,
+                                      code_gen->main_edge, iv_map);
+  VEC_free (tree, heap, iv_map);
+  recompute_all_dominators ();
+  update_ssa (TODO_update_ssa);
+
+  opencl_verify ();
+}
+
+/* Add if statement, represented by S to current gimple.
+   CODE_GEN holds information related to code generation.  */
+
+static void
+opencl_add_gimple_for_stmt_guard (struct clast_guard * s,
+                                  opencl_main code_gen, int depth)
+{
+  edge last_e = graphite_create_new_guard (code_gen->region,
+                                           code_gen->main_edge, s,
+                                           code_gen->newivs,
+                                           code_gen->newivs_index,
+                                           code_gen->params_index);
+
+  edge true_e = get_true_edge_from_guard_bb (code_gen->main_edge->dest);
+  code_gen->main_edge = true_e;
+  opencl_transform_stmt_list (s->then, code_gen, depth);
+  code_gen->main_edge = last_e;
+
+  recompute_all_dominators ();
+  opencl_verify ();
+}
+
+/* Reset data structures before processing loop, represented by META.
+   CODE_GEN holds information related to code generation.  */
+
+static void
+opencl_init_new_loop (opencl_clast_meta meta, opencl_main code_gen)
+{
+  opencl_data curr;
+  unsigned i;
+  meta->post_pass_to_host
+    = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
+  meta->post_pass_to_device
+    = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
+
+  for (i = 0;
+       VEC_iterate (opencl_data, code_gen->opencl_function_data, i, curr);
+       i++)
+    {
+      curr->inited_in_current_loop_on_host = false;
+      curr->inited_in_current_loop_on_device = false;
+    }
+}
+
+/* Post loop init.  Loop through all data stored in POST_PASS_TO_HOST
+   and POST_PASS_TO_DEVICE vectors in META.  This data must be up to
+   date on host or device respectively at the end of current loop.
+   CODE_GEN holds information related to code generation.  */
+
+static void
+opencl_postpass_data (opencl_main code_gen, opencl_clast_meta meta)
+{
+  opencl_data curr;
+  unsigned i;
+  for (i = 0;
+       VEC_iterate (opencl_data, meta->post_pass_to_host, i, curr); i++)
+    {
+      curr->written_in_current_body = false;
+      opencl_pass_to_host (code_gen, curr);
+    }
+
+  if (!flag_graphite_opencl_cpu)
+    for (i = 0;
+	 VEC_iterate (opencl_data, meta->post_pass_to_device, i, curr); i++)
+      {
+        curr->written_in_current_body = false;
+        opencl_pass_to_device (code_gen, curr);
+      }
+  if (meta->parent)
+    {
+      VEC (opencl_data, heap) *parent_vec_host
+	= meta->parent->post_pass_to_host;
+      VEC (opencl_data, heap) *parent_vec_device
+	= meta->parent->post_pass_to_device;
+
+      for (i = 0;
+	   VEC_iterate (opencl_data, meta->post_pass_to_host, i, curr); i++)
+	VEC_safe_push (opencl_data, heap, parent_vec_host, curr);
+
+      for (i = 0;
+	   VEC_iterate (opencl_data, meta->post_pass_to_device, i, curr); i++)
+	VEC_safe_push (opencl_data, heap, parent_vec_device, curr);
+    }
+  VEC_free (opencl_data, heap, meta->post_pass_to_host);
+  VEC_free (opencl_data, heap, meta->post_pass_to_device);
+}
+
+/* Add loop body, of the loop, represented by S, on host.
+   Loop body can contain device code.
+   DEPTH contains depth of given loop in current loop nest.
+   DEPENDENCY indicates where given loop has any dependencies.
+   CODE_GEN holds information related to code generation.  */
+
+static void
+opencl_add_gimple_for_loop (struct clast_for * s, opencl_main code_gen,
+			    int depth, bool dependency)
+{
+  loop_p old_parent = code_gen->context_loop;
+  loop_p new_loop
+    = graphite_create_new_loop (code_gen->region,
+                                code_gen->main_edge,
+                                s, code_gen->context_loop,
+				& code_gen->newivs,
+				code_gen->newivs_index,
+				code_gen->params_index,
+				depth);
+
+  edge last_e = single_exit (new_loop);
+  edge to_body = single_succ_edge (new_loop->header);
+  basic_block after = to_body->dest;
+  opencl_clast_meta parent = code_gen->curr_meta->parent;
+  last_e = single_succ_edge (split_edge (last_e));
+
+  code_gen->context_loop = new_loop;
+  code_gen->main_edge = single_succ_edge (new_loop->header);
+
+  opencl_init_new_loop (parent, code_gen);
+  opencl_transform_stmt_list (s->body, code_gen, depth + 1);
+
+  code_gen->context_loop = old_parent;
+
+  redirect_edge_succ_nodup (code_gen->main_edge, after);
+  set_immediate_dominator (CDI_DOMINATORS, code_gen->main_edge->dest,
+                           code_gen->main_edge->src);
+
+  opencl_postpass_data (code_gen, parent);
+  code_gen->main_edge = last_e;
+
+  if (flag_loop_parallelize_all && !dependency)
+    new_loop->can_be_parallel = true;
+  opencl_verify ();
+}
+
+/* Add loop, represented by S, on host.  Loop body can contain device code.
+   DEPTH contains depth of given loop in current loop nest.
+   DEPENDENCY indicates where given loop has any dependencies.
+   CODE_GEN holds information related to code generation.  */
+
+static void
+opencl_add_gimple_for_stmt_for (struct clast_for * s, opencl_main code_gen,
+				int depth, bool dependency)
+{
+  edge last_e = graphite_create_new_loop_guard (code_gen->region,
+                                                code_gen->main_edge,
+                                                s, code_gen->newivs,
+                                                code_gen->newivs_index,
+                                                code_gen->params_index);
+
+  edge true_e = get_true_edge_from_guard_bb (code_gen->main_edge->dest);
+  code_gen->main_edge = true_e;
+
+  opencl_add_gimple_for_loop (s, code_gen, depth, dependency);
+  code_gen->main_edge = last_e;
+}
+
+/* Calculate parent data access flags in META based on children.
+   parent->modified_on_host = OR_{forall children} child->modified_on_host.
+   parent->modified_on_device = OR_{forall children} child->modified_on_device.
+*/
+
+static void
+opencl_fix_meta_flags (opencl_clast_meta meta)
+{
+  opencl_clast_meta curr = meta->body;
+  while (curr)
+    {
+      bitmap_ior_into (meta->modified_on_host, curr->modified_on_host);
+      bitmap_ior_into (meta->modified_on_device, curr->modified_on_device);
+      curr = curr->next;
+    }
+}
+
+/* Delete opencl_body DATA.  */
+
+static void
+opencl_body_delete (opencl_body data)
+{
+  dyn_string_delete (data->body);
+  dyn_string_delete (data->header);
+  dyn_string_delete (data->pre_header);
+  dyn_string_delete (data->non_scalar_args);
+  VEC_free (tree, heap, data->function_args);
+  VEC_free (tree, heap, data->function_args_to_pass);
+  VEC_free (opencl_data, heap, data->data_refs);
+  free (data);
+}
+
+/* Parse clast statement list S, located on depth DEPTH in current loop nest.
+   This function generates gimple from clast statements, but in case of
+   stmt_for either host or device code can be generated.
+   CODE_GEN holds information related to code generation.  */
+
+static void
+opencl_transform_stmt_list (struct clast_stmt * s, opencl_main code_gen,
+			    int depth)
+{
+  bool dump_p = dump_file && (dump_flags & TDF_DETAILS);
+  for ( ; s; s = s->next)
+    {
+      opencl_clast_meta tmp = code_gen->curr_meta;
+      if (CLAST_STMT_IS_A (s, stmt_root))
+        continue;
+      else if (CLAST_STMT_IS_A (s, stmt_user))
+        {
+          code_gen->curr_meta->init_edge = code_gen->main_edge;
+          opencl_add_gimple_for_user_stmt ((struct clast_user_stmt *) s,
+                                           code_gen);
+          code_gen->curr_meta = code_gen->curr_meta->next;
+        }
+      else if (CLAST_STMT_IS_A (s, stmt_for))
+        {
+          opencl_clast_meta current_clast  = code_gen->curr_meta;
+          struct clast_for *for_stmt = (struct clast_for *) s;
+          bool dependency = false;
+          bool parallel = false;
+          /* If there are dependencies in loop, it can't be parallelized.  */
+          if (!flag_graphite_opencl_no_dep_check &&
+              dependency_in_clast_loop_p (code_gen, current_clast,
+                                          for_stmt, depth))
+            {
+	      if (dump_p)
+		fprintf (dump_file, "dependency in loop\n");
+	      dependency = true;
+            }
+          if (!dependency)
+            parallel = opencl_should_be_parallel_p (code_gen, current_clast,
+                                                    depth);
+
+          /* Create init block for memory transfer befor loop.  */
+          current_clast->init_edge = code_gen->main_edge;
+
+          if (parallel && !dependency)
+            {
+              opencl_body current_body;
+              opencl_fflush_rw_flags (code_gen);
+              opencl_mark_privatized_data (code_gen);
+              current_clast->on_device = true;
+              current_body
+		= opencl_clast_to_kernel (for_stmt, code_gen, depth);
+
+              if (current_body->num_of_data_writes)
+                {
+                  dyn_string_t header = current_body->header;
+                  dyn_string_t pre_header = current_body->pre_header;
+                  dyn_string_t body = current_body->body;
+
+                  dyn_string_append (code_gen->main_program, header);
+                  dyn_string_append (code_gen->main_program, pre_header);
+                  dyn_string_append (code_gen->main_program, body);
+
+                  opencl_create_gimple_for_body (current_body, code_gen);
+                }
+              htab_delete (code_gen->global_defined_vars);
+              update_ssa (TODO_update_ssa);
+              opencl_verify ();
+              opencl_body_delete (current_body);
+              code_gen->current_body = NULL;
+            }
+          else
+            {
+              code_gen->curr_meta = code_gen->curr_meta->body;
+              opencl_add_gimple_for_stmt_for (for_stmt, code_gen,
+                                              depth, dependency);
+            }
+
+          opencl_fix_meta_flags (current_clast);
+          code_gen->curr_meta = current_clast->next;
+        }
+      else if (CLAST_STMT_IS_A (s, stmt_guard))
+        opencl_add_gimple_for_stmt_guard ((struct clast_guard *) s,
+                                          code_gen, depth);
+      else if (CLAST_STMT_IS_A (s, stmt_block))
+        opencl_transform_stmt_list (((struct clast_block *)s)->body,
+                                    code_gen, depth);
+      else
+        gcc_unreachable ();
+      if (tmp->parent)
+        opencl_fix_meta_flags (tmp->parent);
+    }
+}
+
+/* Find opencl_data object by host object OBJ in CODE_GEN hash maps.  */
+
+opencl_data
+opencl_get_data_by_tree (opencl_main code_gen, tree obj)
+{
+  map_tree_to_data tmp = map_tree_to_data_create (obj, NULL);
+  map_tree_to_data * slot
+    = (map_tree_to_data *) htab_find_slot (code_gen->tree_to_data,
+					   tmp, INSERT);
+  if (*slot == NULL)
+    return NULL;
+
+  return (*slot)->value;
+}
+
+/* Find opencl_data object by data reference REF in CODE_GEN hash maps.  */
+
+opencl_data
+opencl_get_data_by_data_ref (opencl_main code_gen, data_reference_p ref)
+{
+  map_ref_to_data tmp = map_ref_to_data_create (ref, NULL);
+  map_ref_to_data * slot
+    = (map_ref_to_data *) htab_find_slot (code_gen->ref_to_data,
+					  tmp, INSERT);
+  if (*slot == NULL)
+    return NULL;
+
+  return (*slot)->value;
+}
+
+/* Register reference to DATA via data reference REF_KEY and
+   variable TREE_KEY in CODE_GEN structures.  */
+
+static void
+opencl_register_data (opencl_main code_gen, opencl_data data,
+                      tree tree_key, data_reference_p ref_key)
+{
+  htab_t ref_to_data = code_gen->ref_to_data;
+  htab_t tree_to_data = code_gen->tree_to_data;
+  map_ref_to_data ref_ptr = map_ref_to_data_create (ref_key, data);
+  map_tree_to_data tree_ptr = map_tree_to_data_create (tree_key, data);
+  map_ref_to_data * ref_slot;
+  map_tree_to_data * tree_slot;
+
+
+  ref_slot
+    = (map_ref_to_data *) htab_find_slot (ref_to_data, ref_ptr, INSERT);
+  gcc_assert (*ref_slot == NULL);
+  *ref_slot = ref_ptr;
+
+
+  tree_slot
+    = (map_tree_to_data *) htab_find_slot (tree_to_data, tree_ptr, INSERT);
+  gcc_assert (*tree_slot == NULL || (*tree_slot)->value == data);
+  *tree_slot = tree_ptr;
+}
+
+/* Create required OpenCL variable for given DATA.  */
+
+static void
+opencl_data_init_object (opencl_data data)
+{
+  if (TREE_CODE (TREE_TYPE (data->exact_object)) == POINTER_TYPE)
+    {
+      data->device_object
+	= opencl_create_tmp_var (ptr_type_node, "__opencl_data");
+      data->is_static = false;
+    }
+  else
+    {
+      /* (TREE_CODE (TREE_TYPE (data->exact_object)) == ARRAY_TYPE) */
+      map_tree_to_data tree_ptr
+	= map_tree_to_data_create (data->exact_object, data);
+
+      map_tree_to_data * tree_slot =
+	(map_tree_to_data *) htab_find_slot (array_data_to_tree,
+					     tree_ptr, INSERT);
+      gcc_assert (*tree_slot == NULL);
+      *tree_slot = tree_ptr;
+
+      data->device_object
+	= opencl_create_static_ptr_variable ("__opencl_data");
+      data->is_static = true;
+      data->size_variable = data->size_value;
+      VEC_safe_push (opencl_data, heap, opencl_array_data, data);
+    }
+}
+
+/* Find opencl_data which represents array VAR.  */
+
+static opencl_data
+opencl_get_static_data_by_tree (tree var)
+{
+  map_tree_to_data tmp = map_tree_to_data_create (var, NULL);
+  map_tree_to_data * slot
+    = (map_tree_to_data *) htab_find_slot (array_data_to_tree,
+					   tmp, INSERT);
+  if (*slot == NULL)
+    return NULL;
+
+  return (*slot)->value;
+
+}
+
+/* Analyze single data reference REF and update CODE_GEN structures.
+   If it access data, which has been accessed in data references
+   before, update it's size.  Otherwise add data to array.  */
+
+static void
+opencl_parse_single_data_ref (poly_dr_p ref, opencl_main code_gen)
+{
+  data_reference_p d_ref = (data_reference_p) PDR_CDR (ref);
+  tree data_ref_tree = dr_outermost_base_object (d_ref);
+  tree size = NULL_TREE;
+  opencl_data curr;
+
+  curr = opencl_get_data_by_tree (code_gen, data_ref_tree);
+  size = opencl_get_indirect_size (data_ref_tree, ref);
+  if (curr)
+    {
+      if (!curr->is_static)
+        {
+          if (!size || !curr->size_value)
+            curr->size_value = NULL;
+          else
+            curr->size_value = fold_build2 (MAX_EXPR, sizetype,
+                                            size, curr->size_value);
+        }
+    }
+  else
+    {
+      curr = opencl_get_static_data_by_tree (data_ref_tree);
+      if (!curr)
+        {
+          curr = opencl_data_create (data_ref_tree, size);
+          opencl_data_init_object (curr);
+        }
+      curr->id = VEC_length (opencl_data, code_gen->opencl_function_data);
+      VEC_safe_push (opencl_data, heap, code_gen->opencl_function_data, curr);
+    }
+  opencl_register_data (code_gen, curr, data_ref_tree, d_ref);
+}
+
+/* Analyse all data reference for poly basic block PBB and update CODE_GEN
+   structures.  */
+
+static void
+opencl_parse_data_refs (poly_bb_p pbb, opencl_main code_gen)
+{
+  VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb);
+  int i;
+  poly_dr_p curr;
+  for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++)
+    opencl_parse_single_data_ref (curr, code_gen);
+}
+
+/* Analyse all data reference for scop M_SCOP and update
+   CODE_GEN structures.  */
+
+static void
+opencl_init_data (scop_p m_scop, opencl_main code_gen)
+{
+  VEC (poly_bb_p, heap) * bbs = SCOP_BBS (m_scop);
+  int i;
+  poly_bb_p curr;
+  for (i = 0; VEC_iterate (poly_bb_p, bbs, i, curr); i++)
+    opencl_parse_data_refs (curr, code_gen);
+}
+
+/* Add clWaitForEvent (1, EVENT_VAR); call to CODE_GEN->main_edge.  */
+
+static void
+opencl_wait_for_event (opencl_main code_gen, tree event_var)
+{
+  tree function = opencl_create_function_decl (WAIT_FOR_EVENTS);
+  tree call = build_call_expr (function, 2,
+                               integer_one_node,
+                               event_var);
+  opencl_add_safe_call (code_gen, call, true);
+}
+
+/* This calls must be placed after outermost loop processing.  */
+
+/* Add function calls to create and launch kernel KERNEL to
+   CODE_GEN->main_edge.  */
+
+static void
+opencl_create_gimple_for_body (opencl_body kernel, opencl_main code_gen)
+{
+  tree num_of_exec = kernel->num_of_exec;
+  tree call;
+
+  tree kernel_var
+    = opencl_insert_create_kernel_call (code_gen, (const char *)kernel->name);
+
+  tree index_type = build_index_type (build_int_cst (NULL_TREE, 2));
+  tree array_type = build_array_type (ptr_type_node, index_type);
+  tree var = opencl_create_tmp_var (array_type, "wait_event");
+  TREE_STATIC (var) = 1;
+  assemble_variable (var, 1, 0, 1);
+
+  call = build4 (ARRAY_REF, ptr_type_node, var,
+		 integer_zero_node, NULL_TREE, NULL_TREE);
+  call = build_addr (call, current_function_decl);
+
+  opencl_init_local_device_memory (code_gen, kernel);
+  opencl_pass_kernel_arguments (code_gen, kernel, kernel_var);
+
+  opencl_execute_kernel (code_gen, num_of_exec, kernel_var, call);
+  opencl_wait_for_event (code_gen, call);
+}
+
+/* Create global variables for opencl code.  */
+
+static void
+opencl_create_gimple_variables (void)
+{
+  static bool opencl_var_created = false;
+  if (opencl_var_created)
+    return;
+
+  opencl_var_created = true;
+  /* cl_context h_context */
+  h_context = opencl_create_static_ptr_variable ("__ocl_h_context");
+
+  /* cl_command_queue h_cmd_queue */
+  h_cmd_queue = opencl_create_static_ptr_variable ("__ocl_h_cmd_queue");
+}
+
+/* Return tree, which represents function selected by ID.
+   If ID is STATIC_INIT, init all required data.  */
+
+static tree
+opencl_create_function_decl (enum OPENCL_FUNCTUONS id)
+{
+  static tree create_context_from_type_decl = NULL;
+  static tree get_context_info_decl = NULL;
+  static tree create_command_queue_decl = NULL;
+  static tree create_program_with_source_decl = NULL;
+  static tree build_program_decl = NULL;
+  static tree create_kernel_decl = NULL;
+  static tree create_buffer_decl = NULL;
+  static tree set_kernel_arg_decl = NULL;
+  static tree enqueue_nd_range_kernel_decl = NULL;
+  static tree enqueue_read_buffer_decl = NULL;
+  static tree enqueue_write_buffer_decl = NULL;
+  static tree release_memory_obj_decl = NULL;
+  static tree release_context_decl = NULL;
+  static tree release_command_queue_decl = NULL;
+  static tree release_program_decl = NULL;
+  static tree release_kernel_decl = NULL;
+  static tree get_platform_ids_decl = NULL;
+  static tree get_wait_for_events_decl = NULL;
+  switch (id)
+    {
+    case STATIC_INIT:
+      {
+	tree const_char_type = build_qualified_type (char_type_node,
+						     TYPE_QUAL_CONST);
+	tree const_char_ptr = build_pointer_type (const_char_type);
+	tree const_char_ptr_ptr = build_pointer_type (const_char_ptr);
+
+	tree const_size_t = build_qualified_type (size_type_node,
+						  TYPE_QUAL_CONST);
+	tree const_size_t_ptr = build_pointer_type (const_size_t);
+
+	tree size_t_ptr = build_pointer_type (size_type_node);
+
+	tree cl_device_type = integer_type_node;
+	tree cl_context_info = unsigned_type_node;
+	tree cl_command_queue_properties = long_unsigned_type_node;
+	tree cl_mem_flags = long_unsigned_type_node;
+
+	tree cl_context = ptr_type_node;
+	tree cl_context_properties = ptr_type_node;
+	tree cl_command_queue = ptr_type_node;
+	tree cl_device_id = ptr_type_node;
+	tree cl_program = ptr_type_node;
+	tree cl_kernel = ptr_type_node;
+	tree cl_event = ptr_type_node;
+	tree cl_mem = ptr_type_node;
+
+	tree const_cl_event = build_qualified_type (cl_event,
+						    TYPE_QUAL_CONST);
+	tree cl_event_ptr = build_pointer_type (cl_event);
+	tree const_cl_event_ptr = build_pointer_type (const_cl_event);
+
+	tree const_cl_device_id = build_qualified_type (cl_device_id,
+							TYPE_QUAL_CONST);
+	tree const_cl_device_id_ptr = build_pointer_type (const_cl_device_id);
+
+	tree cl_platford_id = long_integer_type_node;
+	tree cl_platford_id_ptr = build_pointer_type (cl_platford_id);
+
+	tree function_type;
+	/* | cl_context
+	   | clCreateContextFromType (cl_context_properties *properties,
+	   |                          cl_device_type device_type,
+	   |                          void (*pfn_notify) (const char *errinfo,
+	   |                          const void *private_info, size_t cb,
+	   |                          void *user_data),
+	   |                          void *user_data,
+	   |                          cl_int *errcode_ret)  */
+	function_type
+	  = build_function_type_list (cl_context,
+				      cl_context_properties,
+				      cl_device_type,
+				      ptr_type_node,
+				      ptr_type_node,
+				      integer_ptr_type_node,
+				      NULL_TREE);
+	create_context_from_type_decl
+	  = build_fn_decl (opencl_function_names[0], function_type);
+
+	/* | cl_int clGetContextInfo (cl_context context,
+	   |                          cl_context_info param_name,
+	   |                          size_t param_value_size,
+	   |                          void *param_value,
+	   |                          size_t *param_value_size_ret)  */
+	function_type
+	  = build_function_type_list (integer_type_node,
+				      cl_context,
+				      cl_context_info,
+				      size_type_node,
+				      ptr_type_node,
+				      size_t_ptr,
+				      NULL_TREE);
+	get_context_info_decl
+	  = build_fn_decl (opencl_function_names[1], function_type);
+
+	/* | cl_command_queue
+	   | clCreateCommandQueue (cl_context context,
+	   |                       cl_device_id device,
+	   |                       cl_command_queue_properties properties,
+	   |                       cl_int *errcode_ret)  */
+	function_type
+	  = build_function_type_list (cl_command_queue,
+				      cl_context,
+				      cl_device_id,
+				      cl_command_queue_properties,
+				      integer_ptr_type_node,
+				      NULL_TREE);
+	create_command_queue_decl
+	  = build_fn_decl (opencl_function_names[2], function_type);
+
+	/* | cl_program clCreateProgramWithSource (cl_context context,
+	   |                                       cl_uint count,
+	   |                                       const char **strings,
+	   |                                       const size_t *lengths,
+	   |                                       cl_int *errcode_ret)  */
+	function_type
+	  = build_function_type_list (cl_program,
+				      cl_context,
+				      unsigned_type_node,
+				      const_char_ptr_ptr,
+				      const_size_t_ptr,
+				      integer_ptr_type_node,
+				      NULL_TREE);
+	create_program_with_source_decl
+	  = build_fn_decl (opencl_function_names[3], function_type);
+
+	/* | cl_int
+	   | clBuildProgram (cl_program program,
+	   |                 cl_uint num_devices,
+	   |                 const cl_device_id *device_list,
+	   |                 const char *options,
+	   |                 void (*pfn_notify) (cl_program, void *user_data),
+	   |                 void *user_data)  */
+	function_type
+	  = build_function_type_list (integer_type_node,
+				      cl_program,
+				      unsigned_type_node,
+				      const_cl_device_id_ptr,
+				      const_char_ptr,
+				      ptr_type_node,
+				      ptr_type_node,
+				      NULL_TREE);
+	build_program_decl
+	  = build_fn_decl (opencl_function_names[4], function_type);
+
+	/* | cl_kernel clCreateKernel (cl_program program,
+	   |                           const char *kernel_name,
+	   |                           cl_int *errcode_ret)  */
+	function_type
+	  = build_function_type_list (cl_kernel,
+				      cl_program,
+				      const_char_ptr,
+				      integer_ptr_type_node,
+				      NULL_TREE);
+
+	create_kernel_decl
+	  = build_fn_decl (opencl_function_names[5], function_type);
+
+	/* | cl_mem clCreateBuffer (cl_context context,
+	   |                        cl_mem_flags flags,
+	   |                        size_t size,
+	   |                        void *host_ptr,
+	   |                        cl_int *errcode_ret)  */
+
+	function_type
+	  = build_function_type_list (cl_mem,
+				      cl_context,
+				      cl_mem_flags,
+				      size_type_node,
+				      ptr_type_node,
+				      integer_ptr_type_node,
+				      NULL_TREE);
+	create_buffer_decl
+	  = build_fn_decl (opencl_function_names[6], function_type);
+
+
+	/* | cl_int clSetKernelArg (cl_kernel kernel,
+	   |                        cl_uint arg_index,
+	   |                        size_t arg_size,
+	   |                        const void *arg_value)  */
+
+	function_type
+	  = build_function_type_list (integer_type_node,
+				      cl_kernel,
+				      unsigned_type_node,
+				      size_type_node,
+				      const_ptr_type_node,
+				      NULL_TREE);
+	set_kernel_arg_decl
+	  = build_fn_decl (opencl_function_names[7], function_type);
+
+	/* | cl_int clEnqueueNDRangeKernel (cl_command_queue command_queue,
+	   |                                cl_kernel kernel,
+	   |                                cl_uint work_dim,
+	   |                                const size_t *global_work_offset,
+	   |                                const size_t *global_work_size,
+	   |                                const size_t *local_work_size,
+	   |                                cl_uint num_events_in_wait_list,
+	   |                                const cl_event *event_wait_list,
+	   |                                cl_event *event)  */
+
+	function_type
+	  = build_function_type_list (integer_type_node,
+				      cl_command_queue,
+				      cl_kernel,
+				      unsigned_type_node,
+				      const_size_t_ptr,
+				      const_size_t_ptr,
+				      const_size_t_ptr,
+				      unsigned_type_node,
+				      const_cl_event_ptr,
+				      cl_event_ptr,
+				      NULL_TREE);
+
+	enqueue_nd_range_kernel_decl
+	  = build_fn_decl (opencl_function_names[8], function_type);
+
+	/* | cl_int clEnqueueReadBuffer (cl_command_queue command_queue,
+	   |                             cl_mem buffer,
+	   |                             cl_bool blocking_read,
+	   |                             size_t offset,
+	   |                             size_t cb,
+	   |                             void *ptr,
+	   |                             cl_uint num_events_in_wait_list,
+	   |                             const cl_event *event_wait_list,
+	   |                             cl_event *event)  */
+
+	function_type
+	  = build_function_type_list (integer_type_node,
+				      cl_command_queue,
+				      cl_mem,
+				      unsigned_type_node,
+				      size_type_node,
+				      size_type_node,
+				      ptr_type_node,
+				      unsigned_type_node,
+				      const_cl_event_ptr,
+				      cl_event_ptr,
+				      NULL_TREE);
+
+	enqueue_read_buffer_decl
+	  = build_fn_decl (opencl_function_names[9], function_type);
+
+	/* | cl_int clEnqueueWriteBuffer (cl_command_queue command_queue,
+	   |                              cl_mem buffer,
+	   |                              cl_bool blocking_write,
+	   |                              size_t offset,
+	   |                              size_t cb,
+	   |                              const void *ptr,
+	   |                              cl_uint num_events_in_wait_list,
+	   |                              const cl_event *event_wait_list,
+	   |                              cl_event *event)  */
+
+	function_type
+	  = build_function_type_list (integer_type_node,
+				      cl_command_queue,
+				      cl_mem,
+				      unsigned_type_node,
+				      size_type_node,
+				      size_type_node,
+				      const_ptr_type_node,
+				      unsigned_type_node,
+				      const_cl_event_ptr,
+				      cl_event_ptr,
+				      NULL_TREE);
+
+	enqueue_write_buffer_decl
+	  = build_fn_decl (opencl_function_names[10], function_type);
+
+
+	/* cl_int clReleaseMemObject (cl_mem memobj)  */
+
+	function_type
+	  = build_function_type_list (integer_type_node, cl_mem, NULL_TREE);
+
+	release_memory_obj_decl
+	  = build_fn_decl (opencl_function_names[11], function_type);
+
+
+	/* cl_int clReleaseContext (cl_context context)  */
+	function_type
+	  = build_function_type_list (integer_type_node, cl_context,
+				      NULL_TREE);
+
+	release_context_decl
+	  = build_fn_decl (opencl_function_names[12], function_type);
+
+	/* cl_int clReleaseCommandQueue (cl_command_queue command_queue)  */
+	function_type
+	  = build_function_type_list (integer_type_node, cl_command_queue,
+				      NULL_TREE);
+
+	release_command_queue_decl
+	  = build_fn_decl (opencl_function_names[13], function_type);
+
+	/* cl_int clReleaseProgram (cl_program program)  */
+	function_type
+	  = build_function_type_list (integer_type_node, cl_program,
+				      NULL_TREE);
+
+	release_program_decl
+	  = build_fn_decl (opencl_function_names[14], function_type);
+
+	/* cl_int clReleaseKernel (cl_kernel kernel)  */
+	function_type
+	  = build_function_type_list (integer_type_node, cl_kernel, NULL_TREE);
+
+	release_kernel_decl
+	  = build_fn_decl (opencl_function_names[15], function_type);
+
+	/* | cl_int clGetPlatformIDs (cl_uint num_entries,
+	   |                          cl_platform_id *platforms,
+	   |                          cl_uint *num_platforms)  */
+
+
+	function_type
+	  = build_function_type_list (integer_type_node,
+				      unsigned_type_node,
+				      cl_platford_id_ptr,
+				      build_pointer_type (unsigned_type_node),
+				      NULL_TREE);
+	get_platform_ids_decl
+	  = build_fn_decl (opencl_function_names [16], function_type);
+
+
+	/* | cl_int clWaitForEvents (cl_uint num_events,
+	   |                         const cl_event *event_list)  */
+
+	function_type
+	  = build_function_type_list (integer_type_node,
+				      unsigned_type_node,
+				      const_cl_event_ptr,
+				      NULL_TREE);
+
+	get_wait_for_events_decl
+	  = build_fn_decl (opencl_function_names [17], function_type);
+
+	return NULL_TREE;
+      }
+    case CREATE_CONTEXT_FROM_TYPE: return create_context_from_type_decl;
+    case GET_CONTEXT_INFO: return get_context_info_decl;
+    case CREATE_COMMAND_QUEUE: return create_command_queue_decl;
+    case CREATE_PROGRAM_WITH_SOURCE: return create_program_with_source_decl;
+    case BUILD_PROGRAM: return build_program_decl;
+    case CREATE_KERNEL: return create_kernel_decl;
+    case CREATE_BUFFER: return create_buffer_decl;
+    case SET_KERNEL_ARG: return set_kernel_arg_decl;
+    case ENQUEUE_ND_RANGE_KERNEL: return enqueue_nd_range_kernel_decl;
+    case ENQUEUE_READ_BUFFER: return enqueue_read_buffer_decl;
+    case ENQUEUE_WRITE_BUFFER: return enqueue_write_buffer_decl;
+    case RELEASE_MEMORY_OBJ: return release_memory_obj_decl;
+    case RELEASE_CONTEXT: return release_context_decl;
+    case RELEASE_COMMAND_QUEUE: return release_command_queue_decl;
+    case RELEASE_PROGRAM: return release_program_decl;
+    case RELEASE_KERNEL: return release_kernel_decl;
+    case GET_PLATFORM_IDS: return get_platform_ids_decl;
+    case WAIT_FOR_EVENTS: return get_wait_for_events_decl;
+    default: gcc_unreachable ();
+    }
+}
+
+/* Create call
+   | clGetContextInfo (h_context, CL_CONTEXT_DEVICES, 0, 0,
+   |                   &n_context_descriptor_size);
+
+   POINTER_TO_SIZE if &n_context_descriptor_size.  */
+
+static tree
+opencl_create_clGetContextInfo_1 (tree pointer_to_size)
+{
+  tree function = opencl_create_function_decl (GET_CONTEXT_INFO);
+  tree zero_pointer = null_pointer_node;
+  tree cl_contex_devices = build_int_cst (NULL_TREE, CL_CONTEXT_DEVICES);
+  tree context_var = h_context;
+  tree call = build_call_expr (function, 5,
+                               context_var,
+                               cl_contex_devices,
+                               integer_zero_node,
+                               zero_pointer,
+                               pointer_to_size);
+  return call;
+}
+
+/* Create call
+   | clGetContextInfo (h_context, CL_CONTEXT_DEVICES,
+   |                   n_context_descriptor_size, A_DEVICES, 0);
+
+   POINTER_TO_SIZE if &n_context_descriptor_size.  */
+
+static tree
+opencl_create_clGetContextInfo_2 (tree size, tree a_devices)
+{
+  tree function = opencl_create_function_decl (GET_CONTEXT_INFO);
+  tree zero_pointer = null_pointer_node;
+  tree cl_contex_devices = build_int_cst (NULL_TREE, CL_CONTEXT_DEVICES);
+  tree context_var = h_context;
+  tree call = build_call_expr (function, 5,
+                               context_var,
+                               cl_contex_devices,
+                               size,
+                               a_devices,
+                               zero_pointer);
+  return call;
+}
+
+/* Create context_properties array variable.  */
+
+static tree
+opencl_create_context_properties (void)
+{
+  tree cl_context_properties_type = long_integer_type_node;
+
+  tree index_type = build_index_type (build_int_cst (NULL_TREE, 3));
+  tree array_type = build_array_type (cl_context_properties_type,
+                                      index_type);
+  return opencl_create_tmp_var (array_type, "context_properties");
+}
+
+/* Place calls to obtain current platform id to INIT_EDGE.
+   Place obtained id to VAR.  */
+
+static edge
+opencl_set_context_properties (edge init_edge, tree var)
+{
+  tree function = opencl_create_function_decl (GET_PLATFORM_IDS);
+  tree cl_context_properties_type = long_integer_type_node;
+  tree call;
+  tree call2;
+  gimple_stmt_iterator g_iter;
+
+  basic_block bb = split_edge (init_edge);
+
+  init_edge = single_succ_edge (bb);
+
+  g_iter = gsi_last_bb (bb);
+  call = build4 (ARRAY_REF, cl_context_properties_type,
+		 var, integer_zero_node, NULL_TREE, NULL_TREE);
+  call2 = build_int_cst (NULL_TREE, CL_CONTEXT_PLATFORM);
+  call2 = build1 (CONVERT_EXPR, cl_context_properties_type, call2);
+
+  call = build2 (MODIFY_EXPR, cl_context_properties_type,
+                 call, call2);
+
+  force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
+                            GSI_CONTINUE_LINKING);
+  g_iter = gsi_last_bb (bb);
+
+  call = build4 (ARRAY_REF, cl_context_properties_type,
+                 var, integer_one_node, NULL_TREE, NULL_TREE);
+  call2 = build_call_expr (function, 3,
+                           integer_one_node,
+                           build_addr (call, current_function_decl),
+                           null_pointer_node);
+  force_gimple_operand_gsi (&g_iter, call2, true, NULL, false,
+                            GSI_CONTINUE_LINKING);
+
+  call = build4 (ARRAY_REF, cl_context_properties_type,
+                 var, build_int_cst (NULL_TREE, 2), NULL_TREE, NULL_TREE);
+  call = build2 (MODIFY_EXPR, cl_context_properties_type,
+                 call, fold_convert (cl_context_properties_type,
+                                     integer_zero_node));
+  force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
+                            GSI_CONTINUE_LINKING);
+  return init_edge;
+}
+
+/* Create call
+   clCreateContextFromType (PROPERTIES, CL_DEVICE_TYPE_GPU, 0, 0, 0);  */
+
+static tree
+opencl_create_clCreateContextFromType (tree properties)
+{
+  tree function = opencl_create_function_decl (CREATE_CONTEXT_FROM_TYPE);
+  tree zero_pointer = null_pointer_node;
+  tree device
+    = build_int_cst (NULL_TREE, flag_graphite_opencl_cpu
+		     ? CL_DEVICE_TYPE_CPU : CL_DEVICE_TYPE_GPU);
+  tree call;
+  call = build_call_expr (function, 5,
+                          build_addr (properties, current_function_decl),
+                          device,
+                          zero_pointer,
+                          zero_pointer,
+                          zero_pointer);
+  return call;
+}
+
+/* Create call
+   clCreateCommandQueue (h_context, DEV_ID, 0, 0);  */
+
+static tree
+opencl_create_clCreateCommandQueue (tree dev_id)
+{
+
+  tree function = opencl_create_function_decl (CREATE_COMMAND_QUEUE);
+  tree zero_pointer = null_pointer_node;
+  tree context = h_context;
+  tree call = build_call_expr (function, 4,
+                               context,
+                               dev_id,
+                               zero_pointer,
+                               zero_pointer);
+  return call;
+}
+
+/* Create call malloc (ARG).  */
+
+static tree
+opencl_create_malloc_call (tree arg)
+{
+  tree function_type =
+    build_function_type_list (ptr_type_node,
+			      integer_type_node,
+			      NULL_TREE);
+  tree function = build_fn_decl ("malloc", function_type);
+
+  tree call = build_call_expr (function, 1, arg);
+  return call;
+}
+
+/* Init basic block in CODE_GEN structures.  */
+
+static void
+opencl_init_basic_blocks (opencl_main code_gen)
+{
+  code_gen->data_init_bb = opencl_create_bb (code_gen);
+  code_gen->kernel_edge = code_gen->main_edge;
+}
+
+/* Generate calls for opencl init functions and place them to INIT_EDGE.
+   Must be called only once in each function.  */
+
+static edge
+opencl_create_init_context (edge init_edge)
+{
+  tree tmp_type;
+  tree call;
+  tree n_context_descriptor_size
+    = opencl_create_tmp_var (size_type_node, "__ocl_nContextDescriptorSize");
+  tree a_devices = opencl_create_tmp_var (build_pointer_type (ptr_type_node),
+					  "__ocl_a_devices");
+  tree properties = opencl_create_context_properties ();
+  init_edge = opencl_set_context_properties (init_edge, properties);
+  call = opencl_create_clCreateContextFromType (properties);
+  call = build2 (MODIFY_EXPR, TREE_TYPE (h_context),
+                 h_context, call);
+  init_edge = opencl_add_safe_call_on_edge (call, false, init_edge);
+  tmp_type = build_pointer_type
+    (TREE_TYPE (n_context_descriptor_size));
+  call = build1 (ADDR_EXPR, tmp_type,
+                 n_context_descriptor_size);
+  call = opencl_create_clGetContextInfo_1 (call);
+  init_edge = opencl_add_safe_call_on_edge (call, true, init_edge);
+  call = opencl_create_malloc_call (n_context_descriptor_size);
+  call = fold_convert (TREE_TYPE (a_devices), call);
+  call = build2 (MODIFY_EXPR, TREE_TYPE (a_devices), a_devices, call);
+  init_edge = opencl_add_safe_call_on_edge (call, false, init_edge);
+  call = opencl_create_clGetContextInfo_2 (n_context_descriptor_size,
+                                           a_devices);
+  init_edge = opencl_add_safe_call_on_edge (call, true, init_edge);
+  tmp_type = TREE_TYPE (TREE_TYPE (a_devices));
+  call = build1 (INDIRECT_REF, tmp_type, a_devices);
+  call = opencl_create_clCreateCommandQueue (call);
+  call = build2 (MODIFY_EXPR, TREE_TYPE (h_cmd_queue),
+                 h_cmd_queue, call);
+  init_edge = opencl_add_safe_call_on_edge (call, false, init_edge);
+  return init_edge;
+}
+
+/* Fill array VEC with all poly basic blocks in clast statement ROOT.  */
+
+static void
+build_poly_bb_vec (struct clast_stmt * root,
+                   VEC (poly_bb_p, heap) ** vec)
+{
+  while (root)
+    {
+      if (CLAST_STMT_IS_A (root, stmt_user))
+        {
+          poly_bb_p tmp
+	    = (poly_bb_p) cloog_statement_usr
+	    (((struct clast_user_stmt *) root)->statement);
+          VEC_safe_push (poly_bb_p, heap, *vec, tmp);
+        }
+      else if (CLAST_STMT_IS_A (root, stmt_for))
+        build_poly_bb_vec (((struct clast_for *) root)->body, vec);
+      else if (CLAST_STMT_IS_A (root, stmt_guard))
+        build_poly_bb_vec (((struct clast_guard *) root)->then, vec);
+      else if (CLAST_STMT_IS_A (root, stmt_block))
+        build_poly_bb_vec (((struct clast_block *) root)->body, vec);
+      root = root->next;
+    }
+}
+
+/* Check whether there is a dependency between PBB1 and PBB2 on level LEVEL.
+   CAN_BE_PRIVATE indicates which variables can be privatizated.
+   CODE_GEN holds information related to code generation.  */
+
+static bool
+opencl_dependency_between_pbbs_p (opencl_main code_gen, poly_bb_p pbb1,
+                                  poly_bb_p pbb2, int level,
+                                  bitmap can_be_private)
+{
+  int i, j;
+  poly_dr_p pdr1, pdr2;
+  timevar_push (TV_GRAPHITE_DATA_DEPS);
+  for (i = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb1), i, pdr1); i++)
+    {
+      data_reference_p ref1 = (data_reference_p)PDR_CDR (pdr1);
+      opencl_data data_1 = opencl_get_data_by_data_ref (code_gen, ref1);
+      if (bitmap_bit_p (can_be_private, data_1->id))
+        continue;
+
+      for (j = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb2), j, pdr2); j++)
+        {
+          data_reference_p ref2 = (data_reference_p)PDR_CDR (pdr2);
+
+          opencl_data data_2 = opencl_get_data_by_data_ref (code_gen, ref2);
+
+          if (bitmap_bit_p (can_be_private, data_2->id))
+            continue;
+
+          if (graphite_carried_dependence_level_k (pdr1, pdr2, level))
+            {
+              timevar_pop (TV_GRAPHITE_DATA_DEPS);
+              return true;
+            }
+        }
+    }
+
+  timevar_pop (TV_GRAPHITE_DATA_DEPS);
+  return false;
+}
+
+/* Returns true, if there is dependency in clast loop STMT on depth DEPTH.
+   CODE_GEN holds information related to code generation.  */
+
+bool
+dependency_in_clast_loop_p (opencl_main code_gen, opencl_clast_meta meta,
+                            struct clast_for * stmt, int depth)
+{
+  VEC (poly_bb_p, heap) * pbbs = VEC_alloc (poly_bb_p, heap, 10);
+  int level = get_scattering_level (depth);
+  int i;
+  poly_bb_p pbb1;
+  bitmap can_be_private;
+  build_poly_bb_vec (stmt->body, &pbbs);
+
+  can_be_private = meta->can_be_private;
+
+  for (i = 0; VEC_iterate (poly_bb_p, pbbs, i, pbb1); i++)
+    {
+      int j;
+      poly_bb_p pbb2;
+      for (j = 0; VEC_iterate (poly_bb_p, pbbs, j, pbb2); j++)
+        if (opencl_dependency_between_pbbs_p (code_gen, pbb1, pbb1,
+                                              level, can_be_private))
+          {
+            VEC_free (poly_bb_p, heap, pbbs);
+            return true;
+          }
+    }
+  VEC_free (poly_bb_p, heap, pbbs);
+  return false;
+}
+
+/* Init graphite-opencl pass.  Must be called in each function before
+   any scop processing.  */
+
+void
+graphite_opencl_init (void)
+{
+  opencl_create_gimple_variables ();
+
+  /* cl_program h_program */
+  h_program
+    = opencl_create_static_ptr_variable ("__ocl_h_program");
+
+  opencl_function_kernels = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE);
+  main_program_src = dyn_string_new (100);
+
+  opencl_array_data = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
+  array_data_to_tree = htab_create (10, map_tree_to_data_to_hash,
+                                    map_tree_to_data_cmp, free);
+
+  opencl_create_function_decl (STATIC_INIT);
+}
+
+/* Create calls to initialize static data for current function and
+   place them to INIT_EDGE.  */
+
+static edge
+opencl_init_static_data (edge init_edge)
+{
+  int i;
+  opencl_data curr;
+  for (i = 0; VEC_iterate (opencl_data, opencl_array_data, i, curr); i ++)
+    {
+      tree tmp;
+      if (!curr->used_on_device)
+        continue;
+
+      tmp = opencl_create_memory_for_pointer (curr);
+      tmp = build2 (MODIFY_EXPR, ptr_type_node, curr->device_object, tmp);
+      init_edge = opencl_add_safe_call_on_edge (tmp, false, init_edge);
+    }
+  return init_edge;
+}
+
+/* Finalize graphite-opencl pass for current function.  Place all required
+   calls to STATIC_INIT_EDGE.  Must be called after all scop processing
+   in current function.  */
+
+void
+graphite_opencl_finalize (edge static_init_edge)
+{
+  int i;
+  opencl_data curr;
+  if (dyn_string_length (main_program_src) != 0)
+    {
+      tree call = build2 (EQ_EXPR, boolean_type_node,
+                          h_program, null_pointer_node);
+      basic_block buff_init_block = split_edge (static_init_edge);
+      edge before_init;
+      edge init_edge;
+
+      static_init_edge = single_succ_edge (buff_init_block);
+
+      create_empty_if_region_on_edge (static_init_edge, call);
+
+      static_init_edge = opencl_create_function_call
+	(get_true_edge_from_guard_bb (static_init_edge->dest));
+      static_init_edge = opencl_init_static_data (static_init_edge);
+      before_init = single_pred_edge (buff_init_block);
+      call = build2 (EQ_EXPR, boolean_type_node,
+                     h_context, null_pointer_node);
+
+      create_empty_if_region_on_edge (before_init, call);
+      init_edge = get_true_edge_from_guard_bb (before_init->dest);
+      init_edge = opencl_create_init_context (init_edge);
+    }
+  dyn_string_delete (main_program_src);
+  for (i = 0; VEC_iterate (opencl_data, opencl_array_data, i, curr); i++)
+    opencl_data_delete (curr);
+
+  VEC_free (tree, heap, opencl_function_kernels);
+  VEC_free (opencl_data, heap, opencl_array_data);
+  recompute_all_dominators ();
+  update_ssa (TODO_update_ssa);
+}
+
+/* Debug functions for deined data structures.  */
+
+static void
+dump_flag_to_file (const char * name, bool cond,
+                   FILE * file, int indent)
+{
+  indent_to (file, indent);
+  fprintf (file, "%s = %s", name, cond? "true" : "false");
+}
+
+void
+dump_opencl_data (opencl_data data, FILE * file, bool verbose)
+{
+  fprintf (file, "Data id = %d\n", data->id);
+  fprintf (file, "Data dimension = %d\n", data->data_dim);
+  fprintf (file, "Data depth = %d\n", data->depth);
+  fprintf (file, "Flags");
+  indent_to (file, 2);
+  fprintf (file, "Global");
+  dump_flag_to_file ("Static", data->is_static, file, 4);
+  dump_flag_to_file ("Can be private", data->can_be_private, file, 4);
+  dump_flag_to_file ("Used on device", data->used_on_device, file, 4);
+  dump_flag_to_file ("Ever read on device",
+                     data->ever_read_on_device, file, 4);
+
+  dump_flag_to_file ("Ever written on device",
+                     data->ever_written_on_device, file, 4);
+
+  dump_flag_to_file ("Supported", data->supported, file, 4);
+  indent_to (file, 2);
+  fprintf (file, "Local");
+
+  dump_flag_to_file ("Up to date on device",
+                     data->up_to_date_on_device, file, 4);
+  dump_flag_to_file ("Up to date on host",
+                     data->up_to_date_on_host, file, 4);
+
+  dump_flag_to_file ("Inited in current loop on host",
+                     data->inited_in_current_loop_on_host, file, 4);
+
+  dump_flag_to_file ("Inited in current loop on device",
+                     data->inited_in_current_loop_on_device, file, 4);
+
+  dump_flag_to_file ("Written in current body",
+                     data->written_in_current_body, file, 4);
+
+  dump_flag_to_file ("Read in current body",
+                     data->read_in_current_body, file, 4);
+  dump_flag_to_file ("Privatized", data->privatized, file, 4);
+
+  fprintf (file, "\n");
+
+  if (verbose)
+    {
+      fprintf (file, "\nObject\n");
+      print_node_brief (file, "", data->object, 2);
+
+      fprintf (file, "\nDevice object\n");
+      print_node_brief (file, "", data->device_object, 2);
+
+      fprintf (file, "\nSize value\n");
+      print_node_brief (file, "", data->size_value, 2);
+
+      fprintf (file, "\nSize variable\n");
+      print_node_brief (file, "", data->size_variable, 2);
+
+      fprintf (file, "\nExact object\n");
+      print_node_brief (file, "", data->exact_object, 2);
+    }
+}
+
+DEBUG_FUNCTION void
+debug_opencl_data (opencl_data data, bool verbose)
+{
+  dump_opencl_data (data, stderr, verbose);
+}
+
+void
+dump_opencl_body (opencl_body body, FILE * file, bool verbose)
+{
+  fprintf (file, "\n%s\n\n", body->name);
+  fprintf (file, "First iterator: %s\n", body->first_iter);
+  fprintf (file, "Last iterator: %s\n", body->last_iter);
+  fprintf (file, "Number of data writes = %d\n\n", body->num_of_data_writes);
+  fprintf (file, "Function header::\n");
+  fprintf (file, "%s\n\n", dyn_string_buf (body->header));
+  fprintf (file, "Non scalar args::\n");
+  fprintf (file, "%s\n\n", dyn_string_buf (body->non_scalar_args));
+  fprintf (file, "Pre header::\n");
+  fprintf (file, "%s\n\n", dyn_string_buf (body->pre_header));
+  fprintf (file, "Body::\n");
+  fprintf (file, "%s\n\n", dyn_string_buf (body->body));
+
+  fprintf (file, "Number of executions::\n");
+  print_node_brief (file, "", body->num_of_exec, 2);
+
+  if (verbose)
+    print_clast_stmt (file, body->clast_body);
+}
+
+DEBUG_FUNCTION void
+debug_opencl_body (opencl_body body, bool verbose)
+{
+  dump_opencl_body (body, stderr, verbose);
+}
+
+void
+dump_opencl_clast_meta (opencl_clast_meta meta, FILE * file,
+                        bool verbose, int indent)
+{
+  if (!verbose)
+    /* Just print structure of meta.  */
+    {
+      while (meta)
+        {
+          indent_to (file, indent);
+          fprintf (file, "<in = %d, out = %d, dev = %s, ok = %s>",
+                   meta->in_depth, meta->out_depth,
+                   meta->on_device?"true":"false",
+                   meta->access_unsupported?"false":"true");
+          dump_opencl_clast_meta (meta->body, file, false, indent + 4);
+          meta = meta->next;
+        }
+    }
+  else
+    {
+      fprintf (file, "<in = %d, out = %d, dev = %s, ok = %s>",
+               meta->in_depth, meta->out_depth,
+               meta->on_device?"true":"false",
+               meta->access_unsupported?"false":"true");
+
+      fprintf (file, "\nModified on host::\n");
+      debug_bitmap_file (file, meta->modified_on_host);
+
+      fprintf (file, "\nModified on device::\n");
+      debug_bitmap_file (file, meta->modified_on_device);
+
+      fprintf (file, "\nAccess::\n");
+      debug_bitmap_file (file, meta->access);
+
+      fprintf (file, "\nCan be private::\n");
+      debug_bitmap_file (file, meta->can_be_private);
+    }
+}
+
+DEBUG_FUNCTION void
+debug_opencl_clast_meta (opencl_clast_meta meta, bool verbose)
+{
+  dump_opencl_clast_meta (meta, stderr, verbose, 0);
+}
+
+static int
+print_char_p_htab (void ** h, void * v)
+{
+  char ** ptr = (char **)h;
+  FILE * file = (FILE *)v;
+  fprintf (file, "  %s\n", *ptr);
+  return 1;
+}
+
+static int
+print_tree_to_data_htab (void ** h, void * v)
+{
+  map_tree_to_data * map = (map_tree_to_data *)h;
+  FILE * file = (FILE *)v;
+  tree key = (*map)->key;
+  opencl_data data = (*map)->value;
+  print_node_brief (file, "key = ", key, 2);
+  fprintf (file, " data_id =  %d\n", data->id);
+  return 1;
+}
+
+static int
+print_ref_to_data_htab (void ** h, void * v)
+{
+  map_ref_to_data * map = (map_ref_to_data *)h;
+  FILE * file = (FILE *)v;
+  data_reference_p key = (*map)->key;
+  opencl_data data = (*map)->value;
+  fprintf (file, "key::\n");
+  dump_data_reference (file, key);
+  fprintf (file, "data_id =  %d\n\n", data->id);
+  return 1;
+}
+
+void
+dump_opencl_main (opencl_main code_gen, FILE * file, bool verbose)
+{
+  fprintf (file, "Current meta::\n");
+  dump_opencl_clast_meta (code_gen->curr_meta, file, false, 2);
+  fprintf (file, "\n");
+  if (code_gen->current_body)
+    {
+      fprintf (file, "Current body::\n");
+      dump_opencl_body (code_gen->current_body, file, verbose);
+    }
+  fprintf (file, "\n\nData init basic block::\n");
+  dump_bb (code_gen->data_init_bb, stderr, 0);
+
+  if (code_gen->defined_vars)
+    {
+      fprintf (file, "Defined variables::\n");
+      htab_traverse_noresize (code_gen->defined_vars, print_char_p_htab,
+                              file);
+    }
+
+  if (code_gen->global_defined_vars)
+    {
+      fprintf (file, "Global defined variables::\n");
+      htab_traverse_noresize (code_gen->global_defined_vars,
+                              print_char_p_htab, file);
+    }
+  fprintf (file, "Refs to data::\n");
+  htab_traverse_noresize (code_gen->ref_to_data,
+                          print_ref_to_data_htab, file);
+
+  fprintf (file, "Trees to data::\n");
+  htab_traverse_noresize (code_gen->tree_to_data,
+                          print_tree_to_data_htab, file);
+
+  if (verbose)
+    fprintf (file, "%s\n", dyn_string_buf (code_gen->main_program));
+}
+
+DEBUG_FUNCTION void
+debug_opencl_main (opencl_main code_gen, bool verbose)
+{
+  dump_opencl_main (code_gen, stderr, verbose);
+}
+
+DEBUG_FUNCTION void
+debug_opencl_program (void)
+{
+  fprintf (stderr, "%s", dyn_string_buf (main_program_src));
+}
+
+#endif
+#include "gt-graphite-opencl.h"
diff --git a/gcc/graphite-opencl.h b/gcc/graphite-opencl.h
new file mode 100644
index 0000000..4913b09
--- /dev/null
+++ b/gcc/graphite-opencl.h
@@ -0,0 +1,254 @@ 
+/* GRAPHITE-OpenCL pass.
+   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+#define OPENCL_INIT_BUFF_SIZE 40
+
+/* Data structure which holds information about single array or pointer
+   in current scop.  */
+struct opencl_data_def
+{
+  /* Base object.  In case of arrays base array replaced by pointer to this
+     array.  */
+  tree object;
+  /* Device object, corresponding to base object.  */
+  tree device_object;
+  /* Size of data references in scop.  */
+  tree size_value;
+
+  /* Variable to hold size_value.  */
+  tree size_variable;
+
+  /* Unique id.  */
+  int id;
+
+  /* If true, then given data represents static object.  */
+  bool is_static;
+
+  /* Exact object represented by given data.  */
+  tree exact_object;
+
+  /* If true, given data can be privatized in current context.  */
+  bool can_be_private;
+  /* Current state.  */
+  bool up_to_date_on_device;
+  bool up_to_date_on_host;
+
+  bool inited_in_current_loop_on_host;
+  bool inited_in_current_loop_on_device;
+
+  bool written_in_current_body;
+  bool read_in_current_body;
+
+  bool privatized;
+
+  /* Data dimension.  */
+  int data_dim;
+  int depth;
+
+  /* Access information.  */
+  bool used_on_device;
+  bool ever_read_on_device;
+  bool ever_written_on_device;
+
+  bool supported;
+};
+
+typedef struct opencl_data_def * opencl_data;
+typedef const char * char_p;
+
+DEF_VEC_P (opencl_data);
+DEF_VEC_ALLOC_P (opencl_data, heap);
+
+/* Data structure with meta information about clast statement.
+   |
+   |                         LOOP_1 {out_depth = 0, in_depth = 3}
+   |                         /  \
+   |                        /    \
+   |                       /      \
+   |                      /        \
+   |  {out_depth = 1, LOOP_2     STMT_1 {in_depth = 0,
+   |    in_depth = 2}                    out_depth = 1}
+   |         |
+   |         |
+   |         |
+   |       LOOP_3 {out_depth = 2, in_depth = 1}
+   |         |
+   |         |
+   |         |
+   |       STMT_2 {out_depth = 3, in_depth = 3}
+*/
+struct opencl_clast_meta_def
+{
+  /* Outer depth.  */
+  int out_depth;
+  /* Inner depth.  */
+  int in_depth;
+  /* True means, that statement will be executed on device.  */
+  bool on_device;
+  struct opencl_clast_meta_def * next;
+  struct opencl_clast_meta_def * body;
+  struct opencl_clast_meta_def * parent;
+
+  /* All init code for this statement will be placed on this edge.  */
+  edge init_edge;
+
+  /* Data modifications in this statement (including children).  */
+  bitmap modified_on_device;
+  bitmap modified_on_host;
+
+  /* Data, which can be private in current loop.  */
+  bitmap can_be_private;
+
+  /* Access information.  */
+  bool access_unsupported;
+  bitmap access;
+
+  VEC (opencl_data, heap) *post_pass_to_host;
+  VEC (opencl_data, heap) *post_pass_to_device;
+};
+
+typedef struct opencl_clast_meta_def *opencl_clast_meta;
+
+DEF_VEC_P (char_p);
+DEF_VEC_ALLOC_P (char_p, heap);
+
+/* Single opencl kernel.  */
+struct graphite_opencl_kernel_body
+{
+  /* Function body.  */
+  dyn_string_t body;
+
+  /* Function header.  */
+  dyn_string_t header;
+
+  /* Variables declarations.  */
+  dyn_string_t pre_header;
+
+  /* Number of executions for kernel.  */
+  tree num_of_exec;
+
+  dyn_string_t non_scalar_args;
+
+  /* Number of write data references in kernel.  */
+  int num_of_data_writes;
+
+
+  /* Clast_stmt corresponding to kernel.  */
+  struct clast_stmt *clast_body;
+
+  /*  First iterator (scat_i).  */
+  const char *first_iter;
+
+  /*  First iterator (scat_{i+n}).  */
+  const char *last_iter;
+
+  /* Kernel name.  All kernels have names opencl_auto_function_<num>.  */
+  char name[40];
+
+  /* Variables, which must be passed to kernel.  */
+  VEC (tree, heap) *function_args;
+  VEC (tree, heap) *function_args_to_pass;
+  VEC (opencl_data, heap) *data_refs;
+};
+
+typedef struct graphite_opencl_kernel_body *opencl_body;
+
+
+/*  Main data structure for translating clast to gimple with opencl
+    function calls.  */
+struct graphite_opencl_creator
+{
+  /* Array with scat_* (iterators from clast data structures) names.  */
+  CloogNames *root_names;
+
+  /* Current kernel.  */
+  opencl_body current_body;
+
+  /* Current region.  */
+  sese region;
+
+  /* Meta information for current scop.  */
+  opencl_clast_meta clast_meta;
+
+  /* Current meta statement.  */
+  opencl_clast_meta curr_meta;
+
+  /* Htab of all defined local vars (not tmp vars, generated by gimplify).  */
+  htab_t defined_vars;
+
+  /* Htab of all defined global vars.  */
+  htab_t global_defined_vars;
+
+  /* Current edge.  */
+  edge main_edge;
+
+  /* Edge with kernels executing.  */
+  edge kernel_edge;
+
+  /* Current opencl program.  */
+  dyn_string_t main_program;
+
+  /* Information from clast structures.  */
+  htab_t newivs_index;
+  htab_t params_index;
+  VEC (tree, heap) *newivs;
+
+  /* Current loop.  */
+  loop_p context_loop;
+
+  /* Basic block with init statements for data.  */
+  basic_block data_init_bb;
+
+  /* Data used in kernel.  */
+  VEC(opencl_data, heap) *opencl_function_data;
+
+  VEC(tree, heap) *iv_map;
+
+  htab_t ref_to_data;
+  htab_t tree_to_data;
+};
+
+typedef struct graphite_opencl_creator * opencl_main;
+
+extern opencl_body opencl_clast_to_kernel (struct clast_for *,
+                                           opencl_main, int);
+extern tree dr_outermost_base_object (data_reference_p);
+extern void dump_opencl_main (opencl_main, FILE *, bool);
+extern void dump_opencl_body (opencl_body, FILE *, bool);
+extern void dump_opencl_clast_meta (opencl_clast_meta, FILE *, bool, int);
+extern void dump_opencl_data (opencl_data, FILE *, bool);
+extern void debug_opencl_main (opencl_main, bool);
+extern void debug_opencl_program (void);
+extern void debug_opencl_body (opencl_body, bool);
+extern void debug_opencl_clast_meta (opencl_clast_meta, bool);
+extern void debug_opencl_data (opencl_data, bool);
+
+/* Find opencl_data object by host object.  */
+extern opencl_data opencl_get_data_by_data_ref (opencl_main, data_reference_p);
+extern opencl_data opencl_get_data_by_tree (opencl_main, tree);
+extern tree opencl_tree_to_var (basic_block, tree);
+extern opencl_clast_meta opencl_create_meta_from_clast (opencl_main,
+                                                        struct clast_stmt *,
+                                                        int, opencl_clast_meta);
+extern bool opencl_should_be_parallel_p (opencl_main, opencl_clast_meta, int);
+
+/* Create new basic block on main edge and update main_edge.  */
+extern basic_block opencl_create_bb (opencl_main);
+extern bool dependency_in_clast_loop_p (opencl_main, opencl_clast_meta,
+                                        struct clast_for *, int);
+extern tree opencl_get_base_object_by_tree (tree);
diff --git a/gcc/graphite.c b/gcc/graphite.c
index 4ce484a..64779bb 100644
--- a/gcc/graphite.c
+++ b/gcc/graphite.c
@@ -281,10 +281,16 @@  graphite_transform_loops (void)
       {
 	build_poly_scop (scop);
 
+        if (flag_graphite_opencl)
+          graphite_opencl_init ();
+
 	if (POLY_SCOP_P (scop)
 	    && apply_poly_transforms (scop)
 	    && gloog (scop, bb_pbb_mapping))
 	  need_cfg_cleanup_p = true;
+
+        if (flag_graphite_opencl)
+          graphite_opencl_finalize (single_succ_edge (ENTRY_BLOCK_PTR));
       }
 
   htab_delete (bb_pbb_mapping);
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 4b51f40..d1d7142 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -308,7 +308,8 @@  gate_graphite_transforms (void)
       || flag_loop_strip_mine
       || flag_graphite_identity
       || flag_loop_parallelize_all
-      || flag_loop_flatten)
+      || flag_loop_flatten
+      || flag_graphite_opencl)
     flag_graphite = 1;
 
   return flag_graphite != 0;