@@ -1,3 +1,30 @@
+2010-12-25 Alexey Kravets <kayrick@ispras.ru>
+
+ * Makefile.in (OBJS-common): Add graphite-opencl-codegen.o,
+ graphite-opencl-meta-clast.o and graphite-opencl.o.
+ (graphite-opencl-codegen.o): New.
+ (graphite-opencl-meta-clast.o): New.
+ (graphite-opencl.o): New.
+ (GTFILES): Add graphite-opencl.c.
+ * common.opt (fgraphite-opencl): New.
+ (fgraphite-opencl-cpu): New.
+ (fgraphite-opencl-ignore-depth-heuristic): New.
+ (fgraphite-opencl-ignore-mem-heuristic): New.
+ (fgraphite-opencl-ignore-dep): New.
+ (fgraphite-opencl-ignore-types): New.
+ (fgraphite-opencl-depth-base): New.
+ (fgraphite-opencl-debug): New.
+ * graphite-opencl-codegen.c: New file.
+ * graphite-opencl-meta-clast.c: New file.
+ * graphite-opencl.c: New file.
+ * graphite-opencl.h: New file.
+ * graphite.c (graphite_transform_loops): Call graphite_opencl_init and
+ graphite_opencl_finalize.
+ * tree-ssa-loop.c (gate_graphite_transforms): Handle
+ flag_graphite_opencl.
+ * graphite-clast-to-gimple.c (gloog): Call opencl_transform_clast.
+ * dbgcnt.def (opencl_scop_cnt): New counter.
+
2010-12-25 Alexander Monakov <amonakov@ispras.ru>
* graphite-dependences.c (graphite_outer_subscript_bound): New.
@@ -1251,6 +1251,9 @@ OBJS-common = \
graph.o \
graphds.o \
graphite.o \
+ graphite-opencl-codegen.o \
+ graphite-opencl-meta-clast.o \
+ graphite-opencl.o \
graphite-blocking.o \
graphite-clast-to-gimple.o \
graphite-cloog-util.o \
@@ -2692,6 +2695,30 @@ graphite-blocking.o: graphite-blocking.c $(CONFIG_H) $(SYSTEM_H) \
$(DIAGNOSTIC_CORE_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
$(GIMPLE_H) $(TREE_DATA_REF_H) tree-pass.h domwalk.h value-prof.h \
graphite.h graphite-poly.h graphite-ppl.h
+graphite-opencl-codegen.o: graphite-opencl-codegen.c $(CONFIG_H) \
+ $(SYSTEM_H) coretypes.h $(TM_H) \
+ $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) \
+ $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) \
+ $(TREE_DATA_REF_H) tree-pass.h graphite.h graphite-opencl.h\
+ pointer-set.h value-prof.h graphite-ppl.h sese.h \
+ graphite-scop-detection.h graphite-clast-to-gimple.h graphite-poly.h \
+ graphite-dependences.h
+graphite-opencl-meta-clast.o: graphite-opencl-meta-clast.c $(CONFIG_H) \
+ $(SYSTEM_H) coretypes.h $(TM_H) \
+ $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) \
+ $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) \
+ $(TREE_DATA_REF_H) tree-pass.h graphite.h graphite-opencl.h\
+ pointer-set.h value-prof.h graphite-ppl.h sese.h \
+ graphite-scop-detection.h graphite-clast-to-gimple.h graphite-poly.h \
+ graphite-dependences.h
+graphite-opencl.o: graphite-opencl.c $(CONFIG_H) \
+ $(SYSTEM_H) coretypes.h $(TM_H) \
+ $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) \
+ $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) \
+ $(TREE_DATA_REF_H) tree-pass.h graphite.h graphite-opencl.h\
+ pointer-set.h value-prof.h graphite-ppl.h sese.h \
+ graphite-scop-detection.h graphite-clast-to-gimple.h graphite-poly.h \
+ graphite-dependences.h
graphite-clast-to-gimple.o: graphite-clast-to-gimple.c $(CONFIG_H) \
$(SYSTEM_H) coretypes.h $(TM_H) langhooks.h \
$(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(DIAGNOSTIC_CORE_H) \
@@ -3803,6 +3830,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
$(srcdir)/lto-symtab.c \
$(srcdir)/tree-ssa-alias.h \
$(srcdir)/ipa-prop.h \
+ $(srcdir)/graphite-opencl.c \
$(srcdir)/lto-streamer.h \
$(srcdir)/target-globals.h \
@all_gtfiles@
@@ -1119,6 +1119,38 @@ floop-parallelize-all
Common Report Var(flag_loop_parallelize_all) Optimization
Mark all loops as parallel
+fgraphite-opencl
+Common Report Var(flag_graphite_opencl) Optimization
+Export OpenCL from graphite
+
+fgraphite-opencl-cpu
+Common Report Var(flag_graphite_opencl_cpu) Optimization
+Generate CPU oriented OpenCL code.
+
+fgraphite-opencl-ignore-depth-heuristic
+Common Report Var(flag_graphite_opencl_no_depth_check)
+Ignore depth heuristic in graphite-opencl pass.
+
+fgraphite-opencl-ignore-mem-heuristic
+Common Report Var(flag_graphite_opencl_no_memory_transfer_check)
+Ignore mem transfer heuristic in graphite-opencl pass.
+
+fgraphite-opencl-ignore-dep
+Common Report Var(flag_graphite_opencl_no_dep_check)
+Ignore dependency checking in graphite-opencl pass.
+
+fgraphite-opencl-ignore-types
+Common Report Var(flag_graphite_opencl_no_types_check)
+Ignore supported type checking in graphite-opencl pass.
+
+fgraphite-opencl-depth-base=
+Common RejectNegative Joined UInteger Init(0) Var(opencl_base_depth_const)
+Value for depth heuristic in graphite-opencl pass.
+
+fgraphite-opencl-debug
+Common Report Var(flag_graphite_opencl_debug)
+Add checks for opencl calls return values.
+
floop-strip-mine
Common Report Var(flag_loop_strip_mine) Optimization
Enable Loop Strip Mining transformation
@@ -184,3 +184,4 @@ DEBUG_COUNTER (sms_sched_loop)
DEBUG_COUNTER (store_motion)
DEBUG_COUNTER (split_for_sched2)
DEBUG_COUNTER (tail_call)
+DEBUG_COUNTER (opencl_scop_cnt)
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see
#include "gimple.h"
#include "langhooks.h"
#include "sese.h"
+#include "dbgcnt.h"
#ifdef HAVE_cloog
#include "cloog/cloog.h"
@@ -1517,10 +1518,15 @@ gloog (scop_p scop, htab_t bb_pbb_mapping)
create_params_index (params_index, pc.prog);
- translate_clast (region, context_loop, pc.stmt,
- if_region->true_region->entry,
- &newivs, newivs_index,
- bb_pbb_mapping, 1, params_index);
+ if (flag_graphite_opencl && dbg_cnt (opencl_scop_cnt))
+ opencl_transform_clast (pc.stmt, region, if_region->true_region->entry,
+ scop, params_index);
+ else
+ translate_clast (region, context_loop, pc.stmt,
+ if_region->true_region->entry,
+ &newivs, newivs_index,
+ bb_pbb_mapping, 1, params_index);
+
graphite_verify ();
scev_reset ();
recompute_all_dominators ();
@@ -37,6 +37,12 @@ typedef struct bb_pbb_def
poly_bb_p pbb;
}bb_pbb_def;
+/* From graphite-opencl.c */
+extern void opencl_transform_clast (struct clast_stmt *, sese, edge,
+ scop_p, htab_t);
+extern void graphite_opencl_finalize (edge);
+extern void graphite_opencl_init (void);
+
/* From graphite-clast-to-gimple.c */
extern bool gloog (scop_p, htab_t);
extern cloog_prog_clast scop_to_clast (scop_p, CloogState *);
new file mode 100644
@@ -0,0 +1,1916 @@
+/* OpencCL code generation for GRAPHITE-OpenCL.
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* OpenCL code genration for GRAPHITE-OpenCL. This file implements
+ OpenCL kernel code generation from CLAST structures. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "ggc.h"
+#include "tree.h"
+#include "rtl.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "toplev.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pass.h"
+#include "domwalk.h"
+#include "value-prof.h"
+#include "pointer-set.h"
+#include "gimple.h"
+#include "sese.h"
+#include "output.h"
+#include <sys/time.h>
+#include "hashtab.h"
+#include "gimple-pretty-print.h"
+#include "tree.h"
+
+#ifdef HAVE_cloog
+#include "cloog/cloog.h"
+#include "ppl_c.h"
+#include "graphite-ppl.h"
+#include "graphite.h"
+#include "graphite-poly.h"
+#include "graphite-scop-detection.h"
+#include "graphite-clast-to-gimple.h"
+#include "graphite-dependences.h"
+#include "dyn-string.h"
+#include "graphite-opencl.h"
+
+
+/* These functions implement code generation from different clast
+ structures. */
+static void opencl_print_stmt_list (struct clast_stmt *, opencl_main, int);
+static void opencl_print_for (struct clast_for *, opencl_main, int);
+static void opencl_print_guard (struct clast_guard *, opencl_main, int);
+static void opencl_print_equation (struct clast_equation *, opencl_main);
+static void opencl_print_expr (struct clast_expr *, opencl_main);
+static void opencl_add_variable (const char *, tree, opencl_main);
+static void opencl_print_term (struct clast_term *, opencl_main);
+static void opencl_print_reduction (struct clast_reduction *, opencl_main);
+static void opencl_print_sum (struct clast_reduction *, opencl_main);
+static void opencl_print_binary (struct clast_binary *, opencl_main);
+static void opencl_print_minmax_c (struct clast_reduction *, opencl_main);
+
+/* These function implement code generation from different gimple
+ objects. */
+static void opencl_print_bb (basic_block, opencl_main);
+static void opencl_print_gimple_assign_operation (gimple, opencl_main);
+static void opencl_print_gimple_assign (gimple, opencl_main);
+static void opencl_print_gimple (gimple, opencl_main);
+static int opencl_print_operand (tree, bool, opencl_main);
+
+
+static void opencl_print_local_vars (const char *, const char *, const char *,
+ opencl_main);
+static void opencl_try_variable (opencl_main, tree);
+static const char *opencl_get_var_name (tree);
+static void opencl_build_defines (tree, opencl_main);
+static void opencl_expand_scalar_vars (opencl_main, gimple);
+static void opencl_add_function_arg (opencl_main, tree, const char *);
+static void opencl_add_data_refs_pbb (poly_bb_p, opencl_main);
+static void opencl_add_non_scalar_type_decl (tree, dyn_string_t, const char *);
+static const char *opencl_print_function_arg_with_type (const char *, tree);
+static bool check_and_mark_arg (opencl_main, const char *, bool);
+
+
+/* Compare two clast names based on their indexes. */
+
+static int
+opencl_cmp_scat (const char *scat1, const char *scat2)
+{
+ int len_1 = strlen (scat1);
+ int len_2 = strlen (scat2);
+
+ if (len_1 > len_2)
+ return 1;
+
+ if (len_1 < len_2)
+ return -1;
+
+ return strcmp (scat1, scat2);
+}
+
+/* This function implements !strcmp (STR1, STR2) call. */
+
+static int
+opencl_cmp_str (const void *str1, const void *str2)
+{
+ const char *c_str1 = (const char *) str1;
+ const char *c_str2 = (const char *) str2;
+ return !strcmp (c_str1, c_str2);
+}
+
+/* Get body of generating kernel function from CODE_GEN. */
+
+static dyn_string_t
+opencl_get_current_body (opencl_main code_gen)
+{
+ return code_gen->current_body->body;
+}
+
+/* Get header of generating kernel function from CODE_GEN. */
+
+static dyn_string_t
+opencl_get_current_header (opencl_main code_gen)
+{
+ return code_gen->current_body->header;
+}
+
+/* Appends a string STR to the header of the generating kernel
+ function from CODE_GEN. */
+
+static void
+opencl_append_string_to_header (const char *str, opencl_main code_gen)
+{
+ dyn_string_t tmp = opencl_get_current_header (code_gen);
+ dyn_string_append_cstr (tmp, str);
+}
+
+/* Appends a string STR to the body of the generating kernel function
+ from CODE_GEN. */
+
+static void
+opencl_append_string_to_body (const char *str, opencl_main code_gen)
+{
+ dyn_string_t tmp = opencl_get_current_body (code_gen);
+ dyn_string_append_cstr (tmp, str);
+}
+
+/* Appends an integer NUM to the string STR following FORMAT. */
+
+static void
+opencl_append_int_to_str (dyn_string_t str, long num, const char *format)
+{
+ char tmp[100];
+ sprintf (tmp, format, num);
+ dyn_string_append_cstr (str, tmp);
+}
+
+/* Appends an integer NUM to the header of the generating kernel
+ function from CODE_GEN following FORMAT. */
+
+static void
+opencl_append_num_to_header (opencl_main code_gen, long num,
+ const char *format)
+{
+ dyn_string_t tmp = opencl_get_current_header (code_gen);
+ opencl_append_int_to_str (tmp, num, format);
+}
+
+/* Appends an integer NUM to the body of the generating kernel
+ function from CODE_GEN following FORMAT. */
+
+static void
+opencl_append_num_to_body (opencl_main code_gen, long num, const char *format)
+{
+ dyn_string_t tmp = opencl_get_current_body (code_gen);
+ opencl_append_int_to_str (tmp, num, format);
+}
+
+/* Get TYPE scalar (base) part. */
+
+static tree
+opencl_get_main_type (tree type)
+{
+ while (TREE_CODE (type) == ARRAY_TYPE
+ || TREE_CODE (type) == POINTER_TYPE)
+ type = TREE_TYPE (type);
+ return build_pointer_type (type);
+}
+
+
+/* Create the base part of FUNCTION declaration, similar to this:
+ "__global void __opencl_function_0". */
+
+static void
+opencl_create_function_code (opencl_body function)
+{
+ static int opencl_function_counter = 0;
+ dyn_string_t dest = function->header;
+ dyn_string_append_cstr (dest, "__kernel void");
+ dyn_string_append_cstr (dest, " ");
+ dyn_string_append_cstr (dest, "opencl_auto_function_");
+ opencl_append_int_to_str (dest, opencl_function_counter, "%ld");
+ dyn_string_append_cstr (dest, " (");
+ sprintf (function->name, "%s%d","opencl_auto_function_",
+ opencl_function_counter++);
+}
+
+/* Create new instance of opencl_body. */
+
+static opencl_body
+opencl_body_create (void)
+{
+ opencl_body tmp = XNEW (struct graphite_opencl_kernel_body);
+
+ tmp->body = dyn_string_new (OPENCL_INIT_BUFF_SIZE);
+ tmp->pre_header = dyn_string_new (OPENCL_INIT_BUFF_SIZE);
+ tmp->header = dyn_string_new (OPENCL_INIT_BUFF_SIZE);
+ tmp->non_scalar_args = dyn_string_new (OPENCL_INIT_BUFF_SIZE);
+
+ tmp->num_of_data_writes = 0;
+ tmp->function_args = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE);
+ tmp->function_args_to_pass = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE);
+ tmp->data_refs = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
+ opencl_create_function_code (tmp);
+
+ return tmp;
+}
+
+/* Check whether clast expression EXPT is constant in current loop nest.
+ FIRST_SCAT is the iterator of outermost loop in current loop nest. */
+
+static bool
+opencl_constant_expression_p (struct clast_expr *expr, const char *first_scat)
+{
+ switch (expr->type)
+ {
+ case expr_term:
+ {
+ struct clast_term *term = (struct clast_term *) expr;
+ if (!(term->var))
+ return true;
+ {
+ const char *name = term->var;
+ if (strstr (name, "scat_") != name)
+ return true;
+
+ return (opencl_cmp_scat (first_scat, name) == 1);
+ }
+ }
+ case expr_red:
+ {
+ struct clast_reduction *red = (struct clast_reduction *) expr;
+ int i;
+ for (i = 0; i < red->n; i++)
+ if (!opencl_constant_expression_p (red->elts [i], first_scat))
+ return false;
+
+ return true;
+ }
+ case expr_bin:
+ {
+ struct clast_binary *bin = (struct clast_binary *) expr;
+ return opencl_constant_expression_p (bin->LHS, first_scat);
+ }
+ default:
+ gcc_unreachable ();
+ return false;
+ }
+}
+
+/* Check whether the clast_for LOOP has constant bounds. FIRST_SCAT
+ is the iterator of outermost loop in current loop nest. */
+
+static bool
+opencl_constant_loop_bound_p (struct clast_for *loop, const char *first_scat)
+{
+ return opencl_constant_expression_p (loop->UB, first_scat)
+ && opencl_constant_expression_p (loop->LB, first_scat);
+}
+
+/* If clast loop PARENT has only one child and it's a loop too, return
+ this child. Otherwise return NULL. */
+
+static struct clast_for *
+opencl_get_single_loop_child (struct clast_for *parent)
+{
+ struct clast_stmt *body = parent->body;
+
+ if (body->next
+ || !CLAST_STMT_IS_A (body, stmt_for))
+ return NULL;
+
+ return (struct clast_for *) body;
+}
+
+/* Calculate the maximal depth of a perfect nested loop nest with LOOP
+ as outermost loop. META holds meta information for loop LOOP,
+ DEPTH is the depth of LOOP in current loop nest, FIRST_SCAT is the
+ iterator of outermost loop in current loop nest. CODE_GEN holds
+ information related to OpenCL code generation. */
+
+static int
+opencl_get_perfect_nested_loop_depth (opencl_main code_gen,
+ opencl_clast_meta meta,
+ struct clast_for *loop,
+ int depth, const char *first_scat)
+{
+ struct clast_for *child;
+ if (dependency_in_clast_loop_p (code_gen, meta, loop, depth))
+ return 0;
+
+ child = opencl_get_single_loop_child (loop);
+
+ if (!child
+ || !opencl_constant_loop_bound_p (child, first_scat))
+ return 1;
+
+ return 1 + opencl_get_perfect_nested_loop_depth (code_gen, meta->body, child,
+ depth + 1, first_scat);
+}
+
+/* Get the type of the loop iterator for loop, represented by STMT.
+ LEVEL is the depth of this loop in current loop nest. CODE_GEN
+ holds information related to OpenCL code generation. */
+
+static tree
+opencl_get_loop_iter_type (struct clast_for *stmt, opencl_main code_gen,
+ int level)
+{
+ tree lb_type = gcc_type_for_clast_expr (stmt->LB, code_gen->region,
+ code_gen->newivs,
+ code_gen->newivs_index,
+ code_gen->params_index);
+ tree ub_type = gcc_type_for_clast_expr (stmt->UB, code_gen->region,
+ code_gen->newivs,
+ code_gen->newivs_index,
+ code_gen->params_index);
+ tree type = gcc_type_for_iv_of_clast_loop (stmt, level, lb_type, ub_type);
+
+ return type;
+}
+
+static const char *data_type;
+
+/* Simplified version of C-style type printing from c-aux-info.c. */
+
+static const char *
+gen_type_1 (const char *ret_val, tree t)
+{
+ switch (TREE_CODE (t))
+ {
+ case POINTER_TYPE:
+ if (TYPE_READONLY (t))
+ ret_val = concat ("const ", ret_val, NULL);
+ if (TYPE_VOLATILE (t))
+ ret_val = concat ("volatile ", ret_val, NULL);
+
+ ret_val = concat ("*", ret_val, NULL);
+
+ if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE
+ || TREE_CODE (TREE_TYPE (t)) == FUNCTION_TYPE)
+ ret_val = concat ("(", ret_val, ")", NULL);
+
+ ret_val = gen_type_1 (ret_val, TREE_TYPE (t));
+
+ return ret_val;
+
+ case ARRAY_TYPE:
+ if (!COMPLETE_TYPE_P (t) || TREE_CODE (TYPE_SIZE (t)) != INTEGER_CST)
+ ret_val = gen_type_1 (concat (ret_val, "[]", NULL), TREE_TYPE (t));
+ else if (int_size_in_bytes (t) == 0)
+ ret_val = gen_type_1 (concat (ret_val, "[0]", NULL), TREE_TYPE (t));
+ else
+ {
+ int size = int_size_in_bytes (t) / int_size_in_bytes (TREE_TYPE (t));
+ char buff[10];
+ sprintf (buff, "[%d]", size);
+ ret_val = gen_type_1 (concat (ret_val, buff, NULL), TREE_TYPE (t));
+ }
+ break;
+
+ case IDENTIFIER_NODE:
+ data_type = IDENTIFIER_POINTER (t);
+ break;
+
+ case TYPE_DECL:
+ data_type = IDENTIFIER_POINTER (DECL_NAME (t));
+ break;
+
+ case INTEGER_TYPE:
+ case FIXED_POINT_TYPE:
+ switch (TYPE_PRECISION (t))
+ {
+ case 8: data_type = "char"; break;
+ case 16: data_type = "short"; break;
+ case 32: data_type = "int"; break;
+ case 64: data_type = "long"; break;
+ default: gcc_unreachable ();
+ }
+ if (TYPE_UNSIGNED (t))
+ data_type = concat ("unsigned ", data_type, NULL);
+ break;
+
+ case REAL_TYPE:
+ switch (TYPE_PRECISION (t))
+ {
+ case 32: data_type = "float"; break;
+ case 64: data_type = "double"; break;
+ default: gcc_unreachable ();
+ }
+ break;
+
+ case VOID_TYPE:
+ data_type = "void";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (TYPE_READONLY (t))
+ ret_val = concat ("const ", ret_val, NULL);
+ if (TYPE_VOLATILE (t))
+ ret_val = concat ("volatile ", ret_val, NULL);
+ if (TYPE_RESTRICT (t))
+ ret_val = concat ("restrict ", ret_val, NULL);
+ return ret_val;
+}
+
+/* Generate a string representation of a declaration of varable named
+ NAME with type T. */
+
+static const char *
+gen_type_with_name (const char *name, tree t)
+{
+ const char *type_part = gen_type_1 (name, t);
+ return concat (data_type, " ", type_part, NULL);
+}
+
+/* Replace perfect nested loop nest represented by F with opencl kernel.
+ For example, loop nest like this
+
+ | for (scat_1 = 0; scat_1 < M; i ++)
+ | for (scat_2 = 0; scat_2 < N; j ++)
+ | for (scat_3 = 0; scat_3 < L; k ++)
+ | stmt (i, j, k)
+
+ will be replased by kernel, where scat_1, scat_2, scat_3
+ depends on thread global id. Number of threads for this kernel
+ will be M * N * L. DEPTH is the depth of F in current loop nest.
+ CODE_GEN holds information related to OpenCL code generation. BODY holds
+ information current OpenCL kernel. */
+
+static void
+opencl_perfect_nested_to_kernel (opencl_main code_gen, struct clast_for *f,
+ opencl_body body, int depth)
+{
+ VEC (tree, heap) *mod = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE);
+
+ VEC (tree, heap) *function_args = body->function_args;
+ const int perfect_depth
+ = opencl_get_perfect_nested_loop_depth (code_gen, code_gen->curr_meta, f,
+ depth, f->iterator);
+ struct clast_for *curr = f;
+ int counter = perfect_depth;
+ tree curr_base = integer_one_node;
+ basic_block calc_block = opencl_create_bb (code_gen);
+ opencl_append_string_to_body
+ ("size_t opencl_global_id = get_global_id (0);\n", code_gen);
+
+ body->first_iter = curr->iterator;
+
+ /* Iterate through all loops, which form perfect loop nest. */
+ while (counter--)
+ {
+ tree iv;
+ sese region = code_gen->region;
+ VEC (tree, heap) *newivs = code_gen->newivs;
+ htab_t newivs_index = code_gen->newivs_index;
+ htab_t params_index = code_gen->params_index;
+ const char *tmp;
+ const char *decl;
+
+ tree type = opencl_get_loop_iter_type (curr, code_gen, depth);
+
+ const char *type_str = gen_type_with_name (" ", type);
+
+ tree low_bound = clast_to_gcc_expression (type, curr->LB, region,
+ newivs, newivs_index,
+ params_index);
+
+ tree up_bound = clast_to_gcc_expression (type, curr->UB, region,
+ newivs, newivs_index,
+ params_index);
+ long stride = 1;
+ tree t_stride;
+ tree curr_loop_size;
+ int num = perfect_depth - counter - 1;
+
+ gcc_assert (curr->LB);
+ gcc_assert (curr->UB);
+
+ body->last_iter = curr->iterator;
+
+ low_bound = opencl_tree_to_var (calc_block, low_bound);
+ up_bound = opencl_tree_to_var (calc_block, up_bound);
+
+ if (mpz_cmp_si (curr->stride, 1) > 0)
+ stride = mpz_get_si (curr->stride);
+
+ t_stride = build_int_cst (NULL_TREE, stride);
+
+ curr_loop_size = build2 (MINUS_EXPR, type,
+ up_bound, low_bound);
+
+ curr_loop_size = build2 (PLUS_EXPR, type,
+ curr_loop_size,
+ fold_convert (type, integer_one_node));
+
+ if (stride != 1)
+ curr_loop_size
+ = build2 (TRUNC_DIV_EXPR, type,
+ curr_loop_size, t_stride);
+
+ curr_loop_size = opencl_tree_to_var (calc_block, curr_loop_size);
+
+ VEC_safe_push (tree, heap, mod, curr_loop_size);
+
+ iv = create_tmp_var (type, "scat_tmp_iter");
+
+ /* Declare loop iterator as local variable. */
+ opencl_append_string_to_body (type_str, code_gen);
+ opencl_append_string_to_body (curr->iterator, code_gen);
+
+ /* Calculate the value of the iterator of current loop based of the
+ number of current thread and numbers of iterators of inner loops. */
+ opencl_append_string_to_body (" = ((opencl_global_id / ", code_gen);
+ opencl_append_num_to_body (code_gen, num, "opencl_base_%d");
+ opencl_append_string_to_body (") % ", code_gen);
+ opencl_append_num_to_body (code_gen, num, "opencl_mod_%d");
+ opencl_append_string_to_body (") * ", code_gen);
+ opencl_append_num_to_body (code_gen, stride, "%d");
+ opencl_append_num_to_body (code_gen, num, " + opencl_first_%d;\n");
+
+ opencl_append_string_to_header (type_str, code_gen);
+ opencl_append_num_to_header (code_gen, num, "opencl_mod_%d, ");
+ opencl_append_string_to_header (type_str, code_gen);
+ opencl_append_num_to_header (code_gen, num, "opencl_first_%d, ");
+
+ /* Declare old loop iterator. */
+ tmp = opencl_get_var_name (iv);
+ check_and_mark_arg (code_gen, tmp, false);
+ decl = opencl_print_function_arg_with_type (tmp, type);
+ opencl_append_string_to_body (decl, code_gen);
+ opencl_append_string_to_body (" = ", code_gen);
+ opencl_append_string_to_body (curr->iterator, code_gen);
+ opencl_append_string_to_body (";\n", code_gen);
+
+ save_clast_name_index (code_gen->newivs_index, curr->iterator,
+ VEC_length (tree, code_gen->newivs));
+ VEC_safe_push (tree, heap, code_gen->newivs, iv);
+
+ /* Save number of iterations for loop. */
+ VEC_safe_push (tree, heap, function_args, curr_loop_size);
+ VEC_safe_push (tree, heap, function_args, low_bound);
+
+ body->clast_body = curr->body;
+ curr = opencl_get_single_loop_child (curr);
+ depth ++;
+ }
+
+ counter = perfect_depth;
+
+ /* Store number of iteration of inner loops for each loop in perfect
+ nest. */
+ while (counter --)
+ {
+ tree type = TREE_TYPE (curr_base);
+ const char *type_str = gen_type_with_name (" ", type);
+ tree curr = VEC_index (tree, mod, counter);
+
+ opencl_append_string_to_header (type_str, code_gen);
+ opencl_append_num_to_header (code_gen, counter, "opencl_base_%d, ");
+
+ VEC_safe_push (tree, heap, function_args, curr_base);
+
+ curr_base = build2 (MULT_EXPR, type, curr_base,
+ build1 (CONVERT_EXPR, type, curr));
+ curr_base = opencl_tree_to_var (calc_block, curr_base);
+ }
+
+ body->num_of_exec = fold_convert (integer_type_node, curr_base);
+ VEC_free (tree, heap, mod);
+}
+
+/* Generate code for loop statement F. DEPTH is the depth of F in
+ current loop nest. CODE_GEN holds information related to OpenCL
+ code generation. */
+
+static opencl_body
+opencl_print_loop (struct clast_for *f, opencl_main code_gen, int depth)
+{
+ opencl_body current_body = code_gen->current_body;
+
+ code_gen->global_defined_vars
+ = htab_create (10, htab_hash_string, opencl_cmp_str, NULL);
+
+ opencl_perfect_nested_to_kernel (code_gen, f, current_body, depth);
+
+ /* Define local loop iterators. */
+ opencl_print_local_vars (current_body->first_iter,
+ current_body->last_iter,
+ "unsigned int", code_gen);
+
+ /* Generate code for kernel body. */
+ opencl_print_stmt_list (current_body->clast_body, code_gen, depth + 1);
+ opencl_append_string_to_body ("}\n", code_gen);
+
+ if (current_body->num_of_data_writes)
+ {
+ dyn_string_t header = current_body->header;
+ int offset;
+
+ dyn_string_append (header, current_body->non_scalar_args);
+ offset = dyn_string_length (header) - 2;
+
+ if (*(dyn_string_buf (header) + offset) == ',')
+ *(dyn_string_buf (header) + offset) = ' ';
+
+ opencl_append_string_to_header (")\n{\n", code_gen);
+ }
+
+ return current_body;
+}
+
+/* Generate OpenCL code for clast_assignment A.
+ CODE_GEN holds information related to OpenCL code generation. */
+
+static void
+opencl_print_assignment (struct clast_assignment *a, opencl_main code_gen)
+{
+ /* Real assignment. */
+ if (a->LHS)
+ {
+ opencl_append_string_to_body (a->LHS, code_gen);
+ opencl_append_string_to_body (" = ", code_gen);
+ }
+
+ /* Just expression. */
+ opencl_print_expr (a->RHS, code_gen);
+}
+
+/* Return tree with variable, corresponging to given clast name NAME.
+ CODE_GEN holds information related to OpenCL code generation. */
+
+static tree
+opencl_clast_name_to_tree (opencl_main code_gen, const char *name)
+{
+ return clast_name_to_gcc (name, code_gen->region, code_gen->newivs,
+ code_gen->newivs_index, code_gen->params_index);
+}
+
+/* For a given clast name return that name, if it's local name in
+ kernel body or, otherwise, name of gimple variable created for this
+ scat_i in gimple. CODE_GEN holds information related to OpenCL
+ code generation. */
+
+static const char *
+opencl_get_scat_real_name (opencl_main code_gen, const char *name)
+{
+ /* NAME > FIRST_ITER */
+ if (opencl_cmp_scat (name, code_gen->current_body->first_iter) >= 0)
+ return name;
+
+ return
+ opencl_get_var_name (opencl_clast_name_to_tree (code_gen, name));
+}
+
+/* Add clast variable (scat_i) as kernel argument. NAME is a new name
+ of loop iterator (scat_*), REAL_NAME is an old (origin) name of
+ loop iterator. CODE_GEN holds information related to OpenCL code
+ generation. */
+
+static void
+opencl_add_scat_as_arg (opencl_main code_gen, const char *name,
+ const char *real_name)
+{
+ tree var;
+ if (!check_and_mark_arg (code_gen, real_name, false))
+ return;
+ var = opencl_clast_name_to_tree (code_gen, name);
+ opencl_add_function_arg (code_gen, var, real_name);
+}
+
+/* Generate OpenCL code for user statement U. Code will be generated
+ from basic block, related to U. Also induction variables mapping
+ to old variables must be calculated to process basic block.
+ CODE_GEN holds information related to OpenCL code generation. */
+
+static void
+opencl_print_user_stmt (struct clast_user_stmt *u, opencl_main code_gen)
+{
+ CloogStatement * cs;
+ poly_bb_p pbb;
+ gimple_bb_p gbbp;
+ basic_block bb;
+ int i;
+ int nb_loops = number_of_loops ();
+ code_gen->iv_map = VEC_alloc (tree, heap, nb_loops);
+
+ for (i = 0; i < nb_loops; i++)
+ VEC_safe_push (tree, heap, code_gen->iv_map, NULL_TREE);
+ build_iv_mapping (code_gen->iv_map, code_gen->region,
+ code_gen->newivs,
+ code_gen->newivs_index, u,
+ code_gen->params_index);
+
+ code_gen->defined_vars
+ = htab_create (10, htab_hash_string, opencl_cmp_str, NULL);
+ opencl_append_string_to_body ("{\n", code_gen);
+
+ cs = u->statement;
+ pbb = (poly_bb_p) cloog_statement_usr (cs);
+ gbbp = PBB_BLACK_BOX (pbb);
+ bb = GBB_BB (gbbp);
+ code_gen->context_loop = bb->loop_father;
+
+ opencl_add_data_refs_pbb (pbb, code_gen);
+ opencl_print_bb (bb, code_gen);
+ opencl_append_string_to_body ("}\n", code_gen);
+ htab_delete (code_gen->defined_vars);
+ code_gen->defined_vars = NULL;
+ VEC_free (tree, heap, code_gen->iv_map);
+}
+
+/* If tree node NODE defined in current sese build and insert define
+ statements for it, otherwise mark node as external (parameter for
+ kernel). If tree defined in current sese, also recursively build
+ defines for all trees in definition expression. */
+
+static void
+opencl_build_defines (tree node, opencl_main code_gen)
+{
+ enum tree_code code = TREE_CODE (node);
+ switch (code)
+ {
+ case SSA_NAME:
+ {
+ const char *tmp = opencl_get_var_name (node);
+ gimple def_stmt;
+
+ /* If name defined in other sese it is kernel's parameter. */
+ if (!defined_in_sese_p (node, code_gen->region))
+ return;
+
+ /* Bail out if this name was defined earlier either in this
+ or other region. */
+ if (*(const char **)htab_find_slot (code_gen->defined_vars,
+ tmp, INSERT))
+ return;
+
+ /* Get definition statement. */
+ def_stmt = SSA_NAME_DEF_STMT (node);
+ opencl_expand_scalar_vars (code_gen, def_stmt);
+ opencl_print_gimple (def_stmt, code_gen);
+ return;
+ }
+ case ARRAY_REF:
+ {
+ tree arr = TREE_OPERAND (node, 0);
+ tree offset = TREE_OPERAND (node, 1);
+ opencl_build_defines (arr, code_gen);
+ opencl_build_defines (offset, code_gen);
+ return;
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* For a given gimple statement STMT build definition for all names,
+ used in this stament. If name has been defined in other sese, mark
+ it as kernel parameter. CODE_GEN holds information related to
+ OpenCL code generation. */
+
+static void
+opencl_expand_scalar_vars (opencl_main code_gen, gimple stmt)
+{
+ ssa_op_iter iter;
+ use_operand_p use_p;
+ FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
+ {
+ tree use = USE_FROM_PTR (use_p);
+ if (!is_gimple_reg (use))
+ continue;
+ opencl_build_defines (use, code_gen);
+ }
+}
+
+/* Generate code for a single basic block BB. CODE_GEN holds
+ information related to OpenCL code generation. */
+
+static void
+opencl_print_bb (basic_block bb, opencl_main code_gen)
+{
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ opencl_expand_scalar_vars (code_gen, stmt);
+ opencl_print_gimple (stmt, code_gen);
+ }
+}
+
+/* Print operation simbol (`+' `-' `*') for assignment operation GMA.
+ CODE_GEN holds information related to OpenCL code generation. */
+
+static void
+opencl_print_gimple_assign_operation (gimple gmp, opencl_main code_gen)
+{
+ opencl_append_string_to_body
+ (op_symbol_code (gimple_assign_rhs_code (gmp)), code_gen);
+}
+
+/* Print pointer expression represented by EXPR. TYPE_SIZE represents
+ size of the base type for EXPR. CODE_GEN holds information related
+ to OpenCL code generation. */
+
+static void
+opencl_print_addr_operand (tree expr, tree type_size, opencl_main code_gen)
+{
+ if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
+ {
+ opencl_append_string_to_body ("(", code_gen);
+ opencl_print_operand (expr, false, code_gen);
+ opencl_append_string_to_body ("/", code_gen);
+ opencl_print_operand (type_size, false, code_gen);
+ opencl_append_string_to_body (")", code_gen);
+ }
+ else
+ opencl_print_operand (expr, false, code_gen);
+
+}
+
+/* Print unary gimple operation GMP. CODE_GEN holds information
+ related to OpenCL code generation. */
+
+static void
+opencl_print_unary (gimple gmp, opencl_main code_gen)
+{
+ switch (gimple_assign_rhs_code (gmp))
+ {
+ case BIT_NOT_EXPR:
+ opencl_append_string_to_body ("~", code_gen);
+ return;
+ case TRUTH_NOT_EXPR:
+ opencl_append_string_to_body ("!", code_gen);
+ return;
+ case NEGATE_EXPR:
+ opencl_append_string_to_body ("-", code_gen);
+ return;
+ case MODIFY_EXPR:
+ default:
+ return;
+ }
+}
+
+/* Generate code for min or max gimple operand GMP. CODE_GEN holds
+ information related to OpenCL code generation. */
+
+static void
+opencl_print_max_min_assign (gimple gmp, opencl_main code_gen)
+{
+ tree lhs = gimple_assign_lhs (gmp);
+ tree rhs1 = gimple_assign_rhs1 (gmp);
+ tree rhs2 = gimple_assign_rhs2 (gmp);
+ bool max = gimple_assign_rhs_code (gmp) == MAX_EXPR;
+
+ opencl_print_operand (lhs, true, code_gen);
+ opencl_append_string_to_body (max?" = fmax (":"= fmin (", code_gen);
+ opencl_print_operand (rhs1, false, code_gen);
+ opencl_append_string_to_body (",", code_gen);
+ opencl_print_operand (rhs2, false, code_gen);
+ opencl_append_string_to_body (");\n", code_gen);
+
+}
+
+/* Generate code for gimple assignment statement GMP. CODE_GEN holds
+ information related to OpenCL code generation. */
+
+static void
+opencl_print_gimple_assign (gimple gmp, opencl_main code_gen)
+{
+ int num_of_ops = gimple_num_ops (gmp);
+ tree lhs;
+ tree rhs1;
+ tree rhs2;
+ bool addr_expr;
+ int result;
+ tree result_size = NULL;
+
+ if (gimple_assign_rhs_code (gmp) == MAX_EXPR
+ || gimple_assign_rhs_code (gmp) == MIN_EXPR)
+ {
+ opencl_print_max_min_assign (gmp, code_gen);
+ return;
+ }
+ gcc_assert (num_of_ops == 2 || num_of_ops == 3);
+ lhs = gimple_assign_lhs (gmp);
+
+ addr_expr = (TREE_CODE (TREE_TYPE (lhs)) == POINTER_TYPE);
+ if (addr_expr)
+ result_size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs)));
+
+ rhs1 = gimple_assign_rhs1 (gmp);
+ rhs2 = gimple_assign_rhs2 (gmp);
+ result = opencl_print_operand (lhs, true, code_gen);
+ if (result != 0)
+ return;
+ opencl_append_string_to_body (" = ", code_gen);
+
+ if (addr_expr)
+ opencl_print_addr_operand (rhs1, result_size, code_gen);
+ else
+ {
+ if (rhs2 == NULL)
+ opencl_print_unary (gmp, code_gen);
+ opencl_print_operand (rhs1, false, code_gen);
+ }
+ if (rhs2 != NULL_TREE)
+ {
+ opencl_print_gimple_assign_operation (gmp, code_gen);
+ if (addr_expr)
+ opencl_print_addr_operand (rhs2, result_size, code_gen);
+ else
+ opencl_print_operand (rhs2, false, code_gen);
+ }
+ opencl_append_string_to_body (";\n",code_gen);
+}
+
+/* Generate code for arguments for gimple call statement GMP.
+ CODE_GEN hold information related to OpenCL code generation. */
+
+static void
+opencl_print_gimple_call_args (opencl_main code_gen, gimple gmp)
+{
+ size_t len = gimple_call_num_args (gmp);
+ size_t i;
+ opencl_append_string_to_body (" (",code_gen);
+ for (i = 0; i < len; i++)
+ {
+ opencl_print_operand (gimple_call_arg (gmp, i), false, code_gen);
+ if (i < len - 1)
+ opencl_append_string_to_body (", ",code_gen);
+ }
+ opencl_append_string_to_body (")",code_gen);
+}
+
+/* Replace some function names. */
+
+static const char *
+opencl_get_function_name (tree function)
+{
+ const char *gimple_name = IDENTIFIER_POINTER (DECL_NAME (function));
+ if (!strcmp (gimple_name, "__builtin_powf"))
+ return "pow";
+ return gimple_name;
+}
+
+/* Generate code for gimple call statement GMP. CODE_GEN holds information
+ related to OpenCL code generation. */
+
+static void
+opencl_print_gimple_call (opencl_main code_gen, gimple gmp)
+{
+ tree lhs = gimple_call_lhs (gmp);
+ tree function = gimple_call_fn (gmp);
+ opencl_print_operand (lhs, true, code_gen);
+ opencl_append_string_to_body (" = ", code_gen);
+
+ while (TREE_CODE (function) == ADDR_EXPR
+ || TREE_CODE (function) == INDIRECT_REF)
+ function = TREE_OPERAND (function, 0);
+ opencl_append_string_to_body (opencl_get_function_name (function), code_gen);
+ opencl_print_gimple_call_args (code_gen, gmp);
+ opencl_append_string_to_body (";\n",code_gen);
+}
+
+/* Generate code for gimple statment SMP. Now only assignment
+ operation are supported, but it seems enough for clast translation.
+ GIMPLE_COND statements are loop bound conditions and can be safely
+ ignored. CODE_GEN holds information related to OpenCL code
+ generation. */
+
+static void
+opencl_print_gimple (gimple gmp, opencl_main code_gen)
+{
+ if (!gmp)
+ return;
+
+ switch (gimple_code (gmp))
+ {
+ case GIMPLE_ASSIGN:
+ opencl_print_gimple_assign (gmp, code_gen);
+ break;
+ case GIMPLE_COND:
+ break;
+ case GIMPLE_PHI:
+ break;
+ case GIMPLE_CALL:
+ opencl_print_gimple_call (code_gen, gmp);
+ break;
+ case GIMPLE_DEBUG:
+ break;
+ case GIMPLE_LABEL:
+ {
+ tree label = gimple_label_label (gmp);
+ opencl_print_operand (label, false, code_gen);
+ opencl_append_string_to_body (": ", code_gen);
+ }
+ break;
+ default:
+ debug_gimple_stmt (gmp);
+ gcc_unreachable ();
+ }
+}
+
+/* Get name of the variable, represented by tree NODE. If variable is
+ temporary, generate name for it. */
+
+static const char *
+opencl_get_var_name (tree node)
+{
+ bool ssa_name = TREE_CODE (node) == SSA_NAME;
+ tree name;
+ int num = 0;
+ if (ssa_name)
+ {
+ num = SSA_NAME_VERSION (node);
+ node = SSA_NAME_VAR (node);
+ }
+ name = DECL_NAME (node);
+ if (name)
+ {
+ if (!ssa_name)
+ return identifier_to_locale (IDENTIFIER_POINTER (name));
+ else
+ {
+ const char *base = identifier_to_locale (IDENTIFIER_POINTER (name));
+ char *buff = XNEWVEC (char, strlen (base) + 5);
+ sprintf (buff, "%s_%d", base, num);
+ return buff;
+ }
+ }
+ else
+ {
+ int tmp_var_uid = DECL_UID (node);
+ char *tmp = XNEWVEC (char, 30);
+ sprintf (tmp, "opencl_var_%d_%d", tmp_var_uid, num);
+ return tmp;
+ }
+}
+
+/* Append variable name NAME to function body. Differs from appending
+ string by replacing `.' by `_'. CODE_GEN holds information related
+ to OpenCL code generation. */
+
+static void
+opencl_append_var_name (const char *name, opencl_main code_gen)
+{
+ int len = strlen (name);
+ char *tmp = XNEWVEC (char, len + 1);
+ int i;
+ for (i = 0; i <= len; i++)
+ {
+ char tt = name[i];
+ if (tt == '.')
+ tt = '_';
+ tmp[i] = tt;
+ }
+ opencl_append_string_to_body (tmp, code_gen);
+ free (tmp);
+}
+
+/* If variable VAR_DECL is not defined and it is not marked as a
+ parameter, mark it as a parameter and add it to parameters list.
+ CODE_GEN holds information related to OpenCL code generation. */
+
+static void
+opencl_try_variable (opencl_main code_gen, tree var_decl)
+{
+ const char *name = opencl_get_var_name (var_decl);
+ gcc_assert (code_gen->defined_vars);
+
+ if (check_and_mark_arg (code_gen, name, false))
+ opencl_add_function_arg (code_gen, var_decl, name);
+}
+
+/* Define non scalar variable, represented be DATA as either local
+ variable or kernel argument. CODE_GEN holds information related to
+ OpenCL code generation. */
+
+static void
+opencl_add_non_scalar_function_arg (opencl_main code_gen,
+ opencl_data data)
+{
+ const char *decl;
+ static int counter = 0;
+ opencl_body body = code_gen->current_body;
+ tree var = data->exact_object;
+ const char *name = opencl_get_var_name (var);
+ tree type = TREE_TYPE (var);
+
+ /* Check whether given variable can be privatized. */
+ if (data->privatized)
+ {
+ /* Define variable as local variable. */
+ gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
+ decl = opencl_print_function_arg_with_type (name, type);
+ dyn_string_append_cstr (body->pre_header, decl);
+ dyn_string_append_cstr (body->pre_header, ";\n");
+ return;
+ }
+ else
+ {
+ /* Define variable as kernel argument. */
+ char decl_name [30];
+ tree main_type = opencl_get_main_type (type);
+ sprintf (decl_name, "oclFTmpArg%d", counter++);
+ decl = opencl_print_function_arg_with_type (decl_name, main_type);
+ dyn_string_append_cstr (body->non_scalar_args, "__global ");
+ opencl_add_non_scalar_type_decl (var, body->pre_header, decl_name);
+ dyn_string_append_cstr (body->non_scalar_args, decl);
+ dyn_string_append_cstr (body->non_scalar_args, ", ");
+ VEC_safe_push (opencl_data, heap, body->data_refs, data);
+ }
+}
+
+/* Register data reference REF to variable DATA. Do nothing, if it
+ has already been registered. CODE_GEN holds information related to
+ OpenCL code generation. */
+
+static void
+opencl_try_data_ref (opencl_main code_gen, data_reference_p ref,
+ opencl_data data)
+{
+ tree var = dr_outermost_base_object (ref);
+ const char *name = opencl_get_var_name (var);
+ const char ** slot;
+ gcc_assert (code_gen->defined_vars);
+
+ slot = (const char **)htab_find_slot (code_gen->global_defined_vars,
+ name, INSERT);
+ if (*slot)
+ return;
+ *slot = name;
+ opencl_add_non_scalar_function_arg (code_gen, data);
+}
+
+/* Register data reference D_REF in current kernel. CODE_GEN hold
+ information related to OpenCL code generation. */
+
+static void
+opencl_add_data_ref (opencl_main code_gen, data_reference_p d_ref)
+{
+ opencl_data tmp = opencl_get_data_by_data_ref (code_gen, d_ref);
+
+ gcc_assert (tmp);
+ if (!DR_IS_READ (d_ref))
+ {
+ bitmap_set_bit (code_gen->curr_meta->modified_on_device, tmp->id);
+ tmp->written_in_current_body = true;
+ tmp->ever_written_on_device = true;
+ code_gen->current_body->num_of_data_writes ++;
+ }
+ else
+ {
+ tmp->read_in_current_body = true;
+ tmp->ever_read_on_device = true;
+ }
+ if (!tmp->privatized)
+ tmp->used_on_device = true;
+
+ opencl_try_data_ref (code_gen, d_ref, tmp);
+}
+
+/* Add base objects of all data references in PBB as arguments to
+ current kernel. CODE_GEN holds information related to OpenCL code
+ generation. */
+
+static void
+opencl_add_data_refs_pbb (poly_bb_p pbb, opencl_main code_gen)
+{
+ VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb);
+ int i;
+ poly_dr_p curr;
+
+ for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++)
+ {
+ data_reference_p d_ref = (data_reference_p) PDR_CDR (curr);
+ opencl_add_data_ref (code_gen, d_ref);
+ }
+}
+
+/* Generate operand for tree node NODE. If LSH is true, generated
+ operand must be lvalue, otherwise it's rvalue. CODE_GEN holds
+ information related to OpenCL code generation. Also generate
+ definitions for variables if necessary. Variable definition is not
+ necessary if variable has already been defined or if it has been
+ defined in other sese. */
+
+static int
+opencl_print_operand (tree node, bool lhs, opencl_main code_gen)
+{
+ tree scev = scalar_evolution_in_region (code_gen->region,
+ code_gen->context_loop,
+ node);
+ tree new_node = chrec_apply_map (scev, code_gen->iv_map);
+
+ if (TREE_CODE (new_node) != SCEV_NOT_KNOWN)
+ node = new_node;
+
+ switch (TREE_CODE (node))
+ {
+ case NOP_EXPR:
+ return opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen);
+ case PLUS_EXPR:
+ {
+ if (lhs)
+ return -1;
+ opencl_append_string_to_body ("(", code_gen);
+ opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen);
+ opencl_append_string_to_body (" + ", code_gen);
+ opencl_print_operand (TREE_OPERAND (node, 1), false, code_gen);
+ opencl_append_string_to_body (")", code_gen);
+ return 0;
+ }
+ case MULT_EXPR:
+ {
+ if (lhs)
+ return -1;
+ opencl_append_string_to_body ("(", code_gen);
+ opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen);
+ opencl_append_string_to_body (" * ", code_gen);
+ opencl_print_operand (TREE_OPERAND (node, 1), false, code_gen);
+ opencl_append_string_to_body (")", code_gen);
+ return 0;
+ }
+
+ case SSA_NAME:
+ {
+ /* If rhs just add variable name. Otherwise
+ it may be necessary to add variable definition. */
+ const char *tmp = opencl_get_var_name (node);
+ if (lhs)
+ opencl_add_variable (tmp, node, code_gen);
+ else
+ opencl_append_var_name (tmp, code_gen);
+
+ /* This call adds variable declaration as formal
+ parameter in kernel header if it is necessary. */
+ opencl_try_variable (code_gen, node);
+ return 0;
+ }
+ case ARRAY_REF:
+ {
+ /* <operand>[<operand>]. */
+ tree arr = TREE_OPERAND (node, 0);
+ tree offset = TREE_OPERAND (node, 1);
+ opencl_print_operand (arr, false, code_gen);
+
+ opencl_append_string_to_body ("[", code_gen);
+ opencl_print_operand (offset, false, code_gen);
+ opencl_append_string_to_body ("]", code_gen);
+ return 0;
+ }
+ case INTEGER_CST:
+ {
+ /* Just print integer constant. */
+ unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (node);
+ if (lhs)
+ return -1;
+ if (host_integerp (node, 0))
+ opencl_append_num_to_body (code_gen, (long)low, "%ld");
+ else
+ {
+ HOST_WIDE_INT high = TREE_INT_CST_HIGH (node);
+ char buff[100];
+ buff[0] = ' ';
+ if (tree_int_cst_sgn (node) < 0)
+ {
+ buff[0] = '-';
+ high = ~high + !low;
+ low = -low;
+ }
+ sprintf (buff + 1, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
+ (unsigned HOST_WIDE_INT) high, low);
+ opencl_append_string_to_body (buff, code_gen);
+ }
+ return 0;
+ }
+ case REAL_CST:
+ {
+ char buff[100];
+ REAL_VALUE_TYPE tmp = TREE_REAL_CST (node);
+ if (lhs)
+ return -1;
+ real_to_decimal (buff, &tmp, sizeof (buff), 0, 1);
+ opencl_append_string_to_body (buff, code_gen);
+ return 0;
+ }
+ case FIXED_CST:
+ {
+ char buff[100];
+ if (lhs)
+ return -1;
+ fixed_to_decimal (buff, TREE_FIXED_CST_PTR (node), sizeof (buff));
+ opencl_append_string_to_body (buff, code_gen);
+ return 0;
+ }
+ case STRING_CST:
+ {
+ opencl_append_string_to_body ("\"", code_gen);
+ opencl_append_string_to_body (TREE_STRING_POINTER (node), code_gen);
+ opencl_append_string_to_body ("\"", code_gen);
+ return 0;
+ }
+ case VAR_DECL:
+ case PARM_DECL:
+ {
+ tree decl_name = DECL_NAME (node);
+ const char *tmp;
+ gcc_assert (decl_name);
+ tmp = IDENTIFIER_POINTER (decl_name);
+
+ opencl_append_var_name (tmp, code_gen);
+ opencl_try_variable (code_gen, node);
+ return 0;
+ }
+ case FIELD_DECL:
+ {
+ tree decl_name = DECL_NAME (node);
+ const char *tmp;
+ gcc_assert (decl_name);
+ tmp = IDENTIFIER_POINTER (decl_name);
+ opencl_append_var_name (tmp, code_gen);
+ return 0;
+ }
+ case LABEL_DECL:
+ {
+ tree decl_name = DECL_NAME (node);
+ if (decl_name)
+ {
+ const char *tmp = IDENTIFIER_POINTER (decl_name);
+ opencl_append_var_name (tmp, code_gen);
+ return 0;
+ }
+
+ if (LABEL_DECL_UID (node) != -1)
+ {
+ opencl_append_num_to_body (code_gen, (int) LABEL_DECL_UID (node),
+ "L%d");
+ return 0;
+ }
+ opencl_append_num_to_body (code_gen, (int) DECL_UID (node),
+ "D_%u");
+ return 0;
+ }
+ case INDIRECT_REF:
+ {
+ opencl_append_string_to_body ("(*", code_gen);
+ opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen);
+ opencl_append_string_to_body (")", code_gen);
+ return 0;
+ }
+ case ADDR_EXPR:
+ {
+ opencl_append_string_to_body ("&", code_gen);
+ opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen);
+ return 0;
+ }
+ case COMPONENT_REF:
+ {
+ tree op1 = TREE_OPERAND (node, 0);
+ tree op2 = TREE_OPERAND (node, 1);
+ opencl_print_operand (op1, false, code_gen);
+
+ if (op1 && TREE_CODE (op1) == INDIRECT_REF)
+ opencl_append_string_to_body ("->", code_gen);
+ else
+ opencl_append_string_to_body (".", code_gen);
+
+ opencl_print_operand (op2, false, code_gen);
+ return 0;
+ }
+ default:
+ debug_tree (node);
+ gcc_unreachable ();
+ }
+
+ return 0;
+}
+
+/* Append variable VAR with name VAR_NAME to current function body.
+ If variable has been defined in current scope, but definition for
+ it has not been generated - then generate it's definition and mark
+ variable as defined. CODE_GEN holds information related to OpenCL
+ code generation. */
+
+static void
+opencl_add_variable (const char *var_name, tree var, opencl_main code_gen)
+{
+ const char ** slot;
+ if (htab_find (code_gen->global_defined_vars, var_name))
+ {
+ opencl_append_var_name (var_name, code_gen);
+ return;
+ }
+
+ slot = (const char **) htab_find_slot
+ (code_gen->defined_vars, var_name, INSERT);
+
+ if (! (*slot) && defined_in_sese_p (var, code_gen->region))
+ {
+ const char *decl;
+ tree type = TREE_TYPE (var);
+ *slot = var_name;
+ if (TREE_CODE (type) == POINTER_TYPE
+ || TREE_CODE (type) == ARRAY_TYPE)
+ {
+ opencl_add_non_scalar_type_decl (var, code_gen->current_body->body,
+ NULL);
+ }
+ else
+ {
+ var = SSA_NAME_VAR (var);
+ decl = opencl_print_function_arg_with_type (var_name, type);
+ opencl_append_string_to_body (decl, code_gen);
+ }
+ return;
+ }
+ opencl_append_var_name (var_name, code_gen);
+}
+
+/* Append list of names of loop iterators from CODE_GEN with same type
+ TYPE to current kernel. FIRST and LAST define outermost and
+ innermost iterators to append respectively. */
+
+static void
+opencl_print_local_vars (const char *fist, const char *last,
+ const char *type, opencl_main code_gen)
+{
+ char ** names = code_gen->root_names->_scattering;
+ int len = code_gen->root_names->_nb_scattering;
+ int i;
+ for (i = 0; i < len; i++)
+ {
+ const char *tmp = names[i];
+ if (opencl_cmp_scat (fist, tmp) <= 0
+ && opencl_cmp_scat (last, tmp) >= 0)
+ {
+ const char ** slot =
+ (const char **) htab_find_slot (code_gen->global_defined_vars,
+ tmp, INSERT);
+ *slot = tmp;
+ continue;
+ }
+
+ if (opencl_cmp_scat (fist, tmp) > 0)
+ continue;
+
+ opencl_append_string_to_body (type, code_gen);
+ opencl_append_string_to_body (" ", code_gen);
+ opencl_append_string_to_body (tmp, code_gen);
+ opencl_append_string_to_body (";\n", code_gen);
+ *((const char **)htab_find_slot (code_gen->global_defined_vars,
+ tmp, INSERT)) = tmp;
+ }
+}
+
+/* Replace all dots to underscores in string pointed to by P. Return P. */
+
+static char *
+filter_dots (char *p)
+{
+ char *s;
+ for (s = p; *s; s++)
+ if (*s == '.')
+ *s = '_';
+ return p;
+}
+
+/* Return string with varibale definition. ARG_NAME is the name of
+ the variable and TYPE is it's type. */
+
+static const char *
+opencl_print_function_arg_with_type (const char *arg_name, tree type)
+{
+ const char *decl = gen_type_with_name (arg_name, type);
+ char *ddecl;
+ ddecl = xstrdup (decl);
+ return filter_dots (ddecl);
+}
+
+/* Generate definition for non scalar variable VAR and place it to
+ string DEST. Use DECL_NAME as variable name. */
+
+static void
+opencl_add_non_scalar_type_decl (tree var, dyn_string_t dest,
+ const char *decl_name)
+{
+ tree type = TREE_TYPE (var);
+ const char *name = opencl_get_var_name (var);
+ static int counter = 0;
+ char type_name [30];
+ char *tmp_name = xstrdup (name);
+ const char *new_type;
+ tree inner_type = TREE_TYPE (type);
+
+ filter_dots (tmp_name);
+
+ sprintf (type_name, "oclFTmpType%d", counter++);
+
+ new_type = opencl_print_function_arg_with_type (type_name, inner_type);
+
+ dyn_string_append_cstr (dest, "typedef __global ");
+ dyn_string_append_cstr (dest, new_type);
+ dyn_string_append_cstr (dest, ";\n");
+
+ dyn_string_append_cstr (dest, type_name);
+ dyn_string_append_cstr (dest, " *");
+ dyn_string_append_cstr (dest, tmp_name);
+ if (decl_name != NULL)
+ {
+ dyn_string_append_cstr (dest, " = (");
+ dyn_string_append_cstr (dest, type_name);
+ dyn_string_append_cstr (dest, "*)");
+ dyn_string_append_cstr (dest, decl_name);
+ dyn_string_append_cstr (dest, ";\n");
+ }
+ free (tmp_name);
+
+}
+
+/* Check whether variable with name NAME has been defined as global or
+ local variable and mark it as defined. This function returns false
+ if variable has already been defined, otherwise it returns true. */
+
+static bool
+check_and_mark_arg (opencl_main code_gen, const char *name, bool local)
+{
+ const char ** slot;
+ gcc_assert (code_gen->defined_vars || !local);
+ if (code_gen->defined_vars)
+ {
+ slot = (const char **)htab_find_slot (code_gen->defined_vars,
+ name, INSERT);
+ if (*slot)
+ return false;
+ if (local)
+ *slot = name;
+ }
+
+ slot = (const char **)htab_find_slot (code_gen->global_defined_vars,
+ name, INSERT);
+ if (*slot)
+ return false;
+ if (!local)
+ *slot = name;
+ return true;
+}
+
+/* Add variable VAR with name NAME as function argument. Append it's
+ declaration in finction header and add it as function parameter.
+ CODE_GEN holds information related to OpenCL code generation. */
+
+static void
+opencl_add_function_arg (opencl_main code_gen, tree var, const char *name)
+{
+ opencl_body body;
+ const char *decl;
+ tree type;
+ type = TREE_TYPE (var);
+ body = code_gen->current_body;
+ decl = opencl_print_function_arg_with_type (name, type);
+ dyn_string_append_cstr (body->header, decl);
+ dyn_string_append_cstr (body->header, ", ");
+ VEC_safe_push (tree, heap, body->function_args, var);
+}
+
+/* Generate kernel function code for clast for statement F, located on
+ depth DEPTH. CODE_GEN holds information related to OpenCL code
+ generation. */
+
+opencl_body
+opencl_clast_to_kernel (struct clast_for * f, opencl_main code_gen,
+ int depth)
+{
+ opencl_body tmp = opencl_body_create ();
+ code_gen->current_body = tmp;
+ return opencl_print_loop (f, code_gen, depth);
+}
+
+/* Generate code for clast statement S, located on depth DEPTH.
+ CODE_GEN holds information related to OpenCL code generation. */
+
+static void
+opencl_print_stmt_list (struct clast_stmt *s, opencl_main code_gen, int depth)
+{
+ for ( ; s; s = s->next) {
+ gcc_assert (!CLAST_STMT_IS_A (s, stmt_root));
+ if (CLAST_STMT_IS_A (s, stmt_ass))
+ {
+ opencl_print_assignment ((struct clast_assignment *) s, code_gen);
+ opencl_append_string_to_body (";\n", code_gen);
+ }
+ else if (CLAST_STMT_IS_A (s, stmt_user))
+ opencl_print_user_stmt ((struct clast_user_stmt *) s, code_gen);
+ else if (CLAST_STMT_IS_A (s, stmt_for))
+ opencl_print_for ((struct clast_for *) s, code_gen, depth);
+ else if (CLAST_STMT_IS_A (s, stmt_guard))
+ opencl_print_guard ((struct clast_guard *) s, code_gen, depth);
+ else if (CLAST_STMT_IS_A (s, stmt_block))
+ {
+ opencl_append_string_to_body ("{\n", code_gen);
+ opencl_print_stmt_list (((struct clast_block *)s)->body, code_gen,
+ depth);
+ opencl_append_string_to_body ("}\n", code_gen);
+ }
+ else
+ gcc_unreachable ();
+ }
+}
+
+/* Generate code for clast for statement F, locate on depth LEVEL.
+ CODE_GEN holds information related to OpenCL code generation. */
+
+static void
+opencl_print_for (struct clast_for *f, opencl_main code_gen, int level)
+{
+ tree iv;
+ tree iv_type;
+ const char *tmp;
+ const char *decl;
+ opencl_append_string_to_body ("for (", code_gen);
+ if (f->LB)
+ {
+ opencl_append_string_to_body (f->iterator, code_gen);
+ opencl_append_string_to_body ("=", code_gen);
+ opencl_print_expr (f->LB, code_gen);
+ }
+ opencl_append_string_to_body (";", code_gen);
+
+ if (f->UB)
+ {
+ opencl_append_string_to_body (f->iterator, code_gen);
+ opencl_append_string_to_body ("<=", code_gen);
+ opencl_print_expr (f->UB, code_gen);
+ }
+ opencl_append_string_to_body (";", code_gen);
+
+ if (value_gt_si (f->stride, 1))
+ {
+ opencl_append_string_to_body (f->iterator, code_gen);
+ opencl_append_string_to_body ("+=", code_gen);
+ opencl_append_num_to_body (code_gen, mpz_get_si (f->stride), "%d)\n{\n");
+ }
+ else
+ {
+ opencl_append_string_to_body (f->iterator, code_gen);
+ opencl_append_string_to_body ("++", code_gen);
+ opencl_append_string_to_body (")\n{\n", code_gen);
+ }
+ iv_type = opencl_get_loop_iter_type (f, code_gen, level);
+ iv = create_tmp_var (iv_type, "scat_tmp_iter");
+
+ tmp = opencl_get_var_name (iv);
+ check_and_mark_arg (code_gen, tmp, false);
+ decl = opencl_print_function_arg_with_type (tmp, iv_type);
+ opencl_append_string_to_body (decl, code_gen);
+
+ opencl_append_string_to_body (" = ", code_gen);
+ opencl_append_string_to_body (f->iterator, code_gen);
+ opencl_append_string_to_body (";\n", code_gen);
+
+ save_clast_name_index (code_gen->newivs_index, f->iterator,
+ VEC_length (tree, code_gen->newivs));
+ VEC_safe_push (tree, heap, code_gen->newivs, iv);
+
+ opencl_print_stmt_list (f->body, code_gen, level + 1);
+ opencl_append_string_to_body ("}\n", code_gen);
+}
+
+/* Generate code for clast conditional statement G, locate on depth DEPTH.
+ CODE_GEN holds information related to OpenCL code generation. */
+
+static void
+opencl_print_guard (struct clast_guard *g, opencl_main code_gen, int depth)
+{
+ int k;
+ opencl_append_string_to_body ("if ", code_gen);
+ if (g->n > 1)
+ opencl_append_string_to_body ("(", code_gen);
+ for (k = 0; k < g->n; ++k)
+ {
+ if (k > 0)
+ opencl_append_string_to_body (" && ", code_gen);
+ opencl_append_string_to_body ("(", code_gen);
+ opencl_print_equation (&g->eq[k], code_gen);
+ opencl_append_string_to_body (")", code_gen);
+ }
+ if (g->n > 1)
+ opencl_append_string_to_body (")", code_gen);
+ opencl_append_string_to_body (" {\n", code_gen);
+ opencl_print_stmt_list (g->then, code_gen, depth);
+ opencl_append_string_to_body ("}\n", code_gen);
+}
+
+
+/* Generate code for clast equation EQ. CODE_GEN holds information
+ related to OpenCL code generation. */
+
+static void
+opencl_print_equation (struct clast_equation *eq, opencl_main code_gen)
+{
+ opencl_print_expr (eq->LHS, code_gen);
+ if (eq->sign == 0)
+ opencl_append_string_to_body (" == ", code_gen);
+ else if (eq->sign > 0)
+ opencl_append_string_to_body (" >= ", code_gen);
+ else
+ opencl_append_string_to_body (" <= ", code_gen);
+ opencl_print_expr (eq->RHS, code_gen);
+}
+
+/* Generate code for clast expression E. CODE_GEN holds information
+ related to OpenCL code generation. */
+
+static void
+opencl_print_expr (struct clast_expr *e, opencl_main code_gen)
+{
+ if (!e)
+ return;
+ switch (e->type)
+ {
+ case expr_term:
+ opencl_print_term ((struct clast_term*) e, code_gen);
+ break;
+ case expr_red:
+ opencl_print_reduction ((struct clast_reduction*) e, code_gen);
+ break;
+ case expr_bin:
+ opencl_print_binary ((struct clast_binary*) e, code_gen);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Generate code for clast term T. CODE_GEN holds information
+ related to OpenCL code generation. */
+
+static void
+opencl_print_term (struct clast_term *t, opencl_main code_gen)
+{
+ if (t->var)
+ {
+ const char *real_name = opencl_get_scat_real_name (code_gen, t->var);
+ if (value_one_p (t->val))
+ opencl_append_var_name (real_name, code_gen);
+ else if (value_mone_p (t->val))
+ {
+ opencl_append_string_to_body ("-", code_gen);
+ opencl_append_var_name (real_name, code_gen);
+ }
+ else
+ {
+ opencl_append_num_to_body (code_gen, mpz_get_si (t->val), "%d");
+ opencl_append_string_to_body ("*", code_gen);
+ opencl_append_var_name (real_name, code_gen);
+ }
+ opencl_add_scat_as_arg (code_gen, t->var, real_name);
+ }
+ else
+ opencl_append_num_to_body (code_gen, mpz_get_si (t->val), "%d");
+}
+
+/* Generate code for clast reduction statement R. CODE_GEN holds
+ information related to OpenCL code generation. */
+
+static void
+opencl_print_reduction (struct clast_reduction *r, opencl_main code_gen)
+{
+ switch (r->type)
+ {
+ case clast_red_sum:
+ opencl_print_sum (r, code_gen);
+ break;
+ case clast_red_min:
+ case clast_red_max:
+ if (r->n == 1)
+ {
+ opencl_print_expr (r->elts[0], code_gen);
+ break;
+ }
+ opencl_print_minmax_c (r, code_gen);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Generate code for clast sum statement R. CODE_GEN holds information
+ related to OpenCL code generation. */
+
+static void
+opencl_print_sum (struct clast_reduction *r, opencl_main code_gen)
+{
+ int i;
+ struct clast_term *t;
+
+ gcc_assert (r->n >= 1 && r->elts[0]->type == expr_term);
+ t = (struct clast_term *) r->elts[0];
+ opencl_print_term (t, code_gen);
+
+ for (i = 1; i < r->n; ++i)
+ {
+ gcc_assert (r->elts[i]->type == expr_term);
+ t = (struct clast_term *) r->elts[i];
+ if (value_pos_p (t->val))
+ opencl_append_string_to_body ("+", code_gen);
+ opencl_print_term (t, code_gen);
+ }
+}
+
+/* Generate code for clast binary operation B. CODE_GEN holds
+ information related to OpenCL code generation. */
+
+static void
+opencl_print_binary (struct clast_binary *b, opencl_main code_gen)
+{
+ const char *s1 = NULL, *s2 = NULL, *s3 = NULL;
+ bool group = (b->LHS->type == expr_red
+ && ((struct clast_reduction*) b->LHS)->n > 1);
+
+ switch (b->type)
+ {
+ case clast_bin_fdiv:
+ s1 = "floor ((", s2 = ")/(", s3 = "))";
+ break;
+ case clast_bin_cdiv:
+ s1 = "ceil ((", s2 = ")/(", s3 = "))";
+ break;
+ case clast_bin_div:
+ if (group)
+ s1 = "(", s2 = ")/", s3 = "";
+ else
+ s1 = "", s2 = "/", s3 = "";
+ break;
+ case clast_bin_mod:
+ if (group)
+ s1 = "(", s2 = ")%", s3 = "";
+ else
+ s1 = "", s2 = "%", s3 = "";
+ break;
+ }
+
+ opencl_append_string_to_body (s1, code_gen);
+ opencl_print_expr (b->LHS, code_gen);
+ opencl_append_string_to_body (s2, code_gen);
+ opencl_append_num_to_body (code_gen, mpz_get_si (b->RHS), "%d");
+ opencl_append_string_to_body (s3, code_gen);
+}
+
+/* Generate code for clast min/max operation R. CODE_GEN holds
+ information related to OpenCL code generation. */
+
+static void
+opencl_print_minmax_c ( struct clast_reduction *r, opencl_main code_gen)
+{
+ int i;
+ for (i = 1; i < r->n; ++i)
+ opencl_append_string_to_body (r->type == clast_red_max ? "max (" : "min (",
+ code_gen);
+ if (r->n > 0)
+ {
+ opencl_append_string_to_body ("(unsigned int)(", code_gen);
+ opencl_print_expr (r->elts[0], code_gen);
+ opencl_append_string_to_body (")", code_gen);
+ }
+ for (i = 1; i < r->n; ++i)
+ {
+ opencl_append_string_to_body (",", code_gen);
+ opencl_append_string_to_body ("(unsigned int)(", code_gen);
+ opencl_print_expr (r->elts[i], code_gen);
+ opencl_append_string_to_body ("))", code_gen);
+ }
+}
+
+#endif
new file mode 100644
@@ -0,0 +1,784 @@
+/* Build meta information from clast data structures for GRAPHITE-OpenCL.
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file implements building meta infromation about data
+ references, supported types and operations and dependencies. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "ggc.h"
+#include "tree.h"
+#include "rtl.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "toplev.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pass.h"
+#include "domwalk.h"
+#include "value-prof.h"
+#include "pointer-set.h"
+#include "gimple.h"
+#include "sese.h"
+#include "output.h"
+#include "hashtab.h"
+#include "tree.h"
+#include "gimple-pretty-print.h"
+
+#ifdef HAVE_cloog
+#include "cloog/cloog.h"
+#include "ppl_c.h"
+#include "graphite-ppl.h"
+#include "graphite.h"
+#include "graphite-poly.h"
+#include "graphite-scop-detection.h"
+#include "graphite-clast-to-gimple.h"
+#include "graphite-dependences.h"
+#include "dyn-string.h"
+#include "graphite-opencl.h"
+
+/* Something like C++ std::map<int, int>. */
+
+struct opencl_pair_def
+{
+ int id;
+ int val;
+};
+
+typedef struct opencl_pair_def * opencl_pair;
+
+/* Hash function for opencl_pair. */
+
+static hashval_t
+opencl_pair_to_hash (const void * data)
+{
+ const struct opencl_pair_def * obj = (const struct opencl_pair_def *)data;
+ return (hashval_t) (obj->id);
+}
+
+/* Compare function for opencl_pair. */
+
+static int
+opencl_pair_cmp (const void * e1, const void * e2)
+{
+ const struct opencl_pair_def * obj1 = (const struct opencl_pair_def *)e1;
+ const struct opencl_pair_def * obj2 = (const struct opencl_pair_def *)e2;
+
+ return obj1->id == obj2->id;
+}
+
+/* Create new opencl_pair with NEW_ID as id and NEW_VAL as val. */
+
+static opencl_pair
+opencl_pair_create (int new_id, int new_val)
+{
+ opencl_pair tmp = XNEW (struct opencl_pair_def);
+ tmp->id = new_id;
+ tmp->val = new_val;
+ return tmp;
+}
+
+/* Delete opencl_pair DATA. */
+
+static void
+opencl_pair_delete (opencl_pair data)
+{
+ free (data);
+}
+
+/* Create new opencl_clast_meta structure with PARENT as parent,
+ DEPTH as out_depth. If ACCESS_INIT is true, then init access bitmaps. */
+
+static opencl_clast_meta
+opencl_clast_meta_create (int depth, opencl_clast_meta parent,
+ bool access_init)
+{
+ opencl_clast_meta tmp = XNEW (struct opencl_clast_meta_def);
+ tmp->out_depth = depth;
+ tmp->in_depth = 0;
+ tmp->next = NULL;
+ tmp->body = NULL;
+ tmp->parent = parent;
+ tmp->on_device = false;
+ tmp->modified_on_host = BITMAP_ALLOC (NULL);
+ tmp->modified_on_device = BITMAP_ALLOC (NULL);
+ tmp->access_unsupported = false;
+ if (access_init)
+ {
+ tmp->can_be_private = BITMAP_ALLOC (NULL);
+ tmp->access = BITMAP_ALLOC (NULL);
+ }
+ else
+ {
+ tmp->access = NULL;
+ tmp->can_be_private = NULL;
+ }
+ return tmp;
+}
+
+/* Check whether type TYPE is supported by current graphite-opencl
+ implementation. If PTR or ARRAY is true, then TYPE can not be pointer
+ type (because only one level of pointers is supported). Also if PTR is
+ true, TYPE can not be array (because pointers to arrays are not
+ supported yet). */
+
+static bool
+opencl_supported_type_p (tree type, bool ptr, bool array)
+{
+ switch (TREE_CODE (type))
+ {
+ case POINTER_TYPE:
+ {
+ if (ptr || array)
+ return false;
+ return opencl_supported_type_p (TREE_TYPE (type), true, false);
+ }
+ case ARRAY_TYPE:
+ {
+ if (ptr)
+ return false;
+ return opencl_supported_type_p (TREE_TYPE (type), false, true);
+ }
+ case FUNCTION_DECL:
+ case FUNCTION_TYPE:
+ case COMPLEX_TYPE:
+ case RECORD_TYPE:
+ case ENUMERAL_TYPE:
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ case METHOD_TYPE:
+ case REFERENCE_TYPE:
+ return false;
+
+ case BOOLEAN_TYPE:
+ case INTEGER_TYPE:
+ case REAL_TYPE:
+ return true;
+ case VOID_TYPE:
+ return true;
+ case OFFSET_TYPE:
+ case FIXED_POINT_TYPE:
+ case VECTOR_TYPE:
+ case LANG_TYPE:
+ default:
+ debug_tree (type);
+ gcc_unreachable ();
+ }
+}
+
+/* Check whether expression ARG is supported by current graphite-opencl
+ implementation. */
+
+static bool
+opencl_supported_arg_p (opencl_main code_gen, tree arg)
+{
+ switch (TREE_CODE (arg))
+ {
+ case SSA_NAME:
+ return opencl_supported_arg_p (code_gen, SSA_NAME_VAR (arg));
+
+ case ARRAY_REF:
+ case INDIRECT_REF:
+ case ADDR_EXPR:
+ return opencl_supported_arg_p (code_gen, TREE_OPERAND (arg, 0));
+
+ case VAR_DECL:
+ case PARM_DECL:
+ {
+ tree type = TREE_TYPE (arg);
+ if (TREE_CODE (type) == POINTER_TYPE)
+ if (!opencl_get_data_by_tree (code_gen, arg))
+ return false;
+
+ return opencl_supported_type_p (type, false, false);
+ }
+
+ case INTEGER_CST:
+ case REAL_CST:
+ case POINTER_PLUS_EXPR:
+ return true;
+
+ case FIELD_DECL:
+ case COMPONENT_REF:
+ case MEM_REF:
+ case REALPART_EXPR:
+ case IMAGPART_EXPR:
+ case COMPLEX_EXPR:
+ case CALL_EXPR:
+ case RESULT_DECL:
+ return false;
+
+ default:
+ debug_tree (arg);
+ gcc_unreachable ();
+ }
+}
+
+/* Check whether gimple assignment statement GMP is supported by current
+ graphite-opencl implementation. CODE_GEN holds information about non
+ scalar arguments. */
+
+static bool
+opencl_gimple_assign_with_supported_types_p (opencl_main code_gen, gimple gmp)
+{
+ tree curr_tree;
+ int num_of_ops = gimple_num_ops (gmp);
+ gcc_assert (gimple_code (gmp) == GIMPLE_ASSIGN);
+ gcc_assert (num_of_ops == 2 || num_of_ops == 3);
+
+ curr_tree = gimple_assign_lhs (gmp);
+ if (!opencl_supported_arg_p (code_gen, curr_tree))
+ return false;
+
+ curr_tree = gimple_assign_rhs1 (gmp);
+ if (!opencl_supported_arg_p (code_gen, curr_tree))
+ return false;
+
+ if (num_of_ops == 3)
+ {
+ curr_tree = gimple_assign_rhs2 (gmp);
+ if (!opencl_supported_arg_p (code_gen, curr_tree))
+ return false;
+ }
+ return true;
+}
+
+/* Check whether all statements in basic block BB are supported by current
+ graphite-opencl implementation. CODE_GEN holds information about non
+ scalar arguments. */
+
+static bool
+opencl_supported_type_access_p (opencl_main code_gen, basic_block bb)
+{
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (!stmt)
+ continue;
+ switch (gimple_code (stmt))
+ {
+ case GIMPLE_DEBUG:
+ case GIMPLE_COND:
+ case GIMPLE_PHI:
+ case GIMPLE_LABEL:
+ continue;
+ case GIMPLE_ASSIGN:
+ if (!opencl_gimple_assign_with_supported_types_p (code_gen, stmt))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "opencl_supported_type_access_p:"
+ " bad types in assignment\n");
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);
+ }
+ return false;
+ }
+ continue;
+ case GIMPLE_CALL:
+ return false;
+ default:
+ debug_gimple_stmt (stmt);
+ gcc_unreachable ();
+ }
+ }
+ return true;
+}
+
+
+/* Mark variable, represented by tree OBJ as visited in bitmap VISITED.
+ If DEF is true and given variable can be privatized, mark it as
+ privatized in META. CODE_GEN holds information about non
+ scalar arguments. */
+
+static void
+opencl_def_use_data (opencl_main code_gen, tree obj, bitmap visited,
+ opencl_clast_meta meta, bool def)
+{
+ opencl_data data;
+ if (obj == NULL)
+ return;
+ data = opencl_get_data_by_tree (code_gen,
+ opencl_get_base_object_by_tree (obj));
+ if (data == NULL)
+ return;
+
+ if (!data->can_be_private)
+ return;
+
+ if (!bitmap_set_bit (visited, data->id))
+ return;
+
+ if (!def)
+ return;
+
+ bitmap_set_bit (meta->can_be_private, data->id);
+}
+
+/* Mark data in META, corresponding to basic block BB, which can be
+ privatized. CODE_GEN holds information about non
+ scalar arguments. */
+
+static void
+opencl_calc_bb_privatization (opencl_main code_gen, basic_block bb,
+ opencl_clast_meta meta)
+{
+ gimple_stmt_iterator gsi;
+ bitmap visited = BITMAP_ALLOC (NULL);
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (gimple_code (stmt) != GIMPLE_ASSIGN)
+ continue;
+ opencl_def_use_data (code_gen, gimple_assign_lhs (stmt),
+ visited, meta, true);
+
+ opencl_def_use_data (code_gen, gimple_assign_rhs1 (stmt),
+ visited, meta, false);
+
+ opencl_def_use_data (code_gen, gimple_assign_rhs2 (stmt),
+ visited, meta, false);
+ }
+ BITMAP_FREE (visited);
+}
+
+/* Analyse clast_user_stmt STMT and set read/write flags for each data
+ reference in this statement in clast meta corresponding to this
+ statement. If some data references in statement are unsupported,
+ then mark META as accessing unsupported. CODE_GEN holds
+ information about non scalar arguments. */
+
+static void
+opencl_set_meta_rw_flags (opencl_clast_meta meta,
+ struct clast_user_stmt * stmt,
+ opencl_main code_gen)
+{
+ CloogStatement *cs = stmt->statement;
+ poly_bb_p pbb = (poly_bb_p) cloog_statement_usr (cs);
+ VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb);
+ gimple_bb_p gbb = PBB_BLACK_BOX (pbb);
+ basic_block bb = GBB_BB (gbb);
+ int i;
+ poly_dr_p curr;
+ if (!opencl_supported_type_access_p (code_gen, bb))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Basic block contains unsupported "
+ "types in graphite-opencl\n");
+ dump_bb (bb, dump_file, 0);
+
+ }
+
+ meta->access_unsupported = true;
+ }
+ opencl_calc_bb_privatization (code_gen, bb, meta);
+ for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++)
+ {
+ data_reference_p d_ref = (data_reference_p) PDR_CDR (curr);
+ tree data_ref_tree = dr_outermost_base_object (d_ref);
+ opencl_data data;
+
+ if (!opencl_supported_arg_p (code_gen, data_ref_tree))
+ {
+ meta->access_unsupported = true;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "Unsupported in graphite-opencl data reference\n");
+ dump_data_reference (dump_file, d_ref);
+
+ }
+ continue;
+ }
+
+ data = opencl_get_data_by_data_ref (code_gen, d_ref);
+ gcc_assert (data);
+
+ if (!data->supported)
+ meta->access_unsupported = true;
+
+ if (!graphite_outer_subscript_bound (curr, false))
+ {
+ meta->access_unsupported = true;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Can not determine subscript bound "
+ "for data reference\n");
+ dump_data_reference (dump_file, d_ref);
+
+ }
+
+ continue;
+ }
+
+ if (data->size_value == NULL)
+ {
+ meta->access_unsupported = true;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "Can not determine size for data reference\n");
+ dump_data_reference (dump_file, d_ref);
+ }
+ }
+ bitmap_set_bit (meta->access, data->id);
+ }
+}
+
+/* Update META access bitmap by union of access bitmaps of it's children. */
+
+static void
+opencl_collect_definitions_info (opencl_clast_meta meta)
+{
+ opencl_clast_meta curr = meta->body->next;
+ bitmap tmp_access = BITMAP_ALLOC (NULL);
+ bitmap_copy (tmp_access, meta->body->access);
+ meta->can_be_private = BITMAP_ALLOC (NULL);
+ bitmap_copy (meta->can_be_private, meta->body->can_be_private);
+ while (curr)
+ {
+ bitmap new_defs = BITMAP_ALLOC (NULL);
+ bitmap_and_compl (new_defs, curr->can_be_private, tmp_access);
+ bitmap_ior_into (tmp_access, curr->access);
+ bitmap_ior_into (meta->can_be_private, new_defs);
+ curr = curr->next;
+ BITMAP_FREE (new_defs);
+ }
+ meta->access = tmp_access;
+}
+
+/* Build meta structure from clast structure.
+ BODY - base clast statement.
+ DEPTH - depth of BODY in whole clast structure.
+ PARENT - parent meta node.
+ CODE_GEN - data structure, which holds information
+ about non scalar arguments. */
+
+opencl_clast_meta
+opencl_create_meta_from_clast (opencl_main code_gen,
+ struct clast_stmt * body, int depth,
+ opencl_clast_meta parent)
+{
+ int max_depth = 0;
+ opencl_clast_meta result = NULL;
+ opencl_clast_meta curr = NULL;
+ struct clast_stmt * curr_stmt = body;
+ for ( ; curr_stmt; curr_stmt = curr_stmt->next)
+ {
+ opencl_clast_meta tmp_result = NULL;
+ if (CLAST_STMT_IS_A (curr_stmt, stmt_root))
+ continue;
+ if (CLAST_STMT_IS_A (curr_stmt, stmt_user))
+ {
+ tmp_result = opencl_clast_meta_create (depth, parent, true);
+ opencl_set_meta_rw_flags (tmp_result,
+ (struct clast_user_stmt*) curr_stmt,
+ code_gen);
+ }
+ if (CLAST_STMT_IS_A (curr_stmt, stmt_guard))
+ {
+ struct clast_guard * if_stmt = (struct clast_guard *)curr_stmt;
+ /* For guard (if) statement create meta for it's body and just
+ append it to current list. */
+ tmp_result = opencl_create_meta_from_clast (code_gen, if_stmt->then,
+ depth, parent);
+ }
+ if (CLAST_STMT_IS_A (curr_stmt, stmt_block))
+ {
+ struct clast_block * bl_stmt = (struct clast_block *)curr_stmt;
+ tmp_result = opencl_create_meta_from_clast (code_gen, bl_stmt->body,
+ depth, parent);
+ }
+ if (CLAST_STMT_IS_A (curr_stmt, stmt_for))
+ {
+ struct clast_for * for_stmt = (struct clast_for *) curr_stmt;
+ tmp_result = opencl_clast_meta_create (depth, parent, false);
+ tmp_result->body
+ = opencl_create_meta_from_clast (code_gen, for_stmt->body,
+ depth + 1, tmp_result);
+
+ max_depth = (max_depth > tmp_result->in_depth + 1)
+ ? max_depth : tmp_result->in_depth + 1;
+ opencl_collect_definitions_info (tmp_result);
+ }
+ if (!result)
+ curr = result = tmp_result;
+ else
+ curr->next = tmp_result;
+ while (curr->next != NULL) curr = curr->next;
+ }
+ if (parent)
+ parent->in_depth = max_depth;
+
+ return result;
+}
+
+/* For all opencl_data referenced in META calculate depth of innermost
+ reference. DEPTH is the depth of the loop, represented be META
+ in current loop nest. DATE holds intermediate resuls.
+ Function returns false iff there is access to unsupported types in
+ given META.
+ Consider an example:
+
+ | Meta_1
+ | / \
+ | / \
+ | / \
+ | / \
+ | Meta_2 Meta_3
+ | {D1,D2} / \
+ | / \
+ | / \
+ | Meta_4 Meta_5
+ | {D2,D4} |
+ | |
+ | |
+ | Meta_6
+ | {D3}
+
+ In this example D1_depth = 1, D2_depth = 2, D3_depth = 3, D4_depth = 2. */
+
+static bool
+opencl_calc_max_depth_tab (opencl_clast_meta meta, htab_t data, int depth)
+{
+ while (meta)
+ {
+ if (meta->body)
+ {
+ if (!opencl_calc_max_depth_tab (meta->body, data, depth + 1))
+ return false;
+ }
+ else
+ {
+ /* User stmt. Analyze data access. */
+ bitmap stmt_access = meta->access;
+ unsigned i;
+ bitmap_iterator bi;
+ if (meta->access_unsupported)
+ return false;
+ EXECUTE_IF_SET_IN_BITMAP (stmt_access, 0, i, bi)
+ {
+ opencl_pair curr_pair = opencl_pair_create (i, depth);
+ struct opencl_pair_def ** slot
+ = (struct opencl_pair_def **)htab_find_slot (data, curr_pair,
+ INSERT);
+ if (*slot == NULL)
+ *slot = curr_pair;
+ else
+ {
+ opencl_pair old_pair = *slot;
+ if (old_pair->val > curr_pair->val)
+ opencl_pair_delete (curr_pair);
+ else
+ {
+ *slot = curr_pair;
+ opencl_pair_delete (old_pair);
+ }
+ }
+ }
+ }
+ meta = meta->next;
+ }
+ return true;
+}
+
+/* Check whether it's reasonable to pass data, represented by OBJ,
+ to device based on information from META.
+ This function helps to avoid kernels like this.
+
+ | host_use_a_and_b ();
+ | for (int i = 0; i < N; i++)
+ | a[i] = b[i];
+
+ host_use_a_and_b ();
+
+ We have no dependency here, but memory transfer from host to devices and
+ from device to host aren't reasonable here.
+
+ We have 2 situations when memory transfer is reasonable.
+
+ 1.
+
+ | for (int j = 0; j < N; j++)
+ | {
+ | host_use_c ();
+ | for (int i = 0; i < N; i ++)
+ | a[i] = b [i] + j;
+ | }
+
+ In this case we can put memory transfer befor first loop, so
+ we will avoid situation, when all benefits from parallel execution
+ can be eliminated by memory transfer.
+
+ 2.
+
+ | for (int i = 0; i < N; i ++)
+ | for (int j = 0; j < N; j ++)
+ | {
+ | c[i][j] = 0;
+ | for (int k = 0; k < N; k ++)
+ | c[i][j] += a[i][k] * b [k][j];
+ | }
+
+ In this case each element of a,b or c is used N time on device,
+ so memory transfer is reasonable. */
+
+static bool
+opencl_evaluate_data_access_p (opencl_data obj, opencl_clast_meta meta)
+{
+ int depth = obj->depth;
+ int data_id = obj->id;
+ opencl_clast_meta parent = meta->parent;
+ if (obj->privatized)
+ return false;
+ if (depth < obj->data_dim)
+ return false;
+ if (parent)
+ {
+ /* We have outer loop. */
+ bitmap curr_bitmap = parent->modified_on_host;
+ /* Memory transfer for this statement has been placed outside
+ outer loop, so for one memory transfer will be executing more
+ then one kernel (first case). */
+ if (!bitmap_bit_p (curr_bitmap, data_id))
+ return true;
+ }
+ /* Check max depth of memory access (second case). */
+ return (depth > obj->data_dim);
+}
+
+/* Find opencl_data object by it's ID in CODE_GEN structures. */
+
+static opencl_data
+opencl_get_data_by_id (opencl_main code_gen, int id)
+{
+ VEC (opencl_data, heap) * main_data = code_gen->opencl_function_data;
+ opencl_data res = VEC_index (opencl_data, main_data, id);
+ gcc_assert (res->id == id);
+ return res;
+}
+
+/* Check whether memory transfer is reasonable if clast statement,
+ connected with META, will be replaced by opencl kernel launch.
+ ACCESS holds depth of innermost data references
+ for all data, references in statement, represented by META.
+ CODE_GEN holds information about non scalar arguments. */
+
+static bool
+opencl_analyse_data_access_p (opencl_main code_gen,
+ htab_t access,
+ opencl_clast_meta meta)
+{
+ htab_iterator h_iter;
+ opencl_pair curr;
+ int max_dim = 1;
+ int i;
+ opencl_data curr_data;
+ VEC (opencl_data, heap) * data_objs = VEC_alloc (opencl_data, heap,
+ OPENCL_INIT_BUFF_SIZE);
+
+ FOR_EACH_HTAB_ELEMENT (access, curr, opencl_pair, h_iter)
+ {
+ int id = curr->id;
+ opencl_data obj = opencl_get_data_by_id (code_gen, id);
+ VEC_safe_push (opencl_data, heap, data_objs, obj);
+ if (max_dim < obj->data_dim)
+ max_dim = obj->data_dim;
+ obj->depth = curr->val;
+ }
+
+ for (i = 0; VEC_iterate (opencl_data, data_objs, i, curr_data); i++)
+ {
+ if (curr_data->data_dim != max_dim)
+ continue;
+ if (opencl_evaluate_data_access_p (curr_data, meta))
+ return true;
+ }
+ return false;
+}
+
+/* Main predicate which checks whether statement, represented by META and
+ located on depth DEPTH, should be replaced by opencl kernel launch.
+ CODE_GEN holds information about non scalar arguments. */
+
+bool
+opencl_should_be_parallel_p (opencl_main code_gen,
+ opencl_clast_meta meta,
+ int depth)
+{
+ int i_depth = meta->in_depth;
+ htab_t max_access_depth;
+ bool dump_p = dump_file && (dump_flags & TDF_DETAILS);
+
+ if (dump_p)
+ fprintf (dump_file, "opencl_should_be_parallel_p: ");
+
+ /* Avoid launching a lot of small kernels in a deep loop. */
+ if (!flag_graphite_opencl_no_depth_check)
+ if (depth > i_depth + opencl_base_depth_const)
+ {
+ if (dump_p)
+ fprintf (dump_file, "avoiding small kernel in deep loop\n");
+ return false;
+ }
+
+ max_access_depth = htab_create (OPENCL_INIT_BUFF_SIZE,
+ opencl_pair_to_hash,
+ opencl_pair_cmp, free);
+
+ /* Can't parallelize if statements in loop contain unsupported types. */
+ if (!flag_graphite_opencl_no_types_check)
+ if (!opencl_calc_max_depth_tab (meta, max_access_depth, 0))
+ {
+ htab_delete (max_access_depth);
+ if (dump_p)
+ fprintf (dump_file, "unsupported types\n");
+
+ return false;
+ }
+
+ /* Can't parallelize if memory transfer is not reasonable. */
+ if (!flag_graphite_opencl_no_memory_transfer_check
+ && !flag_graphite_opencl_cpu
+ && !opencl_analyse_data_access_p (code_gen, max_access_depth, meta))
+ {
+ htab_delete (max_access_depth);
+ if (dump_p)
+ fprintf (dump_file, "avoiding large memory transfer\n");
+ return false;
+ }
+
+ htab_delete (max_access_depth);
+
+ if (dump_p)
+ fprintf (dump_file, "ok\n");
+
+ return true;
+}
+
+#endif
new file mode 100644
@@ -0,0 +1,2913 @@
+/* GRAPHITE-OpenCL pass.
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* This pass implements transformation of perfect loop nests without
+ dependencies to OpenCL kernels.
+
+ For example, loop nest like this
+
+ | for (s_i = lb_i; s_i <= ub_i; s_i += stride_i)
+ | for (s_{i_1} = lb_{i+1}; s_{i+1} <= ub_{i+1}; s_{i+1} += stride_{i+1})
+ | ...
+ | for (s_j = lb_j; s_j <= ub_j; s_j += stride_j)
+ | {
+ | STMT(s_i, s_{i+1}, ..., s_j);
+ | }
+
+ will be transformed to this OpenCL kernel and all required OpenCL
+ calls will be created.
+
+ | __kernel void opencl_auto_function_N
+ | (base_i, mod_i, step_i, first_i,
+ | ...,
+ | base_j, mod_j, step_i, first_j,
+ | ...)
+ | {
+ | unsigned int global_id = get_global_id (0);
+ | unsigned int s_i = ((global_id / base_i) % mod_i) * step_i + first_i;
+ | unsigned int s_j = ((global_id / base_j) % mod_j) * step_j + first_j;
+ | STMT(s_i, s_{i+1}, ..., s_j);
+ | }
+*/
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "ggc.h"
+#include "tree.h"
+#include "rtl.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "toplev.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pass.h"
+#include "domwalk.h"
+#include "value-prof.h"
+#include "pointer-set.h"
+#include "gimple.h"
+#include "sese.h"
+#include "output.h"
+#include <sys/time.h>
+#include "hashtab.h"
+#include "tree.h"
+
+#ifdef HAVE_cloog
+#include "cloog/cloog.h"
+#include "ppl_c.h"
+#include "graphite-ppl.h"
+#include "graphite.h"
+#include "graphite-poly.h"
+#include "graphite-scop-detection.h"
+#include "graphite-clast-to-gimple.h"
+#include "graphite-dependences.h"
+#include "dyn-string.h"
+#include "graphite-opencl.h"
+
+/* Enum for all OpenCL functions used in GRAPHITE-OpenCL. */
+
+enum OPENCL_FUNCTUONS
+ {
+ STATIC_INIT = 0,
+ CREATE_CONTEXT_FROM_TYPE = 1,
+ GET_CONTEXT_INFO = 2,
+ CREATE_COMMAND_QUEUE = 3,
+ CREATE_PROGRAM_WITH_SOURCE = 4,
+ BUILD_PROGRAM = 5,
+ CREATE_KERNEL = 6,
+ CREATE_BUFFER = 7,
+ SET_KERNEL_ARG = 8,
+ ENQUEUE_ND_RANGE_KERNEL = 9,
+ ENQUEUE_READ_BUFFER = 10,
+ ENQUEUE_WRITE_BUFFER = 11,
+ RELEASE_MEMORY_OBJ = 12,
+ RELEASE_CONTEXT = 13,
+ RELEASE_COMMAND_QUEUE = 14,
+ RELEASE_PROGRAM = 15,
+ RELEASE_KERNEL = 16,
+ GET_PLATFORM_IDS = 17,
+ WAIT_FOR_EVENTS = 18
+ };
+
+/* Constructors and destructors. */
+static opencl_main opencl_main_create (CloogNames *, sese, edge, htab_t);
+static void opencl_main_delete (opencl_main);
+static void opencl_clast_meta_delete (opencl_clast_meta);
+static tree opencl_create_function_decl (enum OPENCL_FUNCTUONS);
+static edge opencl_create_function_call (edge);
+static void opencl_init_data (scop_p, opencl_main);
+static int opencl_get_non_scalar_type_depth (tree);
+static tree opencl_create_memory_for_pointer (opencl_data);
+static void opencl_init_basic_blocks (opencl_main);
+static edge opencl_set_context_properties (edge, tree);
+static tree opencl_create_clCreateContextFromType (tree);
+static tree opencl_create_clGetContextInfo_1 (tree);
+static void opencl_create_gimple_variables (void);
+static tree opencl_create_clCreateCommandQueue (tree);
+static tree opencl_create_malloc_call (tree);
+static edge opencl_create_init_context (edge);
+static void opencl_wait_for_event (opencl_main, tree);
+static void opencl_transform_stmt_list (struct clast_stmt *, opencl_main, int);
+static void opencl_create_gimple_for_body (opencl_body, opencl_main);
+
+/* Data structure to be used in data_reference_p to opencl_data hash
+ table. */
+struct map_ref_to_data_def
+{
+ data_reference_p key;
+ opencl_data value;
+};
+
+typedef struct map_ref_to_data_def * map_ref_to_data;
+
+/* Calculate hash value from map_ref_to_data. */
+
+static hashval_t
+map_ref_to_data_to_hash (const void * data)
+{
+ const struct map_ref_to_data_def * obj
+ = (const struct map_ref_to_data_def *)data;
+ return htab_hash_pointer (obj->key);
+}
+
+/* Compare to map_ref_to_data pointers. */
+
+static int
+map_ref_to_data_cmp (const void * v1, const void * v2)
+{
+ const struct map_ref_to_data_def * obj_1
+ = (const struct map_ref_to_data_def *)v1;
+ const struct map_ref_to_data_def * obj_2
+ = (const struct map_ref_to_data_def *)v2;
+
+ return (obj_1->key == obj_2->key);
+}
+
+/* Create new map_ref_to_data with NEW_KEY as key and NEW_VALUE as value. */
+
+static map_ref_to_data
+map_ref_to_data_create (data_reference_p new_key,
+ opencl_data new_value)
+{
+ map_ref_to_data tmp = XNEW (struct map_ref_to_data_def);
+ tmp->key = new_key;
+ tmp->value = new_value;
+ return tmp;
+}
+
+/* Data structure to be used in tree to opencl_data hash table. */
+
+struct map_tree_to_data_def
+{
+ tree key;
+ opencl_data value;
+};
+
+typedef struct map_tree_to_data_def * map_tree_to_data;
+
+/* Calculate hash value from map_tree_to_data. */
+
+static hashval_t
+map_tree_to_data_to_hash (const void * data)
+{
+ const struct map_tree_to_data_def * obj
+ = (const struct map_tree_to_data_def *)data;
+ return htab_hash_pointer (obj->key);
+}
+
+/* Compare to map_tree_to_data pointers. */
+
+static int
+map_tree_to_data_cmp (const void * v1, const void * v2)
+{
+ const struct map_tree_to_data_def * obj_1
+ = (const struct map_tree_to_data_def *)v1;
+ const struct map_tree_to_data_def * obj_2
+ = (const struct map_tree_to_data_def *)v2;
+
+ return (obj_1->key == obj_2->key);
+}
+
+/* Create new map_tree_to_data with NEW_KEY as key and NEW_VALUE as value. */
+
+static map_tree_to_data
+map_tree_to_data_create (tree new_key,
+ opencl_data new_value)
+{
+ map_tree_to_data tmp = XNEW (struct map_tree_to_data_def);
+ tmp->key = new_key;
+ tmp->value = new_value;
+ return tmp;
+}
+
+/* Create and init new temporary variable with name NAME and
+ type TYPE. */
+
+static tree
+opencl_create_tmp_var (tree type, const char * name)
+{
+ tree tmp = create_tmp_var (type, name);
+ TREE_ADDRESSABLE (tmp) = 1;
+ return tmp;
+}
+
+/* Create new var in basic block DEST to store EXPR and return it. */
+
+tree
+opencl_tree_to_var (basic_block dest, tree expr)
+{
+ tree type = TREE_TYPE (expr);
+ tree var = opencl_create_tmp_var (type, "__ocl_general_tmp_var");
+ gimple_stmt_iterator g_iter = gsi_last_bb (dest);
+
+ tree call = build2 (MODIFY_EXPR, type, var, expr);
+
+ force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
+ GSI_CONTINUE_LINKING);
+
+ return var;
+}
+
+/* Set rw flags to false for all datas, referenced in CODE_GEN. */
+
+static void
+opencl_fflush_rw_flags (opencl_main code_gen)
+{
+ VEC (opencl_data, heap) * datas = code_gen->opencl_function_data;
+ int i;
+ opencl_data curr;
+ for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
+ {
+ curr->written_in_current_body = false;
+ curr->read_in_current_body = false;
+ curr->privatized = false;
+ }
+}
+
+/* Create new basic block on CODE_GEN->main edge and update it. */
+
+basic_block
+opencl_create_bb (opencl_main code_gen)
+{
+ basic_block tmp = split_edge (code_gen->main_edge);
+ code_gen->main_edge = single_succ_edge (tmp);
+ return tmp;
+}
+
+/* All kernels of current function. */
+static VEC(tree, heap) *opencl_function_kernels;
+
+/* OpenCL code for all kernels of current function. */
+static dyn_string_t main_program_src;
+
+/* Delete clast meta DATA. */
+
+static void
+opencl_clast_meta_delete (opencl_clast_meta data)
+{
+ if (!data)
+ return;
+ opencl_clast_meta_delete (data->body);
+ opencl_clast_meta_delete (data->next);
+ BITMAP_FREE (data->modified_on_device);
+ BITMAP_FREE (data->modified_on_host);
+ if (data->access != NULL)
+ BITMAP_FREE (data->access);
+ if (data->can_be_private)
+ BITMAP_FREE (data->can_be_private);
+ free (data);
+}
+
+static inline void
+opencl_verify (void)
+{
+#ifdef ENABLE_CHECKING
+ verify_loop_structure ();
+ verify_dominators (CDI_DOMINATORS);
+ verify_loop_closed_ssa (true);
+#endif
+}
+
+/* OpenCL definitions. */
+#define CL_CONTEXT_PLATFORM 0x1084
+#define CL_CONTEXT_DEVICES 0x1081
+#define CL_DEVICE_TYPE_CPU (1 << 1)
+#define CL_DEVICE_TYPE_GPU (1 << 2)
+#define CL_MEM_COPY_HOST_PTR (1 << 5)
+#define CL_MEM_USE_HOST_PTR (1 << 3)
+#define CL_MEM_READ_WRITE (1 << 0)
+#define CL_MEM_WRITE_ONLY (1 << 1)
+#define CL_MEM_READ_ONLY (1 << 2)
+#define CL_TRUE 1
+
+/* Names of all OpenCL functions, used in GRAPHITE-OpenCL. */
+
+static const char * opencl_function_names[] =
+ {
+ "clCreateContextFromType",
+ "clGetContextInfo",
+ "clCreateCommandQueue",
+ "clCreateProgramWithSource",
+ "clBuildProgram",
+ "clCreateKernel",
+ "clCreateBuffer",
+ "clSetKernelArg",
+ "clEnqueueNDRangeKernel",
+ "clEnqueueReadBuffer",
+ "clEnqueueWriteBuffer",
+ "clReleaseMemObject",
+ "clReleaseContext",
+ "clReleaseCommandQueue",
+ "clReleaseProgram",
+ "clReleaseKernel",
+ "clGetPlatformIDs",
+ "clWaitForEvents"
+ };
+
+#endif
+/* Variable, which holds OpenCL context. */
+static GTY(()) tree h_context;
+
+/* Variable, which holds OpenCL comman queue. */
+static GTY(()) tree h_cmd_queue;
+
+/* Variable, which holds OpenCL program for current function. */
+static GTY(()) tree h_program;
+
+#ifdef HAVE_cloog
+
+/* This vector holds opencl_data, which represents arrays.
+ Arrays have constant sizes, so buffers for each of them can
+ be created only once. */
+static VEC (opencl_data, heap) * opencl_array_data;
+
+/* Hash table, which maps opencl_data, related to arrays, to
+ trees, which represents corresponding array. */
+static htab_t array_data_to_tree;
+
+/* Check whether VAR is a zero dimension array. */
+
+static bool
+zero_dim_array_p (tree var)
+{
+ tree type = TREE_TYPE (var);
+ tree domain;
+ tree up_bound;
+
+ if (TREE_CODE (type) != ARRAY_TYPE)
+ return false;
+ if (TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
+ return false;
+ domain = TYPE_DOMAIN (type);
+ if (domain == NULL)
+ return false;
+ up_bound = TYPE_MAX_VALUE (domain);
+ if (TREE_CODE (up_bound) != INTEGER_CST)
+ return false;
+ return TREE_INT_CST_LOW (up_bound) == 0;
+}
+
+/* Check whether NAME is the name of the artificial array, which can be
+ privatized. */
+
+static bool
+opencl_private_var_name_p (const char * name)
+{
+ static const char * general_reduction = "General_Reduction";
+ static const char * close_phi = "Close_Phi";
+ static const char * cross_bb = "Cross_BB_scalar_dependence";
+ static const char * commutative = "Commutative_Associative_Reduction";
+ if (!name)
+ return false;
+ return
+ ((strstr (name, general_reduction) == name)
+ || (strstr (name, close_phi) == name)
+ || (strstr (name, commutative) == name)
+ || (strstr (name, cross_bb) == name));
+}
+
+/* Check whether VAR is an artificial array, which can be privatized. */
+
+static bool
+graphite_artificial_array_p (tree var)
+{
+ tree name;
+ if (TREE_CODE (var) != VAR_DECL)
+ return false;
+ if (!zero_dim_array_p (var))
+ return false;
+ name = DECL_NAME (var);
+ if (!name)
+ return false;
+ return opencl_private_var_name_p (IDENTIFIER_POINTER (name));
+}
+
+/* Constructors & destructors.
+ <name>_create - creates a new object of such type and returns it.
+ <name>_delete - delete object (like destructor). */
+
+static opencl_data
+opencl_data_create (tree var, tree size)
+{
+ opencl_data tmp = XNEW (struct opencl_data_def);
+ tree type = TREE_TYPE (var);
+
+ tmp->can_be_private = graphite_artificial_array_p (var);
+ tmp->exact_object = var;
+
+ tmp->supported = TREE_CODE (var) == VAR_DECL || TREE_CODE (var) == SSA_NAME;
+
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ var = build_addr (var, current_function_decl);
+
+ tmp->data_dim = opencl_get_non_scalar_type_depth (type);
+ tmp->object = var;
+
+ tmp->size_value = size;
+ tmp->size_variable
+ = opencl_create_tmp_var (size_type_node, "__opencl_data_size");
+
+ tmp->up_to_date_on_host = true;
+ tmp->up_to_date_on_device = true;
+ tmp->used_on_device = false;
+ tmp->ever_read_on_device = false;
+ tmp->ever_written_on_device = false;
+ return tmp;
+}
+
+static void
+opencl_data_delete (opencl_data data)
+{
+ free (data);
+}
+
+static opencl_main
+opencl_main_create (CloogNames * names, sese region, edge main_edge,
+ htab_t params_index)
+{
+ opencl_main tmp = XNEW (struct graphite_opencl_creator);
+ tmp->root_names = names;
+ tmp->defined_vars = NULL;
+ tmp->global_defined_vars = NULL;
+ tmp->region = region;
+ tmp->main_edge = main_edge;
+ tmp->main_program = dyn_string_new (OPENCL_INIT_BUFF_SIZE);
+ tmp->current_body = NULL;
+ tmp->clast_meta = NULL;
+ tmp->curr_meta = NULL;
+ tmp->params_index = params_index;
+ tmp->newivs_index = htab_create (10, clast_name_index_elt_info,
+ eq_clast_name_indexes, free);
+ tmp->ref_to_data = htab_create (10, map_ref_to_data_to_hash,
+ map_ref_to_data_cmp, free);
+ tmp->tree_to_data = htab_create (10, map_tree_to_data_to_hash,
+ map_tree_to_data_cmp, free);
+ tmp->newivs = VEC_alloc (tree, heap, 10);
+ tmp->context_loop = SESE_ENTRY (region)->src->loop_father;
+ tmp->opencl_function_data = VEC_alloc (opencl_data, heap,
+ OPENCL_INIT_BUFF_SIZE);
+ return tmp;
+}
+
+static void
+opencl_main_delete (opencl_main data)
+{
+ int i;
+ opencl_data curr;
+ dyn_string_delete (data->main_program);
+ htab_delete (data->newivs_index);
+ htab_delete (data->ref_to_data);
+ htab_delete (data->tree_to_data);
+ opencl_clast_meta_delete (data->clast_meta);
+ for (i = 0; VEC_iterate (opencl_data, data->opencl_function_data, i, curr);
+ i++)
+ if (!curr->is_static)
+ opencl_data_delete (curr);
+ VEC_free (tree, heap, data->newivs);
+ VEC_free (opencl_data, heap, data->opencl_function_data);
+ free (data);
+}
+
+/* Get depth of type TYPE scalar (base) part. */
+
+static int
+opencl_get_non_scalar_type_depth (tree type)
+{
+ int count = 0;
+ while (TREE_CODE (type) == ARRAY_TYPE
+ || TREE_CODE (type) == POINTER_TYPE)
+ {
+ count++;
+ type = TREE_TYPE (type);
+ }
+ return count;
+}
+
+/* Add function call CALL to edge SRC. If FLAG_GRAPHITE_OPENCL_DEBUG is
+ enabled, then add the following:
+
+ | int result = call ();
+ | if (call == 0 != ZERO_RETURN)
+ | abort ();
+
+ Otherwise just add CALL as function call. */
+
+static edge
+opencl_add_safe_call_on_edge (tree call, bool zero_return, edge src)
+{
+ if (!flag_graphite_opencl_debug)
+ {
+ basic_block bb = split_edge (src);
+ gimple_stmt_iterator g_iter = gsi_last_bb (bb);
+ force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
+ GSI_CONTINUE_LINKING);
+ return single_succ_edge (bb);
+ }
+ else
+ {
+ tree cmp;
+ edge result;
+ basic_block abort_bb;
+ tree abort_funtion;
+ tree abort_call;
+ gimple_stmt_iterator g_iter;
+ if (zero_return)
+ {
+ tree correct_result = build1 (CONVERT_EXPR, TREE_TYPE (call),
+ integer_zero_node);
+ cmp = build2 (EQ_EXPR, boolean_type_node,
+ call, correct_result);
+ }
+ else
+ {
+ tree incorrect_result = build1 (CONVERT_EXPR, TREE_TYPE (call),
+ null_pointer_node);
+
+ cmp = build2 (NE_EXPR, boolean_type_node,
+ call, incorrect_result);
+ }
+ result = create_empty_if_region_on_edge (src, cmp);
+ abort_bb = get_false_edge_from_guard_bb (src->dest)->dest;
+ abort_funtion = build_fn_decl ("abort", build_function_type_list
+ (void_type_node, NULL_TREE));
+
+ abort_call = build_call_expr (abort_funtion, 0);
+
+ g_iter = gsi_last_bb (abort_bb);
+ force_gimple_operand_gsi (&g_iter, abort_call, true, NULL, false,
+ GSI_CONTINUE_LINKING);
+ return result;
+ }
+}
+
+/* Add call CALL to the current edge from CODE_GEN. ZERO_RETURN indicates
+ whether NULL or 0 is the success return value for CALL. */
+
+static void
+opencl_add_safe_call (opencl_main code_gen, tree call, bool zero_return)
+{
+ code_gen->main_edge
+ = opencl_add_safe_call_on_edge (call, zero_return, code_gen->main_edge);
+}
+
+/* Get base object for OBJ. */
+
+tree
+opencl_get_base_object_by_tree (tree obj)
+{
+ while (TREE_CODE (obj) == INDIRECT_REF
+ || TREE_CODE (obj) == ARRAY_REF)
+ obj = TREE_OPERAND (obj, 0);
+ return obj;
+}
+
+/* Get base object from data reference DR. */
+
+tree
+dr_outermost_base_object (data_reference_p dr)
+{
+ tree addr = DR_BASE_ADDRESS (dr);
+ if (!addr)
+ {
+ /* In case, we don't know base object. For example:
+
+ | void foo (int * a)
+ | {
+ | int * b = a + 1;
+ | *b = 0;
+ | }
+
+ Just return the innermost object when the base address is unknown. */
+ tree ref = DR_REF (dr);
+ return opencl_get_base_object_by_tree (ref);
+ }
+
+ if (TREE_CODE (addr) == ADDR_EXPR)
+ addr = TREE_OPERAND (addr, 0);
+ return addr;
+}
+
+/* Get correct basic block for data with DATA_ID transfer. If DEVICE
+ is true, then it's host to device transfer, otherwise it's device
+ to host transfer. CODE_GEN holds information related to code
+ generation. */
+
+static edge
+opencl_get_edge_for_init (opencl_main code_gen, int data_id, bool device)
+{
+ opencl_clast_meta curr = code_gen->curr_meta;
+ if (!curr)
+ return NULL;
+
+ while (curr->parent)
+ {
+ opencl_clast_meta parent = curr->parent;
+ bitmap curr_bitmap
+ = device ? parent->modified_on_host : parent->modified_on_device;
+ if (bitmap_bit_p (curr_bitmap, data_id))
+ break;
+ curr = curr->parent;
+ }
+ return curr->init_edge;
+}
+
+/* Add host to device memory transfer. DATA - data, which must be
+ transfered to device. CODE_GEN holds information related to code
+ generation. */
+
+static tree
+opencl_pass_to_device (opencl_main code_gen, opencl_data data)
+{
+ edge init_edge;
+ tree function;
+ tree call;
+
+ tree index_type = build_index_type (build_int_cst (NULL_TREE, 2));
+ tree array_type = build_array_type (ptr_type_node, index_type);
+ tree var = opencl_create_tmp_var (array_type, "wait_event");
+ tree event_call;
+
+ TREE_STATIC (var) = 1;
+ assemble_variable (var, 1, 0, 1);
+
+ /* If data is wrutten in device, mark it is not up to date on host. */
+ if (data->written_in_current_body)
+ data->up_to_date_on_host = false;
+
+ /* If data is up to date on device, but it was initialized befor
+ current loop, then mark it as initialized in current loop and
+ store it.
+
+ Consider an example: D - device, H - host, W - write, R - read.
+
+ | HW(1) -- LOOP
+ | /\
+ | / \
+ | / \
+ | HR(2) DW(3)
+
+ While analyzing statement (2), data will be up to date on host
+ because of statement (1), but while executing after (3) in loop,
+ (2) will read incorrect data.
+
+ So, we have to add device to host memory transfer after statement (3).
+ */
+ if (flag_graphite_opencl_cpu)
+ return data->device_object;
+
+ if (data->up_to_date_on_device)
+ {
+ if (!data->inited_in_current_loop_on_device
+ && code_gen && code_gen->curr_meta
+ && code_gen->curr_meta->parent)
+ VEC_safe_push (opencl_data, heap,
+ code_gen->curr_meta->parent->post_pass_to_device,
+ data);
+
+ data->inited_in_current_loop_on_device = true;
+ return data->device_object;
+ }
+
+ data->inited_in_current_loop_on_device = true;
+ init_edge = opencl_get_edge_for_init (code_gen, data->id, true);
+
+ /* Add gimple. */
+ function = opencl_create_function_decl (ENQUEUE_WRITE_BUFFER);
+
+ event_call = build4 (ARRAY_REF, ptr_type_node, var,
+ integer_zero_node, NULL_TREE, NULL_TREE);
+ event_call = build_addr (event_call, current_function_decl);
+ call = build_call_expr (function, 9,
+ h_cmd_queue,
+ data->device_object,
+ build_int_cst (NULL_TREE, CL_TRUE),
+ integer_zero_node,
+ data->size_variable,
+ data->object,
+ integer_zero_node,
+ null_pointer_node,
+ event_call);
+ if (init_edge)
+ opencl_add_safe_call_on_edge (call, true, init_edge);
+ else
+ opencl_add_safe_call (code_gen, call, true);
+ data->up_to_date_on_device = true;
+ opencl_wait_for_event (code_gen, event_call);
+ return data->device_object;
+}
+
+/* Add device to host memory transfer. DATA - data, which must be
+ transfered to host. CODE_GEN holds information related to code
+ generation. */
+
+static void
+opencl_pass_to_host (opencl_main code_gen, opencl_data data)
+{
+ edge init_edge;
+ tree function;
+ tree curr_type;
+ tree curr;
+ tree call;
+
+ tree index_type = build_index_type (build_int_cst (NULL_TREE, 2));
+ tree array_type = build_array_type (ptr_type_node, index_type);
+ tree var = opencl_create_tmp_var (array_type, "wait_event");
+ tree event_call;
+
+ TREE_STATIC (var) = 1;
+ assemble_variable (var, 1, 0, 1);
+ if (data->written_in_current_body)
+ data->up_to_date_on_device = false;
+ if (data->up_to_date_on_host)
+ {
+ if (!data->inited_in_current_loop_on_host
+ && code_gen && code_gen->curr_meta &&
+ code_gen->curr_meta->parent)
+ VEC_safe_push (opencl_data, heap,
+ code_gen->curr_meta->parent->post_pass_to_host, data);
+
+ data->inited_in_current_loop_on_host = true;
+ return;
+ }
+
+ data->inited_in_current_loop_on_host = true;
+
+ if (flag_graphite_opencl_cpu)
+ return;
+
+ if (data->privatized)
+ return;
+
+ init_edge = opencl_get_edge_for_init (code_gen, data->id, false);
+
+ function = opencl_create_function_decl (ENQUEUE_READ_BUFFER);
+ curr_type = TREE_TYPE (data->object);
+ curr = data->object;
+
+ if (TREE_CODE (curr_type) == ARRAY_TYPE)
+ curr = build_addr (curr, current_function_decl);
+
+ event_call = build4 (ARRAY_REF, ptr_type_node, var,
+ integer_zero_node, NULL_TREE, NULL_TREE);
+ event_call = build_addr (event_call, current_function_decl);
+
+
+ call = build_call_expr (function, 9,
+ h_cmd_queue,
+ data->device_object,
+ build_int_cst (NULL_TREE, CL_TRUE),
+ integer_zero_node,
+ data->size_variable,
+ curr, integer_zero_node,
+ null_pointer_node,
+ event_call);
+
+ if (init_edge)
+ opencl_add_safe_call_on_edge (call, true, init_edge);
+ else
+ opencl_add_safe_call (code_gen, call, true);
+ opencl_wait_for_event (code_gen, event_call);
+ data->up_to_date_on_host = true;
+}
+
+/* Pass all data from device to host. This function must be called when
+ we need all data to be up to date on host. CODE_GEN holds information
+ related to code generation. */
+
+static void
+opencl_fflush_all_device_buffers_to_host (opencl_main code_gen)
+{
+ VEC (opencl_data, heap) * datas = code_gen->opencl_function_data;
+ int i;
+ opencl_data curr;
+ tree function = opencl_create_function_decl (RELEASE_MEMORY_OBJ);
+ for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
+ {
+ curr->written_in_current_body = true;
+ opencl_pass_to_host (code_gen, curr);
+ }
+ for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
+ {
+ if (curr->used_on_device && !curr->is_static)
+ {
+ tree var = curr->device_object;
+ tree call = build_call_expr (function, 1, var);
+ opencl_add_safe_call (code_gen, call, true);
+ }
+ }
+}
+
+/* Create memory buffers on host for all required host memory objects.
+ CODE_GEN holds information related to code generation. */
+
+static void
+opencl_init_all_device_buffers (opencl_main code_gen)
+{
+ VEC (opencl_data, heap) * datas = code_gen->opencl_function_data;
+ int i;
+ opencl_data curr;
+ edge data_init_edge = single_succ_edge (code_gen->data_init_bb);
+ for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
+ {
+ tree tmp;
+ if (!curr->used_on_device || curr->is_static)
+ continue;
+
+ tmp = opencl_create_memory_for_pointer (curr);
+ tmp = build2 (MODIFY_EXPR, ptr_type_node, curr->device_object, tmp);
+ data_init_edge = opencl_add_safe_call_on_edge (tmp, false,
+ data_init_edge);
+ }
+}
+
+/* Create new static void * variable with name __ocl_ + NAME. */
+
+static tree
+opencl_create_static_ptr_variable (const char * name)
+{
+ const char * id_name = concat ("__ocl_",name, NULL);
+ tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+ create_tmp_var_name (id_name), ptr_type_node);
+ TREE_STATIC (var) = 1;
+ TREE_PUBLIC (var) = 0;
+ DECL_ARTIFICIAL (var) = 1;
+ TREE_USED (var) = 1;
+ TREE_ADDRESSABLE (var) = 1;
+ DECL_INITIAL (var) = null_pointer_node;
+ assemble_variable (var, 1, 0, 1);
+
+ return var;
+}
+
+/* Insert several opencl calls to output program. */
+
+/* | cl_program h_program;
+ | h_program = clCreateProgramWithSource (h_context, 1,
+ | sProgramSource, 0, 0);
+
+ SRC is the program source code and DEST is the edge where
+ call must be inserted. */
+
+static edge
+opencl_insert_create_program_with_source_call (const char * src, edge base)
+{
+ /* Get tree with function definition. */
+ tree function = opencl_create_function_decl (CREATE_PROGRAM_WITH_SOURCE);
+ tree code_tree = build_string_literal (strlen (src) + 1, src);
+ tree call;
+ basic_block bb = split_edge (base);
+ tree tmp_var = opencl_tree_to_var (bb, code_tree);
+
+ call = build_call_expr (function, 5, h_context,
+ integer_one_node,
+ build_addr (tmp_var, current_function_decl),
+ null_pointer_node,
+ null_pointer_node);
+
+ call = build2 (MODIFY_EXPR, ptr_type_node,
+ h_program, call);
+ return opencl_add_safe_call_on_edge (call, false, single_succ_edge (bb));
+}
+
+/* clBuildProgram (h_program, 0, 0, 0, 0, 0);
+ BASE is the edge where call must be inserted. */
+
+static edge
+opencl_insert_build_program_call (edge base)
+{
+ tree function = opencl_create_function_decl (BUILD_PROGRAM);
+ tree call = build_call_expr (function, 6,
+ h_program,
+ integer_zero_node, null_pointer_node,
+ null_pointer_node, null_pointer_node,
+ null_pointer_node);
+ return opencl_add_safe_call_on_edge (call, true, base);
+}
+
+/* cl_kernel tmm_kernel;
+ tmp_kernel = clCreateKernel (h_program, func_name, 0);
+ FUNCTION_NAME is the name of the kernel function,
+ CODE_GEN holds information related to code generation. */
+
+static tree
+opencl_insert_create_kernel_call (opencl_main code_gen,
+ const char *function_name)
+{
+ tree new_kernel_var;
+ basic_block bb;
+ gimple_stmt_iterator g_iter;
+ tree function;
+ tree kernel_name;
+ tree call;
+ tree tmp_tree;
+
+ new_kernel_var = opencl_create_tmp_var (ptr_type_node, function_name);
+ bb = split_edge (code_gen->kernel_edge);
+ g_iter = gsi_last_bb (bb);
+ function = opencl_create_function_decl (CREATE_KERNEL);
+ kernel_name = build_string_literal (strlen (function_name) + 1,
+ function_name);
+ call = build_call_expr (function, 3, h_program, kernel_name,
+ null_pointer_node);
+ tmp_tree = build2 (MODIFY_EXPR, ptr_type_node,
+ new_kernel_var, call);
+
+ code_gen->kernel_edge = single_succ_edge (bb);
+ force_gimple_operand_gsi (&g_iter, tmp_tree, true, NULL, false,
+ GSI_CONTINUE_LINKING);
+ VEC_safe_push (tree, heap, opencl_function_kernels, new_kernel_var);
+ code_gen->kernel_edge
+ = opencl_add_safe_call_on_edge (new_kernel_var, false,
+ code_gen->kernel_edge);
+ return new_kernel_var;
+}
+
+/* Init memory on device. Only one levell of pointers are suppoted.
+ So in case of char ** only array of char * will be created.
+ Function return tree, corresponding to new pointer (pointer
+ on device).
+
+ | cl_mem clCreateBuffer (cl_context context,
+ | cl_mem_flags flags,
+ | size_t size,
+ | void *host_ptr,
+ | cl_int *errcode_ret) */
+
+/* Calculate size of data reference, represented by REF. PTR is a
+ base object of data reference. */
+
+static tree
+opencl_get_indirect_size (tree ptr, poly_dr_p ref)
+{
+ ptr = TREE_TYPE (ptr);
+ switch (TREE_CODE (ptr))
+ {
+ case ARRAY_TYPE:
+ return TYPE_SIZE_UNIT (ptr);
+
+ case POINTER_TYPE:
+ {
+ tree inner_type = TREE_TYPE (ptr);
+ tree t = graphite_outer_subscript_bound (ref, false);
+ tree inner_type_size = TYPE_SIZE_UNIT (inner_type);
+ if (inner_type_size == NULL)
+ return NULL;
+
+ if (DECL_P (inner_type_size))
+ add_referenced_var (inner_type_size);
+
+ gcc_assert (t);
+ t = fold_build2 (TRUNC_DIV_EXPR, sizetype, t, inner_type_size);
+ t = fold_build2 (PLUS_EXPR, sizetype, t, size_one_node);
+ t = fold_build2 (MULT_EXPR, sizetype, t, inner_type_size);
+ return t;
+ }
+ default:
+ return NULL_TREE;
+ }
+ gcc_unreachable ();
+}
+
+/* Calculate correct flags for clCreateBuffer. READ means, that
+ buffer must be readable on device, WRITE - that buffer must be
+ writable on device. */
+
+static int
+opencl_get_mem_flags (bool read, bool write)
+{
+ int rw_flags;
+ int location_flags;
+ gcc_assert (read || write);
+ if (write && read)
+ rw_flags = CL_MEM_READ_WRITE;
+ else
+ {
+ if (read)
+ rw_flags = CL_MEM_READ_ONLY;
+ else
+ rw_flags = CL_MEM_WRITE_ONLY;
+ }
+ if (flag_graphite_opencl_cpu)
+ location_flags = CL_MEM_USE_HOST_PTR;
+ else
+ location_flags = CL_MEM_COPY_HOST_PTR;
+ return location_flags | rw_flags;
+}
+
+/* Create memory on device for DATA and init it by data from host.
+ ptr is pointer to host memory location. Function returns tree,
+ corresponding to memory location on device. */
+
+static tree
+opencl_create_memory_for_pointer (opencl_data data)
+{
+ tree ptr = data->object;
+ tree arr_size = data->size_variable;
+ tree function = opencl_create_function_decl (CREATE_BUFFER);
+ bool ever_read = data->ever_read_on_device;
+ bool ever_written = data->ever_written_on_device;
+ tree mem_flags = build_int_cst (NULL_TREE,
+ opencl_get_mem_flags (ever_read,
+ ever_written));
+ if (TREE_CODE (TREE_TYPE (ptr)) == ARRAY_TYPE)
+ ptr = build_addr (ptr, current_function_decl);
+
+ if (flag_graphite_opencl_debug)
+ {
+ tree result = opencl_create_tmp_var (integer_type_node,
+ "__opencl_create_buffer_result");
+
+ return build_call_expr (function, 5,
+ h_context, mem_flags,
+ arr_size, ptr,
+ build1 (ADDR_EXPR,
+ integer_ptr_type_node,
+ result));
+ }
+ else
+ return build_call_expr (function, 5,
+ h_context, mem_flags,
+ arr_size, ptr, null_pointer_node);
+}
+
+/* Create variables for kernel KERNEL arguments. Each argument is
+ represented by new variable with it's value and it's size. If arg
+ is a pointer or array, it's represented by device buffer with data
+ from host memory. CODE_GEN holds information related to code
+ generation. */
+
+static void
+opencl_init_local_device_memory (opencl_main code_gen, opencl_body kernel)
+{
+ VEC (tree, heap) ** args = &kernel->function_args;
+ VEC (tree, heap) ** args_to_pass = &kernel->function_args_to_pass;
+ VEC (opencl_data, heap) ** refs = &kernel->data_refs;
+ tree curr;
+ opencl_data curr_data;
+ int i;
+ basic_block bb = opencl_create_bb (code_gen);
+ basic_block kernel_bb = split_edge (code_gen->kernel_edge);
+ code_gen->kernel_edge = single_succ_edge (kernel_bb);
+ for (i = 0; VEC_iterate (tree, *args, i, curr); i ++)
+ {
+ gimple_stmt_iterator g_iter = gsi_last_bb (bb);
+ gimple_stmt_iterator kernel_g_iter = gsi_last_bb (kernel_bb);
+ tree curr_type = TREE_TYPE (curr);
+ tree new_type;
+ tree tmp_var;
+ tree mov;
+ tree curr_var = opencl_create_tmp_var (curr_type, "__ocl_iv");
+ if (TREE_CODE (curr) != PARM_DECL
+ && TREE_CODE (curr) != VAR_DECL)
+ {
+ mov = build2 (MODIFY_EXPR, curr_type, curr_var, curr);
+
+ force_gimple_operand_gsi (&g_iter, mov, false, NULL, false,
+ GSI_CONTINUE_LINKING);
+ }
+ else
+ force_gimple_operand_gsi (&g_iter, curr, false, curr_var, false,
+ GSI_CONTINUE_LINKING);
+ curr = curr_var;
+
+ new_type = build_pointer_type (curr_type);
+ tmp_var = opencl_create_tmp_var (new_type, "__opencl_scalar_arg");
+ mov = build1 (ADDR_EXPR, new_type, curr);
+
+ mov = build2 (MODIFY_EXPR, new_type, tmp_var, mov);
+
+ force_gimple_operand_gsi (&kernel_g_iter, mov, false, NULL, false,
+ GSI_CONTINUE_LINKING);
+ VEC_safe_push (tree, heap, *args_to_pass, tmp_var);
+ }
+ for (i = 0; VEC_iterate (opencl_data, *refs, i, curr_data); i++)
+ {
+ gimple_stmt_iterator kernel_g_iter = gsi_last_bb (kernel_bb);
+ tree new_type;
+ tree tmp_var;
+ tree mov;
+
+ tree curr = opencl_pass_to_device (code_gen, curr_data);
+ tree curr_type = ptr_type_node;
+
+ new_type = build_pointer_type (curr_type);
+ tmp_var = opencl_create_tmp_var (new_type, "__opencl_non_scalar_arg");
+ mov = build1 (ADDR_EXPR, new_type, curr);
+
+ mov = build2 (MODIFY_EXPR, new_type, tmp_var, mov);
+
+ force_gimple_operand_gsi (&kernel_g_iter, mov, false, NULL, false,
+ GSI_CONTINUE_LINKING);
+ VEC_safe_push (tree, heap, *args_to_pass, tmp_var);
+
+ }
+}
+
+/* cl_int clSetKernelArg (cl_kernel kernel,
+ cl_uint arg_index,
+ size_t arg_size,
+ const void *arg_value)
+
+ Set all kernel args for OpenCL kernel, represented by KERNEL_VAR.
+ KERNEL holds all data, related to given kernel.
+ CODE_GEN holds information related to code generation.
+ All arguments are passed by pointer. */
+
+static void
+opencl_pass_kernel_arguments (opencl_main code_gen, opencl_body kernel,
+ tree kernel_var)
+{
+ VEC (tree, heap) * args_to_pass = kernel->function_args_to_pass;
+ tree arg;
+ int i;
+ tree function = opencl_create_function_decl (SET_KERNEL_ARG);
+ for (i = 0; VEC_iterate (tree, args_to_pass, i, arg); i++)
+ {
+ tree call
+ = build_call_expr (function, 4, kernel_var,
+ build_int_cst (NULL_TREE, i),
+ TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (arg))),
+ arg);
+ opencl_add_safe_call (code_gen, call, true);
+ }
+}
+
+/* clEnqueueNDRangeKernel (h_cmd_queue, hKernel, 1, 0,
+ &cnDimension, 0, 0, 0, 0);
+
+ Execute kernel, represented by KERNEL_VAR in NUM_OF_EXEC threads.
+ Use EVENT_VAR as event variable for asynchronous call.
+ CODE_GEN holds information related to code generation. */
+
+static void
+opencl_execute_kernel (opencl_main code_gen, tree num_of_exec,
+ tree kernel_var, tree event_var)
+{
+ tree function = opencl_create_function_decl (ENQUEUE_ND_RANGE_KERNEL);
+ tree num_of_threads = opencl_create_tmp_var (integer_type_node,
+ "__opencl_num_of_exec");
+ gimple_stmt_iterator g_iter = gsi_last_bb (opencl_create_bb (code_gen));
+ tree call;
+
+ TREE_STATIC (num_of_threads) = 1;
+ assemble_variable (num_of_threads, 1, 0, 1);
+
+ call = build2 (MODIFY_EXPR, integer_type_node, num_of_threads, num_of_exec);
+
+
+ force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
+ GSI_CONTINUE_LINKING);
+
+ call = build1 (ADDR_EXPR, integer_ptr_type_node, num_of_threads);
+
+ call = build_call_expr (function, 9,
+ h_cmd_queue,
+ kernel_var,
+ integer_one_node,
+ null_pointer_node,
+ call,
+ null_pointer_node,
+ integer_zero_node,
+ null_pointer_node,
+ event_var);
+
+ opencl_add_safe_call (code_gen, call, true);
+}
+
+/* Place building program from single source string to edge BASE.
+ Current implementation performs single build per function.
+ String contains kernels from all scops of current function.
+ Functions returns true if any kernel has been created. */
+
+static edge
+opencl_create_function_call (edge base)
+{
+ edge new_edge;
+ const char * src;
+
+ /* Required for addressing types with size less then 4 bytes. */
+ dyn_string_prepend_cstr
+ (main_program_src,
+ "#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n");
+ /* Required for double type. */
+ dyn_string_prepend_cstr
+ (main_program_src, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n ");
+ src = dyn_string_buf (main_program_src);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nGenerated OpenCL code: \n");
+ fprintf (dump_file, "%s", src);
+ }
+ new_edge = opencl_insert_create_program_with_source_call (src, base);
+
+ return opencl_insert_build_program_call (new_edge);
+}
+
+/* Mark privatizable data for current loop nest. Information where
+ given data can be privatized is taken from meta information of
+ current loop nest, which is stored in CODE_GEN. */
+
+static void
+opencl_mark_privatized_data (opencl_main code_gen)
+{
+ VEC (opencl_data, heap) * datas = code_gen->opencl_function_data;
+ int i;
+ opencl_data curr;
+ bitmap can_be_private = code_gen->curr_meta->can_be_private;
+ for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
+ curr->privatized = bitmap_bit_p (can_be_private, curr->id);
+
+}
+
+/* Store calculated sizes of all pointers or arrays to variables.
+ CODE_GEN holds information related to code generation. */
+
+static void
+opencl_set_data_size (opencl_main code_gen)
+{
+ VEC (opencl_data, heap) * datas = code_gen->opencl_function_data;
+ int i;
+ opencl_data curr;
+ gimple_stmt_iterator g_iter = gsi_last_bb (code_gen->data_init_bb);
+ for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++)
+ {
+ tree call;
+ if (curr->is_static)
+ continue;
+ if (!curr->used_on_device)
+ continue;
+
+ if (curr->size_value == NULL)
+ continue;
+
+ call = build2 (MODIFY_EXPR, size_type_node,
+ curr->size_variable, curr->size_value);
+
+ force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
+ GSI_CONTINUE_LINKING);
+ }
+}
+
+/* Transform clast statement DATA from scop SCOP to OpenCL calls
+ in region REGION. Place all calls to edge MAIN. PARAM_INDEX
+ holds external scop params. */
+
+void
+opencl_transform_clast (struct clast_stmt * data, sese region,
+ edge main, scop_p scop, htab_t params_index)
+{
+ opencl_main code_gen;
+ /* Create main data struture for code generation. */
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nGenerating OpenCL code for SCoP: \n");
+ print_scop (dump_file, scop, 0);
+ }
+
+ code_gen = opencl_main_create (((struct clast_root *)data)->names,
+ region, main, params_index);
+
+ opencl_init_basic_blocks (code_gen);
+ opencl_init_data (scop, code_gen);
+
+ code_gen->clast_meta = opencl_create_meta_from_clast (code_gen, data, 1,
+ NULL);
+ code_gen->curr_meta = code_gen->clast_meta;
+
+ opencl_transform_stmt_list (data, code_gen, 1);
+ if (dyn_string_length (code_gen->main_program) != 0)
+ {
+ dyn_string_append (main_program_src, code_gen->main_program);
+ opencl_set_data_size (code_gen);
+ opencl_init_all_device_buffers (code_gen);
+ opencl_fflush_all_device_buffers_to_host (code_gen);
+ }
+ recompute_all_dominators ();
+ update_ssa (TODO_update_ssa);
+ opencl_main_delete (code_gen);
+}
+
+/* Prepare memory for gimple (host) statement, represented by PBB.
+ Copy memory from device to host if it's nessesary.
+ CODE_GEN holds information related to code generation. */
+
+static void
+opencl_prepare_memory_for_gimple_stmt (poly_bb_p pbb, opencl_main code_gen)
+{
+ VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb);
+ int i;
+ poly_dr_p curr;
+ for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++)
+ {
+ data_reference_p d_ref = (data_reference_p) PDR_CDR (curr);
+ opencl_data data;
+ bool is_read;
+
+ /* Scalar variables can be passed directly. */
+ data = opencl_get_data_by_data_ref (code_gen, d_ref);
+ /* Private variables should not be passed from device to host. */
+ if (data->privatized)
+ continue;
+ is_read = DR_IS_READ (d_ref);
+ gcc_assert (data);
+
+ data->read_in_current_body = is_read;
+ data->written_in_current_body = !is_read;
+ opencl_pass_to_host (code_gen, data);
+
+ if (!is_read)
+ bitmap_set_bit (code_gen->curr_meta->modified_on_host, data->id);
+ }
+}
+
+/* Add basic block from clast_user_stmt STMT to gimple.
+ CODE_GEN holds information related to code generation. */
+
+static void
+opencl_add_gimple_for_user_stmt (struct clast_user_stmt * stmt,
+ opencl_main code_gen)
+{
+ gimple_bb_p gbb;
+ CloogStatement *cs = stmt->statement;
+ poly_bb_p pbb = (poly_bb_p) cloog_statement_usr (cs);
+ sese region = code_gen->region;
+ int nb_loops = number_of_loops ();
+ int i;
+ VEC (tree, heap) * iv_map = VEC_alloc (tree, heap, nb_loops);
+ htab_t newivs_index = code_gen->newivs_index;
+ VEC (tree, heap) * newivs = code_gen->newivs;
+ /* Get basic block to add. */
+ gbb = PBB_BLACK_BOX (pbb);
+
+ if (GBB_BB (gbb) == ENTRY_BLOCK_PTR)
+ return;
+
+ /*Reset flags. */
+ opencl_fflush_rw_flags (code_gen);
+
+ /* Pass all required memory to host. */
+ opencl_prepare_memory_for_gimple_stmt (pbb, code_gen);
+
+ for (i = 0; i < nb_loops; i++)
+ VEC_quick_push (tree, iv_map, NULL_TREE);
+
+ build_iv_mapping (iv_map, region, newivs, newivs_index,
+ stmt, code_gen->params_index);
+ code_gen->main_edge
+ = copy_bb_and_scalar_dependences (GBB_BB (gbb), region,
+ code_gen->main_edge, iv_map);
+ VEC_free (tree, heap, iv_map);
+ recompute_all_dominators ();
+ update_ssa (TODO_update_ssa);
+
+ opencl_verify ();
+}
+
+/* Add if statement, represented by S to current gimple.
+ CODE_GEN holds information related to code generation. */
+
+static void
+opencl_add_gimple_for_stmt_guard (struct clast_guard * s,
+ opencl_main code_gen, int depth)
+{
+ edge last_e = graphite_create_new_guard (code_gen->region,
+ code_gen->main_edge, s,
+ code_gen->newivs,
+ code_gen->newivs_index,
+ code_gen->params_index);
+
+ edge true_e = get_true_edge_from_guard_bb (code_gen->main_edge->dest);
+ code_gen->main_edge = true_e;
+ opencl_transform_stmt_list (s->then, code_gen, depth);
+ code_gen->main_edge = last_e;
+
+ recompute_all_dominators ();
+ opencl_verify ();
+}
+
+/* Reset data structures before processing loop, represented by META.
+ CODE_GEN holds information related to code generation. */
+
+static void
+opencl_init_new_loop (opencl_clast_meta meta, opencl_main code_gen)
+{
+ opencl_data curr;
+ unsigned i;
+ meta->post_pass_to_host
+ = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
+ meta->post_pass_to_device
+ = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
+
+ for (i = 0;
+ VEC_iterate (opencl_data, code_gen->opencl_function_data, i, curr);
+ i++)
+ {
+ curr->inited_in_current_loop_on_host = false;
+ curr->inited_in_current_loop_on_device = false;
+ }
+}
+
+/* Post loop init. Loop through all data stored in POST_PASS_TO_HOST
+ and POST_PASS_TO_DEVICE vectors in META. This data must be up to
+ date on host or device respectively at the end of current loop.
+ CODE_GEN holds information related to code generation. */
+
+static void
+opencl_postpass_data (opencl_main code_gen, opencl_clast_meta meta)
+{
+ opencl_data curr;
+ unsigned i;
+ for (i = 0;
+ VEC_iterate (opencl_data, meta->post_pass_to_host, i, curr); i++)
+ {
+ curr->written_in_current_body = false;
+ opencl_pass_to_host (code_gen, curr);
+ }
+
+ if (!flag_graphite_opencl_cpu)
+ for (i = 0;
+ VEC_iterate (opencl_data, meta->post_pass_to_device, i, curr); i++)
+ {
+ curr->written_in_current_body = false;
+ opencl_pass_to_device (code_gen, curr);
+ }
+ if (meta->parent)
+ {
+ VEC (opencl_data, heap) *parent_vec_host
+ = meta->parent->post_pass_to_host;
+ VEC (opencl_data, heap) *parent_vec_device
+ = meta->parent->post_pass_to_device;
+
+ for (i = 0;
+ VEC_iterate (opencl_data, meta->post_pass_to_host, i, curr); i++)
+ VEC_safe_push (opencl_data, heap, parent_vec_host, curr);
+
+ for (i = 0;
+ VEC_iterate (opencl_data, meta->post_pass_to_device, i, curr); i++)
+ VEC_safe_push (opencl_data, heap, parent_vec_device, curr);
+ }
+ VEC_free (opencl_data, heap, meta->post_pass_to_host);
+ VEC_free (opencl_data, heap, meta->post_pass_to_device);
+}
+
+/* Add loop body, of the loop, represented by S, on host.
+ Loop body can contain device code.
+ DEPTH contains depth of given loop in current loop nest.
+ DEPENDENCY indicates where given loop has any dependencies.
+ CODE_GEN holds information related to code generation. */
+
+static void
+opencl_add_gimple_for_loop (struct clast_for * s, opencl_main code_gen,
+ int depth, bool dependency)
+{
+ loop_p old_parent = code_gen->context_loop;
+ loop_p new_loop
+ = graphite_create_new_loop (code_gen->region,
+ code_gen->main_edge,
+ s, code_gen->context_loop,
+ & code_gen->newivs,
+ code_gen->newivs_index,
+ code_gen->params_index,
+ depth);
+
+ edge last_e = single_exit (new_loop);
+ edge to_body = single_succ_edge (new_loop->header);
+ basic_block after = to_body->dest;
+ opencl_clast_meta parent = code_gen->curr_meta->parent;
+ last_e = single_succ_edge (split_edge (last_e));
+
+ code_gen->context_loop = new_loop;
+ code_gen->main_edge = single_succ_edge (new_loop->header);
+
+ opencl_init_new_loop (parent, code_gen);
+ opencl_transform_stmt_list (s->body, code_gen, depth + 1);
+
+ code_gen->context_loop = old_parent;
+
+ redirect_edge_succ_nodup (code_gen->main_edge, after);
+ set_immediate_dominator (CDI_DOMINATORS, code_gen->main_edge->dest,
+ code_gen->main_edge->src);
+
+ opencl_postpass_data (code_gen, parent);
+ code_gen->main_edge = last_e;
+
+ if (flag_loop_parallelize_all && !dependency)
+ new_loop->can_be_parallel = true;
+ opencl_verify ();
+}
+
+/* Add loop, represented by S, on host. Loop body can contain device code.
+ DEPTH contains depth of given loop in current loop nest.
+ DEPENDENCY indicates where given loop has any dependencies.
+ CODE_GEN holds information related to code generation. */
+
+static void
+opencl_add_gimple_for_stmt_for (struct clast_for * s, opencl_main code_gen,
+ int depth, bool dependency)
+{
+ edge last_e = graphite_create_new_loop_guard (code_gen->region,
+ code_gen->main_edge,
+ s, code_gen->newivs,
+ code_gen->newivs_index,
+ code_gen->params_index);
+
+ edge true_e = get_true_edge_from_guard_bb (code_gen->main_edge->dest);
+ code_gen->main_edge = true_e;
+
+ opencl_add_gimple_for_loop (s, code_gen, depth, dependency);
+ code_gen->main_edge = last_e;
+}
+
+/* Calculate parent data access flags in META based on children.
+ parent->modified_on_host = OR_{forall children} child->modified_on_host.
+ parent->modified_on_device = OR_{forall children} child->modified_on_device.
+*/
+
+static void
+opencl_fix_meta_flags (opencl_clast_meta meta)
+{
+ opencl_clast_meta curr = meta->body;
+ while (curr)
+ {
+ bitmap_ior_into (meta->modified_on_host, curr->modified_on_host);
+ bitmap_ior_into (meta->modified_on_device, curr->modified_on_device);
+ curr = curr->next;
+ }
+}
+
+/* Delete opencl_body DATA. */
+
+static void
+opencl_body_delete (opencl_body data)
+{
+ dyn_string_delete (data->body);
+ dyn_string_delete (data->header);
+ dyn_string_delete (data->pre_header);
+ dyn_string_delete (data->non_scalar_args);
+ VEC_free (tree, heap, data->function_args);
+ VEC_free (tree, heap, data->function_args_to_pass);
+ VEC_free (opencl_data, heap, data->data_refs);
+ free (data);
+}
+
+/* Parse clast statement list S, located on depth DEPTH in current loop nest.
+ This function generates gimple from clast statements, but in case of
+ stmt_for either host or device code can be generated.
+ CODE_GEN holds information related to code generation. */
+
+static void
+opencl_transform_stmt_list (struct clast_stmt * s, opencl_main code_gen,
+ int depth)
+{
+ bool dump_p = dump_file && (dump_flags & TDF_DETAILS);
+ for ( ; s; s = s->next)
+ {
+ opencl_clast_meta tmp = code_gen->curr_meta;
+ if (CLAST_STMT_IS_A (s, stmt_root))
+ continue;
+ else if (CLAST_STMT_IS_A (s, stmt_user))
+ {
+ code_gen->curr_meta->init_edge = code_gen->main_edge;
+ opencl_add_gimple_for_user_stmt ((struct clast_user_stmt *) s,
+ code_gen);
+ code_gen->curr_meta = code_gen->curr_meta->next;
+ }
+ else if (CLAST_STMT_IS_A (s, stmt_for))
+ {
+ opencl_clast_meta current_clast = code_gen->curr_meta;
+ struct clast_for *for_stmt = (struct clast_for *) s;
+ bool dependency = false;
+ bool parallel = false;
+ /* If there are dependencies in loop, it can't be parallelized. */
+ if (!flag_graphite_opencl_no_dep_check &&
+ dependency_in_clast_loop_p (code_gen, current_clast,
+ for_stmt, depth))
+ {
+ if (dump_p)
+ fprintf (dump_file, "dependency in loop\n");
+ dependency = true;
+ }
+ if (!dependency)
+ parallel = opencl_should_be_parallel_p (code_gen, current_clast,
+ depth);
+
+ /* Create init block for memory transfer befor loop. */
+ current_clast->init_edge = code_gen->main_edge;
+
+ if (parallel && !dependency)
+ {
+ opencl_body current_body;
+ opencl_fflush_rw_flags (code_gen);
+ opencl_mark_privatized_data (code_gen);
+ current_clast->on_device = true;
+ current_body
+ = opencl_clast_to_kernel (for_stmt, code_gen, depth);
+
+ if (current_body->num_of_data_writes)
+ {
+ dyn_string_t header = current_body->header;
+ dyn_string_t pre_header = current_body->pre_header;
+ dyn_string_t body = current_body->body;
+
+ dyn_string_append (code_gen->main_program, header);
+ dyn_string_append (code_gen->main_program, pre_header);
+ dyn_string_append (code_gen->main_program, body);
+
+ opencl_create_gimple_for_body (current_body, code_gen);
+ }
+ htab_delete (code_gen->global_defined_vars);
+ update_ssa (TODO_update_ssa);
+ opencl_verify ();
+ opencl_body_delete (current_body);
+ code_gen->current_body = NULL;
+ }
+ else
+ {
+ code_gen->curr_meta = code_gen->curr_meta->body;
+ opencl_add_gimple_for_stmt_for (for_stmt, code_gen,
+ depth, dependency);
+ }
+
+ opencl_fix_meta_flags (current_clast);
+ code_gen->curr_meta = current_clast->next;
+ }
+ else if (CLAST_STMT_IS_A (s, stmt_guard))
+ opencl_add_gimple_for_stmt_guard ((struct clast_guard *) s,
+ code_gen, depth);
+ else if (CLAST_STMT_IS_A (s, stmt_block))
+ opencl_transform_stmt_list (((struct clast_block *)s)->body,
+ code_gen, depth);
+ else
+ gcc_unreachable ();
+ if (tmp->parent)
+ opencl_fix_meta_flags (tmp->parent);
+ }
+}
+
+/* Find opencl_data object by host object OBJ in CODE_GEN hash maps. */
+
+opencl_data
+opencl_get_data_by_tree (opencl_main code_gen, tree obj)
+{
+ map_tree_to_data tmp = map_tree_to_data_create (obj, NULL);
+ map_tree_to_data * slot
+ = (map_tree_to_data *) htab_find_slot (code_gen->tree_to_data,
+ tmp, INSERT);
+ if (*slot == NULL)
+ return NULL;
+
+ return (*slot)->value;
+}
+
+/* Find opencl_data object by data reference REF in CODE_GEN hash maps. */
+
+opencl_data
+opencl_get_data_by_data_ref (opencl_main code_gen, data_reference_p ref)
+{
+ map_ref_to_data tmp = map_ref_to_data_create (ref, NULL);
+ map_ref_to_data * slot
+ = (map_ref_to_data *) htab_find_slot (code_gen->ref_to_data,
+ tmp, INSERT);
+ if (*slot == NULL)
+ return NULL;
+
+ return (*slot)->value;
+}
+
+/* Register reference to DATA via data reference REF_KEY and
+ variable TREE_KEY in CODE_GEN structures. */
+
+static void
+opencl_register_data (opencl_main code_gen, opencl_data data,
+ tree tree_key, data_reference_p ref_key)
+{
+ htab_t ref_to_data = code_gen->ref_to_data;
+ htab_t tree_to_data = code_gen->tree_to_data;
+ map_ref_to_data ref_ptr = map_ref_to_data_create (ref_key, data);
+ map_tree_to_data tree_ptr = map_tree_to_data_create (tree_key, data);
+ map_ref_to_data * ref_slot;
+ map_tree_to_data * tree_slot;
+
+
+ ref_slot
+ = (map_ref_to_data *) htab_find_slot (ref_to_data, ref_ptr, INSERT);
+ gcc_assert (*ref_slot == NULL);
+ *ref_slot = ref_ptr;
+
+
+ tree_slot
+ = (map_tree_to_data *) htab_find_slot (tree_to_data, tree_ptr, INSERT);
+ gcc_assert (*tree_slot == NULL || (*tree_slot)->value == data);
+ *tree_slot = tree_ptr;
+}
+
+/* Create required OpenCL variable for given DATA. */
+
+static void
+opencl_data_init_object (opencl_data data)
+{
+ if (TREE_CODE (TREE_TYPE (data->exact_object)) == POINTER_TYPE)
+ {
+ data->device_object
+ = opencl_create_tmp_var (ptr_type_node, "__opencl_data");
+ data->is_static = false;
+ }
+ else
+ {
+ /* (TREE_CODE (TREE_TYPE (data->exact_object)) == ARRAY_TYPE) */
+ map_tree_to_data tree_ptr
+ = map_tree_to_data_create (data->exact_object, data);
+
+ map_tree_to_data * tree_slot =
+ (map_tree_to_data *) htab_find_slot (array_data_to_tree,
+ tree_ptr, INSERT);
+ gcc_assert (*tree_slot == NULL);
+ *tree_slot = tree_ptr;
+
+ data->device_object
+ = opencl_create_static_ptr_variable ("__opencl_data");
+ data->is_static = true;
+ data->size_variable = data->size_value;
+ VEC_safe_push (opencl_data, heap, opencl_array_data, data);
+ }
+}
+
+/* Find opencl_data which represents array VAR. */
+
+static opencl_data
+opencl_get_static_data_by_tree (tree var)
+{
+ map_tree_to_data tmp = map_tree_to_data_create (var, NULL);
+ map_tree_to_data * slot
+ = (map_tree_to_data *) htab_find_slot (array_data_to_tree,
+ tmp, INSERT);
+ if (*slot == NULL)
+ return NULL;
+
+ return (*slot)->value;
+
+}
+
+/* Analyze single data reference REF and update CODE_GEN structures.
+ If it access data, which has been accessed in data references
+ before, update it's size. Otherwise add data to array. */
+
+static void
+opencl_parse_single_data_ref (poly_dr_p ref, opencl_main code_gen)
+{
+ data_reference_p d_ref = (data_reference_p) PDR_CDR (ref);
+ tree data_ref_tree = dr_outermost_base_object (d_ref);
+ tree size = NULL_TREE;
+ opencl_data curr;
+
+ curr = opencl_get_data_by_tree (code_gen, data_ref_tree);
+ size = opencl_get_indirect_size (data_ref_tree, ref);
+ if (curr)
+ {
+ if (!curr->is_static)
+ {
+ if (!size || !curr->size_value)
+ curr->size_value = NULL;
+ else
+ curr->size_value = fold_build2 (MAX_EXPR, sizetype,
+ size, curr->size_value);
+ }
+ }
+ else
+ {
+ curr = opencl_get_static_data_by_tree (data_ref_tree);
+ if (!curr)
+ {
+ curr = opencl_data_create (data_ref_tree, size);
+ opencl_data_init_object (curr);
+ }
+ curr->id = VEC_length (opencl_data, code_gen->opencl_function_data);
+ VEC_safe_push (opencl_data, heap, code_gen->opencl_function_data, curr);
+ }
+ opencl_register_data (code_gen, curr, data_ref_tree, d_ref);
+}
+
+/* Analyse all data reference for poly basic block PBB and update CODE_GEN
+ structures. */
+
+static void
+opencl_parse_data_refs (poly_bb_p pbb, opencl_main code_gen)
+{
+ VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb);
+ int i;
+ poly_dr_p curr;
+ for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++)
+ opencl_parse_single_data_ref (curr, code_gen);
+}
+
+/* Analyse all data reference for scop M_SCOP and update
+ CODE_GEN structures. */
+
+static void
+opencl_init_data (scop_p m_scop, opencl_main code_gen)
+{
+ VEC (poly_bb_p, heap) * bbs = SCOP_BBS (m_scop);
+ int i;
+ poly_bb_p curr;
+ for (i = 0; VEC_iterate (poly_bb_p, bbs, i, curr); i++)
+ opencl_parse_data_refs (curr, code_gen);
+}
+
+/* Add clWaitForEvent (1, EVENT_VAR); call to CODE_GEN->main_edge. */
+
+static void
+opencl_wait_for_event (opencl_main code_gen, tree event_var)
+{
+ tree function = opencl_create_function_decl (WAIT_FOR_EVENTS);
+ tree call = build_call_expr (function, 2,
+ integer_one_node,
+ event_var);
+ opencl_add_safe_call (code_gen, call, true);
+}
+
+/* This calls must be placed after outermost loop processing. */
+
+/* Add function calls to create and launch kernel KERNEL to
+ CODE_GEN->main_edge. */
+
+static void
+opencl_create_gimple_for_body (opencl_body kernel, opencl_main code_gen)
+{
+ tree num_of_exec = kernel->num_of_exec;
+ tree call;
+
+ tree kernel_var
+ = opencl_insert_create_kernel_call (code_gen, (const char *)kernel->name);
+
+ tree index_type = build_index_type (build_int_cst (NULL_TREE, 2));
+ tree array_type = build_array_type (ptr_type_node, index_type);
+ tree var = opencl_create_tmp_var (array_type, "wait_event");
+ TREE_STATIC (var) = 1;
+ assemble_variable (var, 1, 0, 1);
+
+ call = build4 (ARRAY_REF, ptr_type_node, var,
+ integer_zero_node, NULL_TREE, NULL_TREE);
+ call = build_addr (call, current_function_decl);
+
+ opencl_init_local_device_memory (code_gen, kernel);
+ opencl_pass_kernel_arguments (code_gen, kernel, kernel_var);
+
+ opencl_execute_kernel (code_gen, num_of_exec, kernel_var, call);
+ opencl_wait_for_event (code_gen, call);
+}
+
+/* Create global variables for opencl code. */
+
+static void
+opencl_create_gimple_variables (void)
+{
+ static bool opencl_var_created = false;
+ if (opencl_var_created)
+ return;
+
+ opencl_var_created = true;
+ /* cl_context h_context */
+ h_context = opencl_create_static_ptr_variable ("__ocl_h_context");
+
+ /* cl_command_queue h_cmd_queue */
+ h_cmd_queue = opencl_create_static_ptr_variable ("__ocl_h_cmd_queue");
+}
+
+/* Return tree, which represents function selected by ID.
+ If ID is STATIC_INIT, init all required data. */
+
+static tree
+opencl_create_function_decl (enum OPENCL_FUNCTUONS id)
+{
+ static tree create_context_from_type_decl = NULL;
+ static tree get_context_info_decl = NULL;
+ static tree create_command_queue_decl = NULL;
+ static tree create_program_with_source_decl = NULL;
+ static tree build_program_decl = NULL;
+ static tree create_kernel_decl = NULL;
+ static tree create_buffer_decl = NULL;
+ static tree set_kernel_arg_decl = NULL;
+ static tree enqueue_nd_range_kernel_decl = NULL;
+ static tree enqueue_read_buffer_decl = NULL;
+ static tree enqueue_write_buffer_decl = NULL;
+ static tree release_memory_obj_decl = NULL;
+ static tree release_context_decl = NULL;
+ static tree release_command_queue_decl = NULL;
+ static tree release_program_decl = NULL;
+ static tree release_kernel_decl = NULL;
+ static tree get_platform_ids_decl = NULL;
+ static tree get_wait_for_events_decl = NULL;
+ switch (id)
+ {
+ case STATIC_INIT:
+ {
+ tree const_char_type = build_qualified_type (char_type_node,
+ TYPE_QUAL_CONST);
+ tree const_char_ptr = build_pointer_type (const_char_type);
+ tree const_char_ptr_ptr = build_pointer_type (const_char_ptr);
+
+ tree const_size_t = build_qualified_type (size_type_node,
+ TYPE_QUAL_CONST);
+ tree const_size_t_ptr = build_pointer_type (const_size_t);
+
+ tree size_t_ptr = build_pointer_type (size_type_node);
+
+ tree cl_device_type = integer_type_node;
+ tree cl_context_info = unsigned_type_node;
+ tree cl_command_queue_properties = long_unsigned_type_node;
+ tree cl_mem_flags = long_unsigned_type_node;
+
+ tree cl_context = ptr_type_node;
+ tree cl_context_properties = ptr_type_node;
+ tree cl_command_queue = ptr_type_node;
+ tree cl_device_id = ptr_type_node;
+ tree cl_program = ptr_type_node;
+ tree cl_kernel = ptr_type_node;
+ tree cl_event = ptr_type_node;
+ tree cl_mem = ptr_type_node;
+
+ tree const_cl_event = build_qualified_type (cl_event,
+ TYPE_QUAL_CONST);
+ tree cl_event_ptr = build_pointer_type (cl_event);
+ tree const_cl_event_ptr = build_pointer_type (const_cl_event);
+
+ tree const_cl_device_id = build_qualified_type (cl_device_id,
+ TYPE_QUAL_CONST);
+ tree const_cl_device_id_ptr = build_pointer_type (const_cl_device_id);
+
+ tree cl_platford_id = long_integer_type_node;
+ tree cl_platford_id_ptr = build_pointer_type (cl_platford_id);
+
+ tree function_type;
+ /* | cl_context
+ | clCreateContextFromType (cl_context_properties *properties,
+ | cl_device_type device_type,
+ | void (*pfn_notify) (const char *errinfo,
+ | const void *private_info, size_t cb,
+ | void *user_data),
+ | void *user_data,
+ | cl_int *errcode_ret) */
+ function_type
+ = build_function_type_list (cl_context,
+ cl_context_properties,
+ cl_device_type,
+ ptr_type_node,
+ ptr_type_node,
+ integer_ptr_type_node,
+ NULL_TREE);
+ create_context_from_type_decl
+ = build_fn_decl (opencl_function_names[0], function_type);
+
+ /* | cl_int clGetContextInfo (cl_context context,
+ | cl_context_info param_name,
+ | size_t param_value_size,
+ | void *param_value,
+ | size_t *param_value_size_ret) */
+ function_type
+ = build_function_type_list (integer_type_node,
+ cl_context,
+ cl_context_info,
+ size_type_node,
+ ptr_type_node,
+ size_t_ptr,
+ NULL_TREE);
+ get_context_info_decl
+ = build_fn_decl (opencl_function_names[1], function_type);
+
+ /* | cl_command_queue
+ | clCreateCommandQueue (cl_context context,
+ | cl_device_id device,
+ | cl_command_queue_properties properties,
+ | cl_int *errcode_ret) */
+ function_type
+ = build_function_type_list (cl_command_queue,
+ cl_context,
+ cl_device_id,
+ cl_command_queue_properties,
+ integer_ptr_type_node,
+ NULL_TREE);
+ create_command_queue_decl
+ = build_fn_decl (opencl_function_names[2], function_type);
+
+ /* | cl_program clCreateProgramWithSource (cl_context context,
+ | cl_uint count,
+ | const char **strings,
+ | const size_t *lengths,
+ | cl_int *errcode_ret) */
+ function_type
+ = build_function_type_list (cl_program,
+ cl_context,
+ unsigned_type_node,
+ const_char_ptr_ptr,
+ const_size_t_ptr,
+ integer_ptr_type_node,
+ NULL_TREE);
+ create_program_with_source_decl
+ = build_fn_decl (opencl_function_names[3], function_type);
+
+ /* | cl_int
+ | clBuildProgram (cl_program program,
+ | cl_uint num_devices,
+ | const cl_device_id *device_list,
+ | const char *options,
+ | void (*pfn_notify) (cl_program, void *user_data),
+ | void *user_data) */
+ function_type
+ = build_function_type_list (integer_type_node,
+ cl_program,
+ unsigned_type_node,
+ const_cl_device_id_ptr,
+ const_char_ptr,
+ ptr_type_node,
+ ptr_type_node,
+ NULL_TREE);
+ build_program_decl
+ = build_fn_decl (opencl_function_names[4], function_type);
+
+ /* | cl_kernel clCreateKernel (cl_program program,
+ | const char *kernel_name,
+ | cl_int *errcode_ret) */
+ function_type
+ = build_function_type_list (cl_kernel,
+ cl_program,
+ const_char_ptr,
+ integer_ptr_type_node,
+ NULL_TREE);
+
+ create_kernel_decl
+ = build_fn_decl (opencl_function_names[5], function_type);
+
+ /* | cl_mem clCreateBuffer (cl_context context,
+ | cl_mem_flags flags,
+ | size_t size,
+ | void *host_ptr,
+ | cl_int *errcode_ret) */
+
+ function_type
+ = build_function_type_list (cl_mem,
+ cl_context,
+ cl_mem_flags,
+ size_type_node,
+ ptr_type_node,
+ integer_ptr_type_node,
+ NULL_TREE);
+ create_buffer_decl
+ = build_fn_decl (opencl_function_names[6], function_type);
+
+
+ /* | cl_int clSetKernelArg (cl_kernel kernel,
+ | cl_uint arg_index,
+ | size_t arg_size,
+ | const void *arg_value) */
+
+ function_type
+ = build_function_type_list (integer_type_node,
+ cl_kernel,
+ unsigned_type_node,
+ size_type_node,
+ const_ptr_type_node,
+ NULL_TREE);
+ set_kernel_arg_decl
+ = build_fn_decl (opencl_function_names[7], function_type);
+
+ /* | cl_int clEnqueueNDRangeKernel (cl_command_queue command_queue,
+ | cl_kernel kernel,
+ | cl_uint work_dim,
+ | const size_t *global_work_offset,
+ | const size_t *global_work_size,
+ | const size_t *local_work_size,
+ | cl_uint num_events_in_wait_list,
+ | const cl_event *event_wait_list,
+ | cl_event *event) */
+
+ function_type
+ = build_function_type_list (integer_type_node,
+ cl_command_queue,
+ cl_kernel,
+ unsigned_type_node,
+ const_size_t_ptr,
+ const_size_t_ptr,
+ const_size_t_ptr,
+ unsigned_type_node,
+ const_cl_event_ptr,
+ cl_event_ptr,
+ NULL_TREE);
+
+ enqueue_nd_range_kernel_decl
+ = build_fn_decl (opencl_function_names[8], function_type);
+
+ /* | cl_int clEnqueueReadBuffer (cl_command_queue command_queue,
+ | cl_mem buffer,
+ | cl_bool blocking_read,
+ | size_t offset,
+ | size_t cb,
+ | void *ptr,
+ | cl_uint num_events_in_wait_list,
+ | const cl_event *event_wait_list,
+ | cl_event *event) */
+
+ function_type
+ = build_function_type_list (integer_type_node,
+ cl_command_queue,
+ cl_mem,
+ unsigned_type_node,
+ size_type_node,
+ size_type_node,
+ ptr_type_node,
+ unsigned_type_node,
+ const_cl_event_ptr,
+ cl_event_ptr,
+ NULL_TREE);
+
+ enqueue_read_buffer_decl
+ = build_fn_decl (opencl_function_names[9], function_type);
+
+ /* | cl_int clEnqueueWriteBuffer (cl_command_queue command_queue,
+ | cl_mem buffer,
+ | cl_bool blocking_write,
+ | size_t offset,
+ | size_t cb,
+ | const void *ptr,
+ | cl_uint num_events_in_wait_list,
+ | const cl_event *event_wait_list,
+ | cl_event *event) */
+
+ function_type
+ = build_function_type_list (integer_type_node,
+ cl_command_queue,
+ cl_mem,
+ unsigned_type_node,
+ size_type_node,
+ size_type_node,
+ const_ptr_type_node,
+ unsigned_type_node,
+ const_cl_event_ptr,
+ cl_event_ptr,
+ NULL_TREE);
+
+ enqueue_write_buffer_decl
+ = build_fn_decl (opencl_function_names[10], function_type);
+
+
+ /* cl_int clReleaseMemObject (cl_mem memobj) */
+
+ function_type
+ = build_function_type_list (integer_type_node, cl_mem, NULL_TREE);
+
+ release_memory_obj_decl
+ = build_fn_decl (opencl_function_names[11], function_type);
+
+
+ /* cl_int clReleaseContext (cl_context context) */
+ function_type
+ = build_function_type_list (integer_type_node, cl_context,
+ NULL_TREE);
+
+ release_context_decl
+ = build_fn_decl (opencl_function_names[12], function_type);
+
+ /* cl_int clReleaseCommandQueue (cl_command_queue command_queue) */
+ function_type
+ = build_function_type_list (integer_type_node, cl_command_queue,
+ NULL_TREE);
+
+ release_command_queue_decl
+ = build_fn_decl (opencl_function_names[13], function_type);
+
+ /* cl_int clReleaseProgram (cl_program program) */
+ function_type
+ = build_function_type_list (integer_type_node, cl_program,
+ NULL_TREE);
+
+ release_program_decl
+ = build_fn_decl (opencl_function_names[14], function_type);
+
+ /* cl_int clReleaseKernel (cl_kernel kernel) */
+ function_type
+ = build_function_type_list (integer_type_node, cl_kernel, NULL_TREE);
+
+ release_kernel_decl
+ = build_fn_decl (opencl_function_names[15], function_type);
+
+ /* | cl_int clGetPlatformIDs (cl_uint num_entries,
+ | cl_platform_id *platforms,
+ | cl_uint *num_platforms) */
+
+
+ function_type
+ = build_function_type_list (integer_type_node,
+ unsigned_type_node,
+ cl_platford_id_ptr,
+ build_pointer_type (unsigned_type_node),
+ NULL_TREE);
+ get_platform_ids_decl
+ = build_fn_decl (opencl_function_names [16], function_type);
+
+
+ /* | cl_int clWaitForEvents (cl_uint num_events,
+ | const cl_event *event_list) */
+
+ function_type
+ = build_function_type_list (integer_type_node,
+ unsigned_type_node,
+ const_cl_event_ptr,
+ NULL_TREE);
+
+ get_wait_for_events_decl
+ = build_fn_decl (opencl_function_names [17], function_type);
+
+ return NULL_TREE;
+ }
+ case CREATE_CONTEXT_FROM_TYPE: return create_context_from_type_decl;
+ case GET_CONTEXT_INFO: return get_context_info_decl;
+ case CREATE_COMMAND_QUEUE: return create_command_queue_decl;
+ case CREATE_PROGRAM_WITH_SOURCE: return create_program_with_source_decl;
+ case BUILD_PROGRAM: return build_program_decl;
+ case CREATE_KERNEL: return create_kernel_decl;
+ case CREATE_BUFFER: return create_buffer_decl;
+ case SET_KERNEL_ARG: return set_kernel_arg_decl;
+ case ENQUEUE_ND_RANGE_KERNEL: return enqueue_nd_range_kernel_decl;
+ case ENQUEUE_READ_BUFFER: return enqueue_read_buffer_decl;
+ case ENQUEUE_WRITE_BUFFER: return enqueue_write_buffer_decl;
+ case RELEASE_MEMORY_OBJ: return release_memory_obj_decl;
+ case RELEASE_CONTEXT: return release_context_decl;
+ case RELEASE_COMMAND_QUEUE: return release_command_queue_decl;
+ case RELEASE_PROGRAM: return release_program_decl;
+ case RELEASE_KERNEL: return release_kernel_decl;
+ case GET_PLATFORM_IDS: return get_platform_ids_decl;
+ case WAIT_FOR_EVENTS: return get_wait_for_events_decl;
+ default: gcc_unreachable ();
+ }
+}
+
+/* Create call
+ | clGetContextInfo (h_context, CL_CONTEXT_DEVICES, 0, 0,
+ | &n_context_descriptor_size);
+
+ POINTER_TO_SIZE if &n_context_descriptor_size. */
+
+static tree
+opencl_create_clGetContextInfo_1 (tree pointer_to_size)
+{
+ tree function = opencl_create_function_decl (GET_CONTEXT_INFO);
+ tree zero_pointer = null_pointer_node;
+ tree cl_contex_devices = build_int_cst (NULL_TREE, CL_CONTEXT_DEVICES);
+ tree context_var = h_context;
+ tree call = build_call_expr (function, 5,
+ context_var,
+ cl_contex_devices,
+ integer_zero_node,
+ zero_pointer,
+ pointer_to_size);
+ return call;
+}
+
+/* Create call
+ | clGetContextInfo (h_context, CL_CONTEXT_DEVICES,
+ | n_context_descriptor_size, A_DEVICES, 0);
+
+ POINTER_TO_SIZE if &n_context_descriptor_size. */
+
+static tree
+opencl_create_clGetContextInfo_2 (tree size, tree a_devices)
+{
+ tree function = opencl_create_function_decl (GET_CONTEXT_INFO);
+ tree zero_pointer = null_pointer_node;
+ tree cl_contex_devices = build_int_cst (NULL_TREE, CL_CONTEXT_DEVICES);
+ tree context_var = h_context;
+ tree call = build_call_expr (function, 5,
+ context_var,
+ cl_contex_devices,
+ size,
+ a_devices,
+ zero_pointer);
+ return call;
+}
+
+/* Create context_properties array variable. */
+
+static tree
+opencl_create_context_properties (void)
+{
+ tree cl_context_properties_type = long_integer_type_node;
+
+ tree index_type = build_index_type (build_int_cst (NULL_TREE, 3));
+ tree array_type = build_array_type (cl_context_properties_type,
+ index_type);
+ return opencl_create_tmp_var (array_type, "context_properties");
+}
+
+/* Place calls to obtain current platform id to INIT_EDGE.
+ Place obtained id to VAR. */
+
+static edge
+opencl_set_context_properties (edge init_edge, tree var)
+{
+ tree function = opencl_create_function_decl (GET_PLATFORM_IDS);
+ tree cl_context_properties_type = long_integer_type_node;
+ tree call;
+ tree call2;
+ gimple_stmt_iterator g_iter;
+
+ basic_block bb = split_edge (init_edge);
+
+ init_edge = single_succ_edge (bb);
+
+ g_iter = gsi_last_bb (bb);
+ call = build4 (ARRAY_REF, cl_context_properties_type,
+ var, integer_zero_node, NULL_TREE, NULL_TREE);
+ call2 = build_int_cst (NULL_TREE, CL_CONTEXT_PLATFORM);
+ call2 = build1 (CONVERT_EXPR, cl_context_properties_type, call2);
+
+ call = build2 (MODIFY_EXPR, cl_context_properties_type,
+ call, call2);
+
+ force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
+ GSI_CONTINUE_LINKING);
+ g_iter = gsi_last_bb (bb);
+
+ call = build4 (ARRAY_REF, cl_context_properties_type,
+ var, integer_one_node, NULL_TREE, NULL_TREE);
+ call2 = build_call_expr (function, 3,
+ integer_one_node,
+ build_addr (call, current_function_decl),
+ null_pointer_node);
+ force_gimple_operand_gsi (&g_iter, call2, true, NULL, false,
+ GSI_CONTINUE_LINKING);
+
+ call = build4 (ARRAY_REF, cl_context_properties_type,
+ var, build_int_cst (NULL_TREE, 2), NULL_TREE, NULL_TREE);
+ call = build2 (MODIFY_EXPR, cl_context_properties_type,
+ call, fold_convert (cl_context_properties_type,
+ integer_zero_node));
+ force_gimple_operand_gsi (&g_iter, call, true, NULL, false,
+ GSI_CONTINUE_LINKING);
+ return init_edge;
+}
+
+/* Create call
+ clCreateContextFromType (PROPERTIES, CL_DEVICE_TYPE_GPU, 0, 0, 0); */
+
+static tree
+opencl_create_clCreateContextFromType (tree properties)
+{
+ tree function = opencl_create_function_decl (CREATE_CONTEXT_FROM_TYPE);
+ tree zero_pointer = null_pointer_node;
+ tree device
+ = build_int_cst (NULL_TREE, flag_graphite_opencl_cpu
+ ? CL_DEVICE_TYPE_CPU : CL_DEVICE_TYPE_GPU);
+ tree call;
+ call = build_call_expr (function, 5,
+ build_addr (properties, current_function_decl),
+ device,
+ zero_pointer,
+ zero_pointer,
+ zero_pointer);
+ return call;
+}
+
+/* Create call
+ clCreateCommandQueue (h_context, DEV_ID, 0, 0); */
+
+static tree
+opencl_create_clCreateCommandQueue (tree dev_id)
+{
+
+ tree function = opencl_create_function_decl (CREATE_COMMAND_QUEUE);
+ tree zero_pointer = null_pointer_node;
+ tree context = h_context;
+ tree call = build_call_expr (function, 4,
+ context,
+ dev_id,
+ zero_pointer,
+ zero_pointer);
+ return call;
+}
+
+/* Create call malloc (ARG). */
+
+static tree
+opencl_create_malloc_call (tree arg)
+{
+ tree function_type =
+ build_function_type_list (ptr_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree function = build_fn_decl ("malloc", function_type);
+
+ tree call = build_call_expr (function, 1, arg);
+ return call;
+}
+
+/* Init basic block in CODE_GEN structures. */
+
+static void
+opencl_init_basic_blocks (opencl_main code_gen)
+{
+ code_gen->data_init_bb = opencl_create_bb (code_gen);
+ code_gen->kernel_edge = code_gen->main_edge;
+}
+
+/* Generate calls for opencl init functions and place them to INIT_EDGE.
+ Must be called only once in each function. */
+
+static edge
+opencl_create_init_context (edge init_edge)
+{
+ tree tmp_type;
+ tree call;
+ tree n_context_descriptor_size
+ = opencl_create_tmp_var (size_type_node, "__ocl_nContextDescriptorSize");
+ tree a_devices = opencl_create_tmp_var (build_pointer_type (ptr_type_node),
+ "__ocl_a_devices");
+ tree properties = opencl_create_context_properties ();
+ init_edge = opencl_set_context_properties (init_edge, properties);
+ call = opencl_create_clCreateContextFromType (properties);
+ call = build2 (MODIFY_EXPR, TREE_TYPE (h_context),
+ h_context, call);
+ init_edge = opencl_add_safe_call_on_edge (call, false, init_edge);
+ tmp_type = build_pointer_type
+ (TREE_TYPE (n_context_descriptor_size));
+ call = build1 (ADDR_EXPR, tmp_type,
+ n_context_descriptor_size);
+ call = opencl_create_clGetContextInfo_1 (call);
+ init_edge = opencl_add_safe_call_on_edge (call, true, init_edge);
+ call = opencl_create_malloc_call (n_context_descriptor_size);
+ call = fold_convert (TREE_TYPE (a_devices), call);
+ call = build2 (MODIFY_EXPR, TREE_TYPE (a_devices), a_devices, call);
+ init_edge = opencl_add_safe_call_on_edge (call, false, init_edge);
+ call = opencl_create_clGetContextInfo_2 (n_context_descriptor_size,
+ a_devices);
+ init_edge = opencl_add_safe_call_on_edge (call, true, init_edge);
+ tmp_type = TREE_TYPE (TREE_TYPE (a_devices));
+ call = build1 (INDIRECT_REF, tmp_type, a_devices);
+ call = opencl_create_clCreateCommandQueue (call);
+ call = build2 (MODIFY_EXPR, TREE_TYPE (h_cmd_queue),
+ h_cmd_queue, call);
+ init_edge = opencl_add_safe_call_on_edge (call, false, init_edge);
+ return init_edge;
+}
+
+/* Fill array VEC with all poly basic blocks in clast statement ROOT. */
+
+static void
+build_poly_bb_vec (struct clast_stmt * root,
+ VEC (poly_bb_p, heap) ** vec)
+{
+ while (root)
+ {
+ if (CLAST_STMT_IS_A (root, stmt_user))
+ {
+ poly_bb_p tmp
+ = (poly_bb_p) cloog_statement_usr
+ (((struct clast_user_stmt *) root)->statement);
+ VEC_safe_push (poly_bb_p, heap, *vec, tmp);
+ }
+ else if (CLAST_STMT_IS_A (root, stmt_for))
+ build_poly_bb_vec (((struct clast_for *) root)->body, vec);
+ else if (CLAST_STMT_IS_A (root, stmt_guard))
+ build_poly_bb_vec (((struct clast_guard *) root)->then, vec);
+ else if (CLAST_STMT_IS_A (root, stmt_block))
+ build_poly_bb_vec (((struct clast_block *) root)->body, vec);
+ root = root->next;
+ }
+}
+
+/* Check whether there is a dependency between PBB1 and PBB2 on level LEVEL.
+ CAN_BE_PRIVATE indicates which variables can be privatizated.
+ CODE_GEN holds information related to code generation. */
+
+static bool
+opencl_dependency_between_pbbs_p (opencl_main code_gen, poly_bb_p pbb1,
+ poly_bb_p pbb2, int level,
+ bitmap can_be_private)
+{
+ int i, j;
+ poly_dr_p pdr1, pdr2;
+ timevar_push (TV_GRAPHITE_DATA_DEPS);
+ for (i = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb1), i, pdr1); i++)
+ {
+ data_reference_p ref1 = (data_reference_p)PDR_CDR (pdr1);
+ opencl_data data_1 = opencl_get_data_by_data_ref (code_gen, ref1);
+ if (bitmap_bit_p (can_be_private, data_1->id))
+ continue;
+
+ for (j = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb2), j, pdr2); j++)
+ {
+ data_reference_p ref2 = (data_reference_p)PDR_CDR (pdr2);
+
+ opencl_data data_2 = opencl_get_data_by_data_ref (code_gen, ref2);
+
+ if (bitmap_bit_p (can_be_private, data_2->id))
+ continue;
+
+ if (graphite_carried_dependence_level_k (pdr1, pdr2, level))
+ {
+ timevar_pop (TV_GRAPHITE_DATA_DEPS);
+ return true;
+ }
+ }
+ }
+
+ timevar_pop (TV_GRAPHITE_DATA_DEPS);
+ return false;
+}
+
+/* Returns true, if there is dependency in clast loop STMT on depth DEPTH.
+ CODE_GEN holds information related to code generation. */
+
+bool
+dependency_in_clast_loop_p (opencl_main code_gen, opencl_clast_meta meta,
+ struct clast_for * stmt, int depth)
+{
+ VEC (poly_bb_p, heap) * pbbs = VEC_alloc (poly_bb_p, heap, 10);
+ int level = get_scattering_level (depth);
+ int i;
+ poly_bb_p pbb1;
+ bitmap can_be_private;
+ build_poly_bb_vec (stmt->body, &pbbs);
+
+ can_be_private = meta->can_be_private;
+
+ for (i = 0; VEC_iterate (poly_bb_p, pbbs, i, pbb1); i++)
+ {
+ int j;
+ poly_bb_p pbb2;
+ for (j = 0; VEC_iterate (poly_bb_p, pbbs, j, pbb2); j++)
+ if (opencl_dependency_between_pbbs_p (code_gen, pbb1, pbb1,
+ level, can_be_private))
+ {
+ VEC_free (poly_bb_p, heap, pbbs);
+ return true;
+ }
+ }
+ VEC_free (poly_bb_p, heap, pbbs);
+ return false;
+}
+
+/* Init graphite-opencl pass. Must be called in each function before
+ any scop processing. */
+
+void
+graphite_opencl_init (void)
+{
+ opencl_create_gimple_variables ();
+
+ /* cl_program h_program */
+ h_program
+ = opencl_create_static_ptr_variable ("__ocl_h_program");
+
+ opencl_function_kernels = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE);
+ main_program_src = dyn_string_new (100);
+
+ opencl_array_data = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE);
+ array_data_to_tree = htab_create (10, map_tree_to_data_to_hash,
+ map_tree_to_data_cmp, free);
+
+ opencl_create_function_decl (STATIC_INIT);
+}
+
+/* Create calls to initialize static data for current function and
+ place them to INIT_EDGE. */
+
+static edge
+opencl_init_static_data (edge init_edge)
+{
+ int i;
+ opencl_data curr;
+ for (i = 0; VEC_iterate (opencl_data, opencl_array_data, i, curr); i ++)
+ {
+ tree tmp;
+ if (!curr->used_on_device)
+ continue;
+
+ tmp = opencl_create_memory_for_pointer (curr);
+ tmp = build2 (MODIFY_EXPR, ptr_type_node, curr->device_object, tmp);
+ init_edge = opencl_add_safe_call_on_edge (tmp, false, init_edge);
+ }
+ return init_edge;
+}
+
+/* Finalize graphite-opencl pass for current function. Place all required
+ calls to STATIC_INIT_EDGE. Must be called after all scop processing
+ in current function. */
+
+void
+graphite_opencl_finalize (edge static_init_edge)
+{
+ int i;
+ opencl_data curr;
+ if (dyn_string_length (main_program_src) != 0)
+ {
+ tree call = build2 (EQ_EXPR, boolean_type_node,
+ h_program, null_pointer_node);
+ basic_block buff_init_block = split_edge (static_init_edge);
+ edge before_init;
+ edge init_edge;
+
+ static_init_edge = single_succ_edge (buff_init_block);
+
+ create_empty_if_region_on_edge (static_init_edge, call);
+
+ static_init_edge = opencl_create_function_call
+ (get_true_edge_from_guard_bb (static_init_edge->dest));
+ static_init_edge = opencl_init_static_data (static_init_edge);
+ before_init = single_pred_edge (buff_init_block);
+ call = build2 (EQ_EXPR, boolean_type_node,
+ h_context, null_pointer_node);
+
+ create_empty_if_region_on_edge (before_init, call);
+ init_edge = get_true_edge_from_guard_bb (before_init->dest);
+ init_edge = opencl_create_init_context (init_edge);
+ }
+ dyn_string_delete (main_program_src);
+ for (i = 0; VEC_iterate (opencl_data, opencl_array_data, i, curr); i++)
+ opencl_data_delete (curr);
+
+ VEC_free (tree, heap, opencl_function_kernels);
+ VEC_free (opencl_data, heap, opencl_array_data);
+ recompute_all_dominators ();
+ update_ssa (TODO_update_ssa);
+}
+
+/* Debug functions for deined data structures. */
+
+static void
+dump_flag_to_file (const char * name, bool cond,
+ FILE * file, int indent)
+{
+ indent_to (file, indent);
+ fprintf (file, "%s = %s", name, cond? "true" : "false");
+}
+
+void
+dump_opencl_data (opencl_data data, FILE * file, bool verbose)
+{
+ fprintf (file, "Data id = %d\n", data->id);
+ fprintf (file, "Data dimension = %d\n", data->data_dim);
+ fprintf (file, "Data depth = %d\n", data->depth);
+ fprintf (file, "Flags");
+ indent_to (file, 2);
+ fprintf (file, "Global");
+ dump_flag_to_file ("Static", data->is_static, file, 4);
+ dump_flag_to_file ("Can be private", data->can_be_private, file, 4);
+ dump_flag_to_file ("Used on device", data->used_on_device, file, 4);
+ dump_flag_to_file ("Ever read on device",
+ data->ever_read_on_device, file, 4);
+
+ dump_flag_to_file ("Ever written on device",
+ data->ever_written_on_device, file, 4);
+
+ dump_flag_to_file ("Supported", data->supported, file, 4);
+ indent_to (file, 2);
+ fprintf (file, "Local");
+
+ dump_flag_to_file ("Up to date on device",
+ data->up_to_date_on_device, file, 4);
+ dump_flag_to_file ("Up to date on host",
+ data->up_to_date_on_host, file, 4);
+
+ dump_flag_to_file ("Inited in current loop on host",
+ data->inited_in_current_loop_on_host, file, 4);
+
+ dump_flag_to_file ("Inited in current loop on device",
+ data->inited_in_current_loop_on_device, file, 4);
+
+ dump_flag_to_file ("Written in current body",
+ data->written_in_current_body, file, 4);
+
+ dump_flag_to_file ("Read in current body",
+ data->read_in_current_body, file, 4);
+ dump_flag_to_file ("Privatized", data->privatized, file, 4);
+
+ fprintf (file, "\n");
+
+ if (verbose)
+ {
+ fprintf (file, "\nObject\n");
+ print_node_brief (file, "", data->object, 2);
+
+ fprintf (file, "\nDevice object\n");
+ print_node_brief (file, "", data->device_object, 2);
+
+ fprintf (file, "\nSize value\n");
+ print_node_brief (file, "", data->size_value, 2);
+
+ fprintf (file, "\nSize variable\n");
+ print_node_brief (file, "", data->size_variable, 2);
+
+ fprintf (file, "\nExact object\n");
+ print_node_brief (file, "", data->exact_object, 2);
+ }
+}
+
+DEBUG_FUNCTION void
+debug_opencl_data (opencl_data data, bool verbose)
+{
+ dump_opencl_data (data, stderr, verbose);
+}
+
+void
+dump_opencl_body (opencl_body body, FILE * file, bool verbose)
+{
+ fprintf (file, "\n%s\n\n", body->name);
+ fprintf (file, "First iterator: %s\n", body->first_iter);
+ fprintf (file, "Last iterator: %s\n", body->last_iter);
+ fprintf (file, "Number of data writes = %d\n\n", body->num_of_data_writes);
+ fprintf (file, "Function header::\n");
+ fprintf (file, "%s\n\n", dyn_string_buf (body->header));
+ fprintf (file, "Non scalar args::\n");
+ fprintf (file, "%s\n\n", dyn_string_buf (body->non_scalar_args));
+ fprintf (file, "Pre header::\n");
+ fprintf (file, "%s\n\n", dyn_string_buf (body->pre_header));
+ fprintf (file, "Body::\n");
+ fprintf (file, "%s\n\n", dyn_string_buf (body->body));
+
+ fprintf (file, "Number of executions::\n");
+ print_node_brief (file, "", body->num_of_exec, 2);
+
+ if (verbose)
+ print_clast_stmt (file, body->clast_body);
+}
+
+DEBUG_FUNCTION void
+debug_opencl_body (opencl_body body, bool verbose)
+{
+ dump_opencl_body (body, stderr, verbose);
+}
+
+void
+dump_opencl_clast_meta (opencl_clast_meta meta, FILE * file,
+ bool verbose, int indent)
+{
+ if (!verbose)
+ /* Just print structure of meta. */
+ {
+ while (meta)
+ {
+ indent_to (file, indent);
+ fprintf (file, "<in = %d, out = %d, dev = %s, ok = %s>",
+ meta->in_depth, meta->out_depth,
+ meta->on_device?"true":"false",
+ meta->access_unsupported?"false":"true");
+ dump_opencl_clast_meta (meta->body, file, false, indent + 4);
+ meta = meta->next;
+ }
+ }
+ else
+ {
+ fprintf (file, "<in = %d, out = %d, dev = %s, ok = %s>",
+ meta->in_depth, meta->out_depth,
+ meta->on_device?"true":"false",
+ meta->access_unsupported?"false":"true");
+
+ fprintf (file, "\nModified on host::\n");
+ debug_bitmap_file (file, meta->modified_on_host);
+
+ fprintf (file, "\nModified on device::\n");
+ debug_bitmap_file (file, meta->modified_on_device);
+
+ fprintf (file, "\nAccess::\n");
+ debug_bitmap_file (file, meta->access);
+
+ fprintf (file, "\nCan be private::\n");
+ debug_bitmap_file (file, meta->can_be_private);
+ }
+}
+
+DEBUG_FUNCTION void
+debug_opencl_clast_meta (opencl_clast_meta meta, bool verbose)
+{
+ dump_opencl_clast_meta (meta, stderr, verbose, 0);
+}
+
+static int
+print_char_p_htab (void ** h, void * v)
+{
+ char ** ptr = (char **)h;
+ FILE * file = (FILE *)v;
+ fprintf (file, " %s\n", *ptr);
+ return 1;
+}
+
+static int
+print_tree_to_data_htab (void ** h, void * v)
+{
+ map_tree_to_data * map = (map_tree_to_data *)h;
+ FILE * file = (FILE *)v;
+ tree key = (*map)->key;
+ opencl_data data = (*map)->value;
+ print_node_brief (file, "key = ", key, 2);
+ fprintf (file, " data_id = %d\n", data->id);
+ return 1;
+}
+
+static int
+print_ref_to_data_htab (void ** h, void * v)
+{
+ map_ref_to_data * map = (map_ref_to_data *)h;
+ FILE * file = (FILE *)v;
+ data_reference_p key = (*map)->key;
+ opencl_data data = (*map)->value;
+ fprintf (file, "key::\n");
+ dump_data_reference (file, key);
+ fprintf (file, "data_id = %d\n\n", data->id);
+ return 1;
+}
+
+void
+dump_opencl_main (opencl_main code_gen, FILE * file, bool verbose)
+{
+ fprintf (file, "Current meta::\n");
+ dump_opencl_clast_meta (code_gen->curr_meta, file, false, 2);
+ fprintf (file, "\n");
+ if (code_gen->current_body)
+ {
+ fprintf (file, "Current body::\n");
+ dump_opencl_body (code_gen->current_body, file, verbose);
+ }
+ fprintf (file, "\n\nData init basic block::\n");
+ dump_bb (code_gen->data_init_bb, stderr, 0);
+
+ if (code_gen->defined_vars)
+ {
+ fprintf (file, "Defined variables::\n");
+ htab_traverse_noresize (code_gen->defined_vars, print_char_p_htab,
+ file);
+ }
+
+ if (code_gen->global_defined_vars)
+ {
+ fprintf (file, "Global defined variables::\n");
+ htab_traverse_noresize (code_gen->global_defined_vars,
+ print_char_p_htab, file);
+ }
+ fprintf (file, "Refs to data::\n");
+ htab_traverse_noresize (code_gen->ref_to_data,
+ print_ref_to_data_htab, file);
+
+ fprintf (file, "Trees to data::\n");
+ htab_traverse_noresize (code_gen->tree_to_data,
+ print_tree_to_data_htab, file);
+
+ if (verbose)
+ fprintf (file, "%s\n", dyn_string_buf (code_gen->main_program));
+}
+
+DEBUG_FUNCTION void
+debug_opencl_main (opencl_main code_gen, bool verbose)
+{
+ dump_opencl_main (code_gen, stderr, verbose);
+}
+
+DEBUG_FUNCTION void
+debug_opencl_program (void)
+{
+ fprintf (stderr, "%s", dyn_string_buf (main_program_src));
+}
+
+#endif
+#include "gt-graphite-opencl.h"
new file mode 100644
@@ -0,0 +1,254 @@
+/* GRAPHITE-OpenCL pass.
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+#define OPENCL_INIT_BUFF_SIZE 40
+
+/* Data structure which holds information about single array or pointer
+ in current scop. */
+struct opencl_data_def
+{
+ /* Base object. In case of arrays base array replaced by pointer to this
+ array. */
+ tree object;
+ /* Device object, corresponding to base object. */
+ tree device_object;
+ /* Size of data references in scop. */
+ tree size_value;
+
+ /* Variable to hold size_value. */
+ tree size_variable;
+
+ /* Unique id. */
+ int id;
+
+ /* If true, then given data represents static object. */
+ bool is_static;
+
+ /* Exact object represented by given data. */
+ tree exact_object;
+
+ /* If true, given data can be privatized in current context. */
+ bool can_be_private;
+ /* Current state. */
+ bool up_to_date_on_device;
+ bool up_to_date_on_host;
+
+ bool inited_in_current_loop_on_host;
+ bool inited_in_current_loop_on_device;
+
+ bool written_in_current_body;
+ bool read_in_current_body;
+
+ bool privatized;
+
+ /* Data dimension. */
+ int data_dim;
+ int depth;
+
+ /* Access information. */
+ bool used_on_device;
+ bool ever_read_on_device;
+ bool ever_written_on_device;
+
+ bool supported;
+};
+
+typedef struct opencl_data_def * opencl_data;
+typedef const char * char_p;
+
+DEF_VEC_P (opencl_data);
+DEF_VEC_ALLOC_P (opencl_data, heap);
+
+/* Data structure with meta information about clast statement.
+ |
+ | LOOP_1 {out_depth = 0, in_depth = 3}
+ | / \
+ | / \
+ | / \
+ | / \
+ | {out_depth = 1, LOOP_2 STMT_1 {in_depth = 0,
+ | in_depth = 2} out_depth = 1}
+ | |
+ | |
+ | |
+ | LOOP_3 {out_depth = 2, in_depth = 1}
+ | |
+ | |
+ | |
+ | STMT_2 {out_depth = 3, in_depth = 3}
+*/
+struct opencl_clast_meta_def
+{
+ /* Outer depth. */
+ int out_depth;
+ /* Inner depth. */
+ int in_depth;
+ /* True means, that statement will be executed on device. */
+ bool on_device;
+ struct opencl_clast_meta_def * next;
+ struct opencl_clast_meta_def * body;
+ struct opencl_clast_meta_def * parent;
+
+ /* All init code for this statement will be placed on this edge. */
+ edge init_edge;
+
+ /* Data modifications in this statement (including children). */
+ bitmap modified_on_device;
+ bitmap modified_on_host;
+
+ /* Data, which can be private in current loop. */
+ bitmap can_be_private;
+
+ /* Access information. */
+ bool access_unsupported;
+ bitmap access;
+
+ VEC (opencl_data, heap) *post_pass_to_host;
+ VEC (opencl_data, heap) *post_pass_to_device;
+};
+
+typedef struct opencl_clast_meta_def *opencl_clast_meta;
+
+DEF_VEC_P (char_p);
+DEF_VEC_ALLOC_P (char_p, heap);
+
+/* Single opencl kernel. */
+struct graphite_opencl_kernel_body
+{
+ /* Function body. */
+ dyn_string_t body;
+
+ /* Function header. */
+ dyn_string_t header;
+
+ /* Variables declarations. */
+ dyn_string_t pre_header;
+
+ /* Number of executions for kernel. */
+ tree num_of_exec;
+
+ dyn_string_t non_scalar_args;
+
+ /* Number of write data references in kernel. */
+ int num_of_data_writes;
+
+
+ /* Clast_stmt corresponding to kernel. */
+ struct clast_stmt *clast_body;
+
+ /* First iterator (scat_i). */
+ const char *first_iter;
+
+ /* First iterator (scat_{i+n}). */
+ const char *last_iter;
+
+ /* Kernel name. All kernels have names opencl_auto_function_<num>. */
+ char name[40];
+
+ /* Variables, which must be passed to kernel. */
+ VEC (tree, heap) *function_args;
+ VEC (tree, heap) *function_args_to_pass;
+ VEC (opencl_data, heap) *data_refs;
+};
+
+typedef struct graphite_opencl_kernel_body *opencl_body;
+
+
+/* Main data structure for translating clast to gimple with opencl
+ function calls. */
+struct graphite_opencl_creator
+{
+ /* Array with scat_* (iterators from clast data structures) names. */
+ CloogNames *root_names;
+
+ /* Current kernel. */
+ opencl_body current_body;
+
+ /* Current region. */
+ sese region;
+
+ /* Meta information for current scop. */
+ opencl_clast_meta clast_meta;
+
+ /* Current meta statement. */
+ opencl_clast_meta curr_meta;
+
+ /* Htab of all defined local vars (not tmp vars, generated by gimplify). */
+ htab_t defined_vars;
+
+ /* Htab of all defined global vars. */
+ htab_t global_defined_vars;
+
+ /* Current edge. */
+ edge main_edge;
+
+ /* Edge with kernels executing. */
+ edge kernel_edge;
+
+ /* Current opencl program. */
+ dyn_string_t main_program;
+
+ /* Information from clast structures. */
+ htab_t newivs_index;
+ htab_t params_index;
+ VEC (tree, heap) *newivs;
+
+ /* Current loop. */
+ loop_p context_loop;
+
+ /* Basic block with init statements for data. */
+ basic_block data_init_bb;
+
+ /* Data used in kernel. */
+ VEC(opencl_data, heap) *opencl_function_data;
+
+ VEC(tree, heap) *iv_map;
+
+ htab_t ref_to_data;
+ htab_t tree_to_data;
+};
+
+typedef struct graphite_opencl_creator * opencl_main;
+
+extern opencl_body opencl_clast_to_kernel (struct clast_for *,
+ opencl_main, int);
+extern tree dr_outermost_base_object (data_reference_p);
+extern void dump_opencl_main (opencl_main, FILE *, bool);
+extern void dump_opencl_body (opencl_body, FILE *, bool);
+extern void dump_opencl_clast_meta (opencl_clast_meta, FILE *, bool, int);
+extern void dump_opencl_data (opencl_data, FILE *, bool);
+extern void debug_opencl_main (opencl_main, bool);
+extern void debug_opencl_program (void);
+extern void debug_opencl_body (opencl_body, bool);
+extern void debug_opencl_clast_meta (opencl_clast_meta, bool);
+extern void debug_opencl_data (opencl_data, bool);
+
+/* Find opencl_data object by host object. */
+extern opencl_data opencl_get_data_by_data_ref (opencl_main, data_reference_p);
+extern opencl_data opencl_get_data_by_tree (opencl_main, tree);
+extern tree opencl_tree_to_var (basic_block, tree);
+extern opencl_clast_meta opencl_create_meta_from_clast (opencl_main,
+ struct clast_stmt *,
+ int, opencl_clast_meta);
+extern bool opencl_should_be_parallel_p (opencl_main, opencl_clast_meta, int);
+
+/* Create new basic block on main edge and update main_edge. */
+extern basic_block opencl_create_bb (opencl_main);
+extern bool dependency_in_clast_loop_p (opencl_main, opencl_clast_meta,
+ struct clast_for *, int);
+extern tree opencl_get_base_object_by_tree (tree);
@@ -281,10 +281,16 @@ graphite_transform_loops (void)
{
build_poly_scop (scop);
+ if (flag_graphite_opencl)
+ graphite_opencl_init ();
+
if (POLY_SCOP_P (scop)
&& apply_poly_transforms (scop)
&& gloog (scop, bb_pbb_mapping))
need_cfg_cleanup_p = true;
+
+ if (flag_graphite_opencl)
+ graphite_opencl_finalize (single_succ_edge (ENTRY_BLOCK_PTR));
}
htab_delete (bb_pbb_mapping);
@@ -308,7 +308,8 @@ gate_graphite_transforms (void)
|| flag_loop_strip_mine
|| flag_graphite_identity
|| flag_loop_parallelize_all
- || flag_loop_flatten)
+ || flag_loop_flatten
+ || flag_graphite_opencl)
flag_graphite = 1;
return flag_graphite != 0;