From patchwork Sat Dec 25 06:26:52 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Sebastian Pop X-Patchwork-Id: 76665 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 075C8B70DF for ; Sat, 25 Dec 2010 17:28:47 +1100 (EST) Received: (qmail 799 invoked by alias); 25 Dec 2010 06:28:46 -0000 Received: (qmail 32471 invoked by uid 22791); 25 Dec 2010 06:28:25 -0000 X-SWARE-Spam-Status: No, hits=1.4 required=5.0 tests=AWL, BAYES_50, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, FSL_RU_URL, RCVD_IN_DNSWL_LOW, TW_CF, TW_DB, TW_GB, TW_TM, T_TO_NO_BRKTS_FREEMAIL X-Spam-Check-By: sourceware.org Received: from mail-gy0-f175.google.com (HELO mail-gy0-f175.google.com) (209.85.160.175) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Sat, 25 Dec 2010 06:27:54 +0000 Received: by mail-gy0-f175.google.com with SMTP id 20so3336941gyh.20 for ; Fri, 24 Dec 2010 22:27:54 -0800 (PST) Received: by 10.236.109.166 with SMTP id s26mr18440529yhg.76.1293258472640; Fri, 24 Dec 2010 22:27:52 -0800 (PST) Received: from napoca ([75.54.87.199]) by mx.google.com with ESMTPS id 54sm5472733yhl.32.2010.12.24.22.27.44 (version=TLSv1/SSLv3 cipher=RC4-MD5); Fri, 24 Dec 2010 22:27:51 -0800 (PST) Received: by napoca (sSMTP sendmail emulation); Sat, 25 Dec 2010 00:27:42 -0600 From: Sebastian Pop To: gcc-patches@gcc.gnu.org Cc: gcc-graphite@googlegroups.com, amonakov@ispras.ru, kayrick@ispras.ru, abel@ispras.ru, basile@starynkevitch.net, grosser@fim.uni-passau.de, Sebastian Pop Subject: [PATCH 3/4] Code generation for OpenCL. Date: Sat, 25 Dec 2010 00:26:52 -0600 Message-Id: <1293258413-29902-4-git-send-email-sebpop@gmail.com> In-Reply-To: References: X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org 2010-12-25 Alexey Kravets * Makefile.in (OBJS-common): Add graphite-opencl-codegen.o, graphite-opencl-meta-clast.o and graphite-opencl.o. (graphite-opencl-codegen.o): New. (graphite-opencl-meta-clast.o): New. (graphite-opencl.o): New. (GTFILES): Add graphite-opencl.c. * common.opt (fgraphite-opencl): New. (fgraphite-opencl-cpu): New. (fgraphite-opencl-ignore-depth-heuristic): New. (fgraphite-opencl-ignore-mem-heuristic): New. (fgraphite-opencl-ignore-dep): New. (fgraphite-opencl-ignore-types): New. (fgraphite-opencl-depth-base): New. (fgraphite-opencl-debug): New. * graphite-opencl-codegen.c: New file. * graphite-opencl-meta-clast.c: New file. * graphite-opencl.c: New file. * graphite-opencl.h: New file. * graphite.c (graphite_transform_loops): Call graphite_opencl_init and graphite_opencl_finalize. * tree-ssa-loop.c (gate_graphite_transforms): Handle flag_graphite_opencl. * graphite-clast-to-gimple.c (gloog): Call opencl_transform_clast. * dbgcnt.def (opencl_scop_cnt): New counter. --- gcc/ChangeLog.graphite | 27 + gcc/Makefile.in | 28 + gcc/common.opt | 32 + gcc/dbgcnt.def | 1 + gcc/graphite-clast-to-gimple.c | 14 +- gcc/graphite-clast-to-gimple.h | 6 + gcc/graphite-opencl-codegen.c | 1916 +++++++++++++++++++++++++ gcc/graphite-opencl-meta-clast.c | 784 ++++++++++ gcc/graphite-opencl.c | 2913 ++++++++++++++++++++++++++++++++++++++ gcc/graphite-opencl.h | 254 ++++ gcc/graphite.c | 6 + gcc/tree-ssa-loop.c | 3 +- 12 files changed, 5979 insertions(+), 5 deletions(-) create mode 100644 gcc/graphite-opencl-codegen.c create mode 100644 gcc/graphite-opencl-meta-clast.c create mode 100644 gcc/graphite-opencl.c create mode 100644 gcc/graphite-opencl.h diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite index d667086..b9afdb8 100644 --- a/gcc/ChangeLog.graphite +++ b/gcc/ChangeLog.graphite @@ -1,3 +1,30 @@ +2010-12-25 Alexey Kravets + + * Makefile.in (OBJS-common): Add graphite-opencl-codegen.o, + graphite-opencl-meta-clast.o and graphite-opencl.o. + (graphite-opencl-codegen.o): New. + (graphite-opencl-meta-clast.o): New. + (graphite-opencl.o): New. + (GTFILES): Add graphite-opencl.c. + * common.opt (fgraphite-opencl): New. + (fgraphite-opencl-cpu): New. + (fgraphite-opencl-ignore-depth-heuristic): New. + (fgraphite-opencl-ignore-mem-heuristic): New. + (fgraphite-opencl-ignore-dep): New. + (fgraphite-opencl-ignore-types): New. + (fgraphite-opencl-depth-base): New. + (fgraphite-opencl-debug): New. + * graphite-opencl-codegen.c: New file. + * graphite-opencl-meta-clast.c: New file. + * graphite-opencl.c: New file. + * graphite-opencl.h: New file. + * graphite.c (graphite_transform_loops): Call graphite_opencl_init and + graphite_opencl_finalize. + * tree-ssa-loop.c (gate_graphite_transforms): Handle + flag_graphite_opencl. + * graphite-clast-to-gimple.c (gloog): Call opencl_transform_clast. + * dbgcnt.def (opencl_scop_cnt): New counter. + 2010-12-25 Alexander Monakov * graphite-dependences.c (graphite_outer_subscript_bound): New. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 74fe0e8..6106331 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1251,6 +1251,9 @@ OBJS-common = \ graph.o \ graphds.o \ graphite.o \ + graphite-opencl-codegen.o \ + graphite-opencl-meta-clast.o \ + graphite-opencl.o \ graphite-blocking.o \ graphite-clast-to-gimple.o \ graphite-cloog-util.o \ @@ -2692,6 +2695,30 @@ graphite-blocking.o: graphite-blocking.c $(CONFIG_H) $(SYSTEM_H) \ $(DIAGNOSTIC_CORE_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ $(GIMPLE_H) $(TREE_DATA_REF_H) tree-pass.h domwalk.h value-prof.h \ graphite.h graphite-poly.h graphite-ppl.h +graphite-opencl-codegen.o: graphite-opencl-codegen.c $(CONFIG_H) \ + $(SYSTEM_H) coretypes.h $(TM_H) \ + $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) \ + $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) \ + $(TREE_DATA_REF_H) tree-pass.h graphite.h graphite-opencl.h\ + pointer-set.h value-prof.h graphite-ppl.h sese.h \ + graphite-scop-detection.h graphite-clast-to-gimple.h graphite-poly.h \ + graphite-dependences.h +graphite-opencl-meta-clast.o: graphite-opencl-meta-clast.c $(CONFIG_H) \ + $(SYSTEM_H) coretypes.h $(TM_H) \ + $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) \ + $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) \ + $(TREE_DATA_REF_H) tree-pass.h graphite.h graphite-opencl.h\ + pointer-set.h value-prof.h graphite-ppl.h sese.h \ + graphite-scop-detection.h graphite-clast-to-gimple.h graphite-poly.h \ + graphite-dependences.h +graphite-opencl.o: graphite-opencl.c $(CONFIG_H) \ + $(SYSTEM_H) coretypes.h $(TM_H) \ + $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) \ + $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) \ + $(TREE_DATA_REF_H) tree-pass.h graphite.h graphite-opencl.h\ + pointer-set.h value-prof.h graphite-ppl.h sese.h \ + graphite-scop-detection.h graphite-clast-to-gimple.h graphite-poly.h \ + graphite-dependences.h graphite-clast-to-gimple.o: graphite-clast-to-gimple.c $(CONFIG_H) \ $(SYSTEM_H) coretypes.h $(TM_H) langhooks.h \ $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(DIAGNOSTIC_CORE_H) \ @@ -3803,6 +3830,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/lto-symtab.c \ $(srcdir)/tree-ssa-alias.h \ $(srcdir)/ipa-prop.h \ + $(srcdir)/graphite-opencl.c \ $(srcdir)/lto-streamer.h \ $(srcdir)/target-globals.h \ @all_gtfiles@ diff --git a/gcc/common.opt b/gcc/common.opt index 8ccbca3..68cafb4 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1119,6 +1119,38 @@ floop-parallelize-all Common Report Var(flag_loop_parallelize_all) Optimization Mark all loops as parallel +fgraphite-opencl +Common Report Var(flag_graphite_opencl) Optimization +Export OpenCL from graphite + +fgraphite-opencl-cpu +Common Report Var(flag_graphite_opencl_cpu) Optimization +Generate CPU oriented OpenCL code. + +fgraphite-opencl-ignore-depth-heuristic +Common Report Var(flag_graphite_opencl_no_depth_check) +Ignore depth heuristic in graphite-opencl pass. + +fgraphite-opencl-ignore-mem-heuristic +Common Report Var(flag_graphite_opencl_no_memory_transfer_check) +Ignore mem transfer heuristic in graphite-opencl pass. + +fgraphite-opencl-ignore-dep +Common Report Var(flag_graphite_opencl_no_dep_check) +Ignore dependency checking in graphite-opencl pass. + +fgraphite-opencl-ignore-types +Common Report Var(flag_graphite_opencl_no_types_check) +Ignore supported type checking in graphite-opencl pass. + +fgraphite-opencl-depth-base= +Common RejectNegative Joined UInteger Init(0) Var(opencl_base_depth_const) +Value for depth heuristic in graphite-opencl pass. + +fgraphite-opencl-debug +Common Report Var(flag_graphite_opencl_debug) +Add checks for opencl calls return values. + floop-strip-mine Common Report Var(flag_loop_strip_mine) Optimization Enable Loop Strip Mining transformation diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def index 0492d66..c150710 100644 --- a/gcc/dbgcnt.def +++ b/gcc/dbgcnt.def @@ -184,3 +184,4 @@ DEBUG_COUNTER (sms_sched_loop) DEBUG_COUNTER (store_motion) DEBUG_COUNTER (split_for_sched2) DEBUG_COUNTER (tail_call) +DEBUG_COUNTER (opencl_scop_cnt) diff --git a/gcc/graphite-clast-to-gimple.c b/gcc/graphite-clast-to-gimple.c index 9c732aa..2d95144 100644 --- a/gcc/graphite-clast-to-gimple.c +++ b/gcc/graphite-clast-to-gimple.c @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see #include "gimple.h" #include "langhooks.h" #include "sese.h" +#include "dbgcnt.h" #ifdef HAVE_cloog #include "cloog/cloog.h" @@ -1517,10 +1518,15 @@ gloog (scop_p scop, htab_t bb_pbb_mapping) create_params_index (params_index, pc.prog); - translate_clast (region, context_loop, pc.stmt, - if_region->true_region->entry, - &newivs, newivs_index, - bb_pbb_mapping, 1, params_index); + if (flag_graphite_opencl && dbg_cnt (opencl_scop_cnt)) + opencl_transform_clast (pc.stmt, region, if_region->true_region->entry, + scop, params_index); + else + translate_clast (region, context_loop, pc.stmt, + if_region->true_region->entry, + &newivs, newivs_index, + bb_pbb_mapping, 1, params_index); + graphite_verify (); scev_reset (); recompute_all_dominators (); diff --git a/gcc/graphite-clast-to-gimple.h b/gcc/graphite-clast-to-gimple.h index 20c486c..c072918 100644 --- a/gcc/graphite-clast-to-gimple.h +++ b/gcc/graphite-clast-to-gimple.h @@ -37,6 +37,12 @@ typedef struct bb_pbb_def poly_bb_p pbb; }bb_pbb_def; +/* From graphite-opencl.c */ +extern void opencl_transform_clast (struct clast_stmt *, sese, edge, + scop_p, htab_t); +extern void graphite_opencl_finalize (edge); +extern void graphite_opencl_init (void); + /* From graphite-clast-to-gimple.c */ extern bool gloog (scop_p, htab_t); extern cloog_prog_clast scop_to_clast (scop_p, CloogState *); diff --git a/gcc/graphite-opencl-codegen.c b/gcc/graphite-opencl-codegen.c new file mode 100644 index 0000000..2f07fe1 --- /dev/null +++ b/gcc/graphite-opencl-codegen.c @@ -0,0 +1,1916 @@ +/* OpencCL code generation for GRAPHITE-OpenCL. + Copyright (C) 2009, 2010 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* OpenCL code genration for GRAPHITE-OpenCL. This file implements + OpenCL kernel code generation from CLAST structures. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "ggc.h" +#include "tree.h" +#include "rtl.h" +#include "basic-block.h" +#include "diagnostic.h" +#include "tree-flow.h" +#include "toplev.h" +#include "tree-dump.h" +#include "timevar.h" +#include "cfgloop.h" +#include "tree-chrec.h" +#include "tree-data-ref.h" +#include "tree-scalar-evolution.h" +#include "tree-pass.h" +#include "domwalk.h" +#include "value-prof.h" +#include "pointer-set.h" +#include "gimple.h" +#include "sese.h" +#include "output.h" +#include +#include "hashtab.h" +#include "gimple-pretty-print.h" +#include "tree.h" + +#ifdef HAVE_cloog +#include "cloog/cloog.h" +#include "ppl_c.h" +#include "graphite-ppl.h" +#include "graphite.h" +#include "graphite-poly.h" +#include "graphite-scop-detection.h" +#include "graphite-clast-to-gimple.h" +#include "graphite-dependences.h" +#include "dyn-string.h" +#include "graphite-opencl.h" + + +/* These functions implement code generation from different clast + structures. */ +static void opencl_print_stmt_list (struct clast_stmt *, opencl_main, int); +static void opencl_print_for (struct clast_for *, opencl_main, int); +static void opencl_print_guard (struct clast_guard *, opencl_main, int); +static void opencl_print_equation (struct clast_equation *, opencl_main); +static void opencl_print_expr (struct clast_expr *, opencl_main); +static void opencl_add_variable (const char *, tree, opencl_main); +static void opencl_print_term (struct clast_term *, opencl_main); +static void opencl_print_reduction (struct clast_reduction *, opencl_main); +static void opencl_print_sum (struct clast_reduction *, opencl_main); +static void opencl_print_binary (struct clast_binary *, opencl_main); +static void opencl_print_minmax_c (struct clast_reduction *, opencl_main); + +/* These function implement code generation from different gimple + objects. */ +static void opencl_print_bb (basic_block, opencl_main); +static void opencl_print_gimple_assign_operation (gimple, opencl_main); +static void opencl_print_gimple_assign (gimple, opencl_main); +static void opencl_print_gimple (gimple, opencl_main); +static int opencl_print_operand (tree, bool, opencl_main); + + +static void opencl_print_local_vars (const char *, const char *, const char *, + opencl_main); +static void opencl_try_variable (opencl_main, tree); +static const char *opencl_get_var_name (tree); +static void opencl_build_defines (tree, opencl_main); +static void opencl_expand_scalar_vars (opencl_main, gimple); +static void opencl_add_function_arg (opencl_main, tree, const char *); +static void opencl_add_data_refs_pbb (poly_bb_p, opencl_main); +static void opencl_add_non_scalar_type_decl (tree, dyn_string_t, const char *); +static const char *opencl_print_function_arg_with_type (const char *, tree); +static bool check_and_mark_arg (opencl_main, const char *, bool); + + +/* Compare two clast names based on their indexes. */ + +static int +opencl_cmp_scat (const char *scat1, const char *scat2) +{ + int len_1 = strlen (scat1); + int len_2 = strlen (scat2); + + if (len_1 > len_2) + return 1; + + if (len_1 < len_2) + return -1; + + return strcmp (scat1, scat2); +} + +/* This function implements !strcmp (STR1, STR2) call. */ + +static int +opencl_cmp_str (const void *str1, const void *str2) +{ + const char *c_str1 = (const char *) str1; + const char *c_str2 = (const char *) str2; + return !strcmp (c_str1, c_str2); +} + +/* Get body of generating kernel function from CODE_GEN. */ + +static dyn_string_t +opencl_get_current_body (opencl_main code_gen) +{ + return code_gen->current_body->body; +} + +/* Get header of generating kernel function from CODE_GEN. */ + +static dyn_string_t +opencl_get_current_header (opencl_main code_gen) +{ + return code_gen->current_body->header; +} + +/* Appends a string STR to the header of the generating kernel + function from CODE_GEN. */ + +static void +opencl_append_string_to_header (const char *str, opencl_main code_gen) +{ + dyn_string_t tmp = opencl_get_current_header (code_gen); + dyn_string_append_cstr (tmp, str); +} + +/* Appends a string STR to the body of the generating kernel function + from CODE_GEN. */ + +static void +opencl_append_string_to_body (const char *str, opencl_main code_gen) +{ + dyn_string_t tmp = opencl_get_current_body (code_gen); + dyn_string_append_cstr (tmp, str); +} + +/* Appends an integer NUM to the string STR following FORMAT. */ + +static void +opencl_append_int_to_str (dyn_string_t str, long num, const char *format) +{ + char tmp[100]; + sprintf (tmp, format, num); + dyn_string_append_cstr (str, tmp); +} + +/* Appends an integer NUM to the header of the generating kernel + function from CODE_GEN following FORMAT. */ + +static void +opencl_append_num_to_header (opencl_main code_gen, long num, + const char *format) +{ + dyn_string_t tmp = opencl_get_current_header (code_gen); + opencl_append_int_to_str (tmp, num, format); +} + +/* Appends an integer NUM to the body of the generating kernel + function from CODE_GEN following FORMAT. */ + +static void +opencl_append_num_to_body (opencl_main code_gen, long num, const char *format) +{ + dyn_string_t tmp = opencl_get_current_body (code_gen); + opencl_append_int_to_str (tmp, num, format); +} + +/* Get TYPE scalar (base) part. */ + +static tree +opencl_get_main_type (tree type) +{ + while (TREE_CODE (type) == ARRAY_TYPE + || TREE_CODE (type) == POINTER_TYPE) + type = TREE_TYPE (type); + return build_pointer_type (type); +} + + +/* Create the base part of FUNCTION declaration, similar to this: + "__global void __opencl_function_0". */ + +static void +opencl_create_function_code (opencl_body function) +{ + static int opencl_function_counter = 0; + dyn_string_t dest = function->header; + dyn_string_append_cstr (dest, "__kernel void"); + dyn_string_append_cstr (dest, " "); + dyn_string_append_cstr (dest, "opencl_auto_function_"); + opencl_append_int_to_str (dest, opencl_function_counter, "%ld"); + dyn_string_append_cstr (dest, " ("); + sprintf (function->name, "%s%d","opencl_auto_function_", + opencl_function_counter++); +} + +/* Create new instance of opencl_body. */ + +static opencl_body +opencl_body_create (void) +{ + opencl_body tmp = XNEW (struct graphite_opencl_kernel_body); + + tmp->body = dyn_string_new (OPENCL_INIT_BUFF_SIZE); + tmp->pre_header = dyn_string_new (OPENCL_INIT_BUFF_SIZE); + tmp->header = dyn_string_new (OPENCL_INIT_BUFF_SIZE); + tmp->non_scalar_args = dyn_string_new (OPENCL_INIT_BUFF_SIZE); + + tmp->num_of_data_writes = 0; + tmp->function_args = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE); + tmp->function_args_to_pass = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE); + tmp->data_refs = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE); + opencl_create_function_code (tmp); + + return tmp; +} + +/* Check whether clast expression EXPT is constant in current loop nest. + FIRST_SCAT is the iterator of outermost loop in current loop nest. */ + +static bool +opencl_constant_expression_p (struct clast_expr *expr, const char *first_scat) +{ + switch (expr->type) + { + case expr_term: + { + struct clast_term *term = (struct clast_term *) expr; + if (!(term->var)) + return true; + { + const char *name = term->var; + if (strstr (name, "scat_") != name) + return true; + + return (opencl_cmp_scat (first_scat, name) == 1); + } + } + case expr_red: + { + struct clast_reduction *red = (struct clast_reduction *) expr; + int i; + for (i = 0; i < red->n; i++) + if (!opencl_constant_expression_p (red->elts [i], first_scat)) + return false; + + return true; + } + case expr_bin: + { + struct clast_binary *bin = (struct clast_binary *) expr; + return opencl_constant_expression_p (bin->LHS, first_scat); + } + default: + gcc_unreachable (); + return false; + } +} + +/* Check whether the clast_for LOOP has constant bounds. FIRST_SCAT + is the iterator of outermost loop in current loop nest. */ + +static bool +opencl_constant_loop_bound_p (struct clast_for *loop, const char *first_scat) +{ + return opencl_constant_expression_p (loop->UB, first_scat) + && opencl_constant_expression_p (loop->LB, first_scat); +} + +/* If clast loop PARENT has only one child and it's a loop too, return + this child. Otherwise return NULL. */ + +static struct clast_for * +opencl_get_single_loop_child (struct clast_for *parent) +{ + struct clast_stmt *body = parent->body; + + if (body->next + || !CLAST_STMT_IS_A (body, stmt_for)) + return NULL; + + return (struct clast_for *) body; +} + +/* Calculate the maximal depth of a perfect nested loop nest with LOOP + as outermost loop. META holds meta information for loop LOOP, + DEPTH is the depth of LOOP in current loop nest, FIRST_SCAT is the + iterator of outermost loop in current loop nest. CODE_GEN holds + information related to OpenCL code generation. */ + +static int +opencl_get_perfect_nested_loop_depth (opencl_main code_gen, + opencl_clast_meta meta, + struct clast_for *loop, + int depth, const char *first_scat) +{ + struct clast_for *child; + if (dependency_in_clast_loop_p (code_gen, meta, loop, depth)) + return 0; + + child = opencl_get_single_loop_child (loop); + + if (!child + || !opencl_constant_loop_bound_p (child, first_scat)) + return 1; + + return 1 + opencl_get_perfect_nested_loop_depth (code_gen, meta->body, child, + depth + 1, first_scat); +} + +/* Get the type of the loop iterator for loop, represented by STMT. + LEVEL is the depth of this loop in current loop nest. CODE_GEN + holds information related to OpenCL code generation. */ + +static tree +opencl_get_loop_iter_type (struct clast_for *stmt, opencl_main code_gen, + int level) +{ + tree lb_type = gcc_type_for_clast_expr (stmt->LB, code_gen->region, + code_gen->newivs, + code_gen->newivs_index, + code_gen->params_index); + tree ub_type = gcc_type_for_clast_expr (stmt->UB, code_gen->region, + code_gen->newivs, + code_gen->newivs_index, + code_gen->params_index); + tree type = gcc_type_for_iv_of_clast_loop (stmt, level, lb_type, ub_type); + + return type; +} + +static const char *data_type; + +/* Simplified version of C-style type printing from c-aux-info.c. */ + +static const char * +gen_type_1 (const char *ret_val, tree t) +{ + switch (TREE_CODE (t)) + { + case POINTER_TYPE: + if (TYPE_READONLY (t)) + ret_val = concat ("const ", ret_val, NULL); + if (TYPE_VOLATILE (t)) + ret_val = concat ("volatile ", ret_val, NULL); + + ret_val = concat ("*", ret_val, NULL); + + if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE + || TREE_CODE (TREE_TYPE (t)) == FUNCTION_TYPE) + ret_val = concat ("(", ret_val, ")", NULL); + + ret_val = gen_type_1 (ret_val, TREE_TYPE (t)); + + return ret_val; + + case ARRAY_TYPE: + if (!COMPLETE_TYPE_P (t) || TREE_CODE (TYPE_SIZE (t)) != INTEGER_CST) + ret_val = gen_type_1 (concat (ret_val, "[]", NULL), TREE_TYPE (t)); + else if (int_size_in_bytes (t) == 0) + ret_val = gen_type_1 (concat (ret_val, "[0]", NULL), TREE_TYPE (t)); + else + { + int size = int_size_in_bytes (t) / int_size_in_bytes (TREE_TYPE (t)); + char buff[10]; + sprintf (buff, "[%d]", size); + ret_val = gen_type_1 (concat (ret_val, buff, NULL), TREE_TYPE (t)); + } + break; + + case IDENTIFIER_NODE: + data_type = IDENTIFIER_POINTER (t); + break; + + case TYPE_DECL: + data_type = IDENTIFIER_POINTER (DECL_NAME (t)); + break; + + case INTEGER_TYPE: + case FIXED_POINT_TYPE: + switch (TYPE_PRECISION (t)) + { + case 8: data_type = "char"; break; + case 16: data_type = "short"; break; + case 32: data_type = "int"; break; + case 64: data_type = "long"; break; + default: gcc_unreachable (); + } + if (TYPE_UNSIGNED (t)) + data_type = concat ("unsigned ", data_type, NULL); + break; + + case REAL_TYPE: + switch (TYPE_PRECISION (t)) + { + case 32: data_type = "float"; break; + case 64: data_type = "double"; break; + default: gcc_unreachable (); + } + break; + + case VOID_TYPE: + data_type = "void"; + break; + + default: + gcc_unreachable (); + } + + if (TYPE_READONLY (t)) + ret_val = concat ("const ", ret_val, NULL); + if (TYPE_VOLATILE (t)) + ret_val = concat ("volatile ", ret_val, NULL); + if (TYPE_RESTRICT (t)) + ret_val = concat ("restrict ", ret_val, NULL); + return ret_val; +} + +/* Generate a string representation of a declaration of varable named + NAME with type T. */ + +static const char * +gen_type_with_name (const char *name, tree t) +{ + const char *type_part = gen_type_1 (name, t); + return concat (data_type, " ", type_part, NULL); +} + +/* Replace perfect nested loop nest represented by F with opencl kernel. + For example, loop nest like this + + | for (scat_1 = 0; scat_1 < M; i ++) + | for (scat_2 = 0; scat_2 < N; j ++) + | for (scat_3 = 0; scat_3 < L; k ++) + | stmt (i, j, k) + + will be replased by kernel, where scat_1, scat_2, scat_3 + depends on thread global id. Number of threads for this kernel + will be M * N * L. DEPTH is the depth of F in current loop nest. + CODE_GEN holds information related to OpenCL code generation. BODY holds + information current OpenCL kernel. */ + +static void +opencl_perfect_nested_to_kernel (opencl_main code_gen, struct clast_for *f, + opencl_body body, int depth) +{ + VEC (tree, heap) *mod = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE); + + VEC (tree, heap) *function_args = body->function_args; + const int perfect_depth + = opencl_get_perfect_nested_loop_depth (code_gen, code_gen->curr_meta, f, + depth, f->iterator); + struct clast_for *curr = f; + int counter = perfect_depth; + tree curr_base = integer_one_node; + basic_block calc_block = opencl_create_bb (code_gen); + opencl_append_string_to_body + ("size_t opencl_global_id = get_global_id (0);\n", code_gen); + + body->first_iter = curr->iterator; + + /* Iterate through all loops, which form perfect loop nest. */ + while (counter--) + { + tree iv; + sese region = code_gen->region; + VEC (tree, heap) *newivs = code_gen->newivs; + htab_t newivs_index = code_gen->newivs_index; + htab_t params_index = code_gen->params_index; + const char *tmp; + const char *decl; + + tree type = opencl_get_loop_iter_type (curr, code_gen, depth); + + const char *type_str = gen_type_with_name (" ", type); + + tree low_bound = clast_to_gcc_expression (type, curr->LB, region, + newivs, newivs_index, + params_index); + + tree up_bound = clast_to_gcc_expression (type, curr->UB, region, + newivs, newivs_index, + params_index); + long stride = 1; + tree t_stride; + tree curr_loop_size; + int num = perfect_depth - counter - 1; + + gcc_assert (curr->LB); + gcc_assert (curr->UB); + + body->last_iter = curr->iterator; + + low_bound = opencl_tree_to_var (calc_block, low_bound); + up_bound = opencl_tree_to_var (calc_block, up_bound); + + if (mpz_cmp_si (curr->stride, 1) > 0) + stride = mpz_get_si (curr->stride); + + t_stride = build_int_cst (NULL_TREE, stride); + + curr_loop_size = build2 (MINUS_EXPR, type, + up_bound, low_bound); + + curr_loop_size = build2 (PLUS_EXPR, type, + curr_loop_size, + fold_convert (type, integer_one_node)); + + if (stride != 1) + curr_loop_size + = build2 (TRUNC_DIV_EXPR, type, + curr_loop_size, t_stride); + + curr_loop_size = opencl_tree_to_var (calc_block, curr_loop_size); + + VEC_safe_push (tree, heap, mod, curr_loop_size); + + iv = create_tmp_var (type, "scat_tmp_iter"); + + /* Declare loop iterator as local variable. */ + opencl_append_string_to_body (type_str, code_gen); + opencl_append_string_to_body (curr->iterator, code_gen); + + /* Calculate the value of the iterator of current loop based of the + number of current thread and numbers of iterators of inner loops. */ + opencl_append_string_to_body (" = ((opencl_global_id / ", code_gen); + opencl_append_num_to_body (code_gen, num, "opencl_base_%d"); + opencl_append_string_to_body (") % ", code_gen); + opencl_append_num_to_body (code_gen, num, "opencl_mod_%d"); + opencl_append_string_to_body (") * ", code_gen); + opencl_append_num_to_body (code_gen, stride, "%d"); + opencl_append_num_to_body (code_gen, num, " + opencl_first_%d;\n"); + + opencl_append_string_to_header (type_str, code_gen); + opencl_append_num_to_header (code_gen, num, "opencl_mod_%d, "); + opencl_append_string_to_header (type_str, code_gen); + opencl_append_num_to_header (code_gen, num, "opencl_first_%d, "); + + /* Declare old loop iterator. */ + tmp = opencl_get_var_name (iv); + check_and_mark_arg (code_gen, tmp, false); + decl = opencl_print_function_arg_with_type (tmp, type); + opencl_append_string_to_body (decl, code_gen); + opencl_append_string_to_body (" = ", code_gen); + opencl_append_string_to_body (curr->iterator, code_gen); + opencl_append_string_to_body (";\n", code_gen); + + save_clast_name_index (code_gen->newivs_index, curr->iterator, + VEC_length (tree, code_gen->newivs)); + VEC_safe_push (tree, heap, code_gen->newivs, iv); + + /* Save number of iterations for loop. */ + VEC_safe_push (tree, heap, function_args, curr_loop_size); + VEC_safe_push (tree, heap, function_args, low_bound); + + body->clast_body = curr->body; + curr = opencl_get_single_loop_child (curr); + depth ++; + } + + counter = perfect_depth; + + /* Store number of iteration of inner loops for each loop in perfect + nest. */ + while (counter --) + { + tree type = TREE_TYPE (curr_base); + const char *type_str = gen_type_with_name (" ", type); + tree curr = VEC_index (tree, mod, counter); + + opencl_append_string_to_header (type_str, code_gen); + opencl_append_num_to_header (code_gen, counter, "opencl_base_%d, "); + + VEC_safe_push (tree, heap, function_args, curr_base); + + curr_base = build2 (MULT_EXPR, type, curr_base, + build1 (CONVERT_EXPR, type, curr)); + curr_base = opencl_tree_to_var (calc_block, curr_base); + } + + body->num_of_exec = fold_convert (integer_type_node, curr_base); + VEC_free (tree, heap, mod); +} + +/* Generate code for loop statement F. DEPTH is the depth of F in + current loop nest. CODE_GEN holds information related to OpenCL + code generation. */ + +static opencl_body +opencl_print_loop (struct clast_for *f, opencl_main code_gen, int depth) +{ + opencl_body current_body = code_gen->current_body; + + code_gen->global_defined_vars + = htab_create (10, htab_hash_string, opencl_cmp_str, NULL); + + opencl_perfect_nested_to_kernel (code_gen, f, current_body, depth); + + /* Define local loop iterators. */ + opencl_print_local_vars (current_body->first_iter, + current_body->last_iter, + "unsigned int", code_gen); + + /* Generate code for kernel body. */ + opencl_print_stmt_list (current_body->clast_body, code_gen, depth + 1); + opencl_append_string_to_body ("}\n", code_gen); + + if (current_body->num_of_data_writes) + { + dyn_string_t header = current_body->header; + int offset; + + dyn_string_append (header, current_body->non_scalar_args); + offset = dyn_string_length (header) - 2; + + if (*(dyn_string_buf (header) + offset) == ',') + *(dyn_string_buf (header) + offset) = ' '; + + opencl_append_string_to_header (")\n{\n", code_gen); + } + + return current_body; +} + +/* Generate OpenCL code for clast_assignment A. + CODE_GEN holds information related to OpenCL code generation. */ + +static void +opencl_print_assignment (struct clast_assignment *a, opencl_main code_gen) +{ + /* Real assignment. */ + if (a->LHS) + { + opencl_append_string_to_body (a->LHS, code_gen); + opencl_append_string_to_body (" = ", code_gen); + } + + /* Just expression. */ + opencl_print_expr (a->RHS, code_gen); +} + +/* Return tree with variable, corresponging to given clast name NAME. + CODE_GEN holds information related to OpenCL code generation. */ + +static tree +opencl_clast_name_to_tree (opencl_main code_gen, const char *name) +{ + return clast_name_to_gcc (name, code_gen->region, code_gen->newivs, + code_gen->newivs_index, code_gen->params_index); +} + +/* For a given clast name return that name, if it's local name in + kernel body or, otherwise, name of gimple variable created for this + scat_i in gimple. CODE_GEN holds information related to OpenCL + code generation. */ + +static const char * +opencl_get_scat_real_name (opencl_main code_gen, const char *name) +{ + /* NAME > FIRST_ITER */ + if (opencl_cmp_scat (name, code_gen->current_body->first_iter) >= 0) + return name; + + return + opencl_get_var_name (opencl_clast_name_to_tree (code_gen, name)); +} + +/* Add clast variable (scat_i) as kernel argument. NAME is a new name + of loop iterator (scat_*), REAL_NAME is an old (origin) name of + loop iterator. CODE_GEN holds information related to OpenCL code + generation. */ + +static void +opencl_add_scat_as_arg (opencl_main code_gen, const char *name, + const char *real_name) +{ + tree var; + if (!check_and_mark_arg (code_gen, real_name, false)) + return; + var = opencl_clast_name_to_tree (code_gen, name); + opencl_add_function_arg (code_gen, var, real_name); +} + +/* Generate OpenCL code for user statement U. Code will be generated + from basic block, related to U. Also induction variables mapping + to old variables must be calculated to process basic block. + CODE_GEN holds information related to OpenCL code generation. */ + +static void +opencl_print_user_stmt (struct clast_user_stmt *u, opencl_main code_gen) +{ + CloogStatement * cs; + poly_bb_p pbb; + gimple_bb_p gbbp; + basic_block bb; + int i; + int nb_loops = number_of_loops (); + code_gen->iv_map = VEC_alloc (tree, heap, nb_loops); + + for (i = 0; i < nb_loops; i++) + VEC_safe_push (tree, heap, code_gen->iv_map, NULL_TREE); + build_iv_mapping (code_gen->iv_map, code_gen->region, + code_gen->newivs, + code_gen->newivs_index, u, + code_gen->params_index); + + code_gen->defined_vars + = htab_create (10, htab_hash_string, opencl_cmp_str, NULL); + opencl_append_string_to_body ("{\n", code_gen); + + cs = u->statement; + pbb = (poly_bb_p) cloog_statement_usr (cs); + gbbp = PBB_BLACK_BOX (pbb); + bb = GBB_BB (gbbp); + code_gen->context_loop = bb->loop_father; + + opencl_add_data_refs_pbb (pbb, code_gen); + opencl_print_bb (bb, code_gen); + opencl_append_string_to_body ("}\n", code_gen); + htab_delete (code_gen->defined_vars); + code_gen->defined_vars = NULL; + VEC_free (tree, heap, code_gen->iv_map); +} + +/* If tree node NODE defined in current sese build and insert define + statements for it, otherwise mark node as external (parameter for + kernel). If tree defined in current sese, also recursively build + defines for all trees in definition expression. */ + +static void +opencl_build_defines (tree node, opencl_main code_gen) +{ + enum tree_code code = TREE_CODE (node); + switch (code) + { + case SSA_NAME: + { + const char *tmp = opencl_get_var_name (node); + gimple def_stmt; + + /* If name defined in other sese it is kernel's parameter. */ + if (!defined_in_sese_p (node, code_gen->region)) + return; + + /* Bail out if this name was defined earlier either in this + or other region. */ + if (*(const char **)htab_find_slot (code_gen->defined_vars, + tmp, INSERT)) + return; + + /* Get definition statement. */ + def_stmt = SSA_NAME_DEF_STMT (node); + opencl_expand_scalar_vars (code_gen, def_stmt); + opencl_print_gimple (def_stmt, code_gen); + return; + } + case ARRAY_REF: + { + tree arr = TREE_OPERAND (node, 0); + tree offset = TREE_OPERAND (node, 1); + opencl_build_defines (arr, code_gen); + opencl_build_defines (offset, code_gen); + return; + } + default: + gcc_unreachable (); + } +} + +/* For a given gimple statement STMT build definition for all names, + used in this stament. If name has been defined in other sese, mark + it as kernel parameter. CODE_GEN holds information related to + OpenCL code generation. */ + +static void +opencl_expand_scalar_vars (opencl_main code_gen, gimple stmt) +{ + ssa_op_iter iter; + use_operand_p use_p; + FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES) + { + tree use = USE_FROM_PTR (use_p); + if (!is_gimple_reg (use)) + continue; + opencl_build_defines (use, code_gen); + } +} + +/* Generate code for a single basic block BB. CODE_GEN holds + information related to OpenCL code generation. */ + +static void +opencl_print_bb (basic_block bb, opencl_main code_gen) +{ + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + opencl_expand_scalar_vars (code_gen, stmt); + opencl_print_gimple (stmt, code_gen); + } +} + +/* Print operation simbol (`+' `-' `*') for assignment operation GMA. + CODE_GEN holds information related to OpenCL code generation. */ + +static void +opencl_print_gimple_assign_operation (gimple gmp, opencl_main code_gen) +{ + opencl_append_string_to_body + (op_symbol_code (gimple_assign_rhs_code (gmp)), code_gen); +} + +/* Print pointer expression represented by EXPR. TYPE_SIZE represents + size of the base type for EXPR. CODE_GEN holds information related + to OpenCL code generation. */ + +static void +opencl_print_addr_operand (tree expr, tree type_size, opencl_main code_gen) +{ + if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE) + { + opencl_append_string_to_body ("(", code_gen); + opencl_print_operand (expr, false, code_gen); + opencl_append_string_to_body ("/", code_gen); + opencl_print_operand (type_size, false, code_gen); + opencl_append_string_to_body (")", code_gen); + } + else + opencl_print_operand (expr, false, code_gen); + +} + +/* Print unary gimple operation GMP. CODE_GEN holds information + related to OpenCL code generation. */ + +static void +opencl_print_unary (gimple gmp, opencl_main code_gen) +{ + switch (gimple_assign_rhs_code (gmp)) + { + case BIT_NOT_EXPR: + opencl_append_string_to_body ("~", code_gen); + return; + case TRUTH_NOT_EXPR: + opencl_append_string_to_body ("!", code_gen); + return; + case NEGATE_EXPR: + opencl_append_string_to_body ("-", code_gen); + return; + case MODIFY_EXPR: + default: + return; + } +} + +/* Generate code for min or max gimple operand GMP. CODE_GEN holds + information related to OpenCL code generation. */ + +static void +opencl_print_max_min_assign (gimple gmp, opencl_main code_gen) +{ + tree lhs = gimple_assign_lhs (gmp); + tree rhs1 = gimple_assign_rhs1 (gmp); + tree rhs2 = gimple_assign_rhs2 (gmp); + bool max = gimple_assign_rhs_code (gmp) == MAX_EXPR; + + opencl_print_operand (lhs, true, code_gen); + opencl_append_string_to_body (max?" = fmax (":"= fmin (", code_gen); + opencl_print_operand (rhs1, false, code_gen); + opencl_append_string_to_body (",", code_gen); + opencl_print_operand (rhs2, false, code_gen); + opencl_append_string_to_body (");\n", code_gen); + +} + +/* Generate code for gimple assignment statement GMP. CODE_GEN holds + information related to OpenCL code generation. */ + +static void +opencl_print_gimple_assign (gimple gmp, opencl_main code_gen) +{ + int num_of_ops = gimple_num_ops (gmp); + tree lhs; + tree rhs1; + tree rhs2; + bool addr_expr; + int result; + tree result_size = NULL; + + if (gimple_assign_rhs_code (gmp) == MAX_EXPR + || gimple_assign_rhs_code (gmp) == MIN_EXPR) + { + opencl_print_max_min_assign (gmp, code_gen); + return; + } + gcc_assert (num_of_ops == 2 || num_of_ops == 3); + lhs = gimple_assign_lhs (gmp); + + addr_expr = (TREE_CODE (TREE_TYPE (lhs)) == POINTER_TYPE); + if (addr_expr) + result_size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))); + + rhs1 = gimple_assign_rhs1 (gmp); + rhs2 = gimple_assign_rhs2 (gmp); + result = opencl_print_operand (lhs, true, code_gen); + if (result != 0) + return; + opencl_append_string_to_body (" = ", code_gen); + + if (addr_expr) + opencl_print_addr_operand (rhs1, result_size, code_gen); + else + { + if (rhs2 == NULL) + opencl_print_unary (gmp, code_gen); + opencl_print_operand (rhs1, false, code_gen); + } + if (rhs2 != NULL_TREE) + { + opencl_print_gimple_assign_operation (gmp, code_gen); + if (addr_expr) + opencl_print_addr_operand (rhs2, result_size, code_gen); + else + opencl_print_operand (rhs2, false, code_gen); + } + opencl_append_string_to_body (";\n",code_gen); +} + +/* Generate code for arguments for gimple call statement GMP. + CODE_GEN hold information related to OpenCL code generation. */ + +static void +opencl_print_gimple_call_args (opencl_main code_gen, gimple gmp) +{ + size_t len = gimple_call_num_args (gmp); + size_t i; + opencl_append_string_to_body (" (",code_gen); + for (i = 0; i < len; i++) + { + opencl_print_operand (gimple_call_arg (gmp, i), false, code_gen); + if (i < len - 1) + opencl_append_string_to_body (", ",code_gen); + } + opencl_append_string_to_body (")",code_gen); +} + +/* Replace some function names. */ + +static const char * +opencl_get_function_name (tree function) +{ + const char *gimple_name = IDENTIFIER_POINTER (DECL_NAME (function)); + if (!strcmp (gimple_name, "__builtin_powf")) + return "pow"; + return gimple_name; +} + +/* Generate code for gimple call statement GMP. CODE_GEN holds information + related to OpenCL code generation. */ + +static void +opencl_print_gimple_call (opencl_main code_gen, gimple gmp) +{ + tree lhs = gimple_call_lhs (gmp); + tree function = gimple_call_fn (gmp); + opencl_print_operand (lhs, true, code_gen); + opencl_append_string_to_body (" = ", code_gen); + + while (TREE_CODE (function) == ADDR_EXPR + || TREE_CODE (function) == INDIRECT_REF) + function = TREE_OPERAND (function, 0); + opencl_append_string_to_body (opencl_get_function_name (function), code_gen); + opencl_print_gimple_call_args (code_gen, gmp); + opencl_append_string_to_body (";\n",code_gen); +} + +/* Generate code for gimple statment SMP. Now only assignment + operation are supported, but it seems enough for clast translation. + GIMPLE_COND statements are loop bound conditions and can be safely + ignored. CODE_GEN holds information related to OpenCL code + generation. */ + +static void +opencl_print_gimple (gimple gmp, opencl_main code_gen) +{ + if (!gmp) + return; + + switch (gimple_code (gmp)) + { + case GIMPLE_ASSIGN: + opencl_print_gimple_assign (gmp, code_gen); + break; + case GIMPLE_COND: + break; + case GIMPLE_PHI: + break; + case GIMPLE_CALL: + opencl_print_gimple_call (code_gen, gmp); + break; + case GIMPLE_DEBUG: + break; + case GIMPLE_LABEL: + { + tree label = gimple_label_label (gmp); + opencl_print_operand (label, false, code_gen); + opencl_append_string_to_body (": ", code_gen); + } + break; + default: + debug_gimple_stmt (gmp); + gcc_unreachable (); + } +} + +/* Get name of the variable, represented by tree NODE. If variable is + temporary, generate name for it. */ + +static const char * +opencl_get_var_name (tree node) +{ + bool ssa_name = TREE_CODE (node) == SSA_NAME; + tree name; + int num = 0; + if (ssa_name) + { + num = SSA_NAME_VERSION (node); + node = SSA_NAME_VAR (node); + } + name = DECL_NAME (node); + if (name) + { + if (!ssa_name) + return identifier_to_locale (IDENTIFIER_POINTER (name)); + else + { + const char *base = identifier_to_locale (IDENTIFIER_POINTER (name)); + char *buff = XNEWVEC (char, strlen (base) + 5); + sprintf (buff, "%s_%d", base, num); + return buff; + } + } + else + { + int tmp_var_uid = DECL_UID (node); + char *tmp = XNEWVEC (char, 30); + sprintf (tmp, "opencl_var_%d_%d", tmp_var_uid, num); + return tmp; + } +} + +/* Append variable name NAME to function body. Differs from appending + string by replacing `.' by `_'. CODE_GEN holds information related + to OpenCL code generation. */ + +static void +opencl_append_var_name (const char *name, opencl_main code_gen) +{ + int len = strlen (name); + char *tmp = XNEWVEC (char, len + 1); + int i; + for (i = 0; i <= len; i++) + { + char tt = name[i]; + if (tt == '.') + tt = '_'; + tmp[i] = tt; + } + opencl_append_string_to_body (tmp, code_gen); + free (tmp); +} + +/* If variable VAR_DECL is not defined and it is not marked as a + parameter, mark it as a parameter and add it to parameters list. + CODE_GEN holds information related to OpenCL code generation. */ + +static void +opencl_try_variable (opencl_main code_gen, tree var_decl) +{ + const char *name = opencl_get_var_name (var_decl); + gcc_assert (code_gen->defined_vars); + + if (check_and_mark_arg (code_gen, name, false)) + opencl_add_function_arg (code_gen, var_decl, name); +} + +/* Define non scalar variable, represented be DATA as either local + variable or kernel argument. CODE_GEN holds information related to + OpenCL code generation. */ + +static void +opencl_add_non_scalar_function_arg (opencl_main code_gen, + opencl_data data) +{ + const char *decl; + static int counter = 0; + opencl_body body = code_gen->current_body; + tree var = data->exact_object; + const char *name = opencl_get_var_name (var); + tree type = TREE_TYPE (var); + + /* Check whether given variable can be privatized. */ + if (data->privatized) + { + /* Define variable as local variable. */ + gcc_assert (TREE_CODE (type) == ARRAY_TYPE); + decl = opencl_print_function_arg_with_type (name, type); + dyn_string_append_cstr (body->pre_header, decl); + dyn_string_append_cstr (body->pre_header, ";\n"); + return; + } + else + { + /* Define variable as kernel argument. */ + char decl_name [30]; + tree main_type = opencl_get_main_type (type); + sprintf (decl_name, "oclFTmpArg%d", counter++); + decl = opencl_print_function_arg_with_type (decl_name, main_type); + dyn_string_append_cstr (body->non_scalar_args, "__global "); + opencl_add_non_scalar_type_decl (var, body->pre_header, decl_name); + dyn_string_append_cstr (body->non_scalar_args, decl); + dyn_string_append_cstr (body->non_scalar_args, ", "); + VEC_safe_push (opencl_data, heap, body->data_refs, data); + } +} + +/* Register data reference REF to variable DATA. Do nothing, if it + has already been registered. CODE_GEN holds information related to + OpenCL code generation. */ + +static void +opencl_try_data_ref (opencl_main code_gen, data_reference_p ref, + opencl_data data) +{ + tree var = dr_outermost_base_object (ref); + const char *name = opencl_get_var_name (var); + const char ** slot; + gcc_assert (code_gen->defined_vars); + + slot = (const char **)htab_find_slot (code_gen->global_defined_vars, + name, INSERT); + if (*slot) + return; + *slot = name; + opencl_add_non_scalar_function_arg (code_gen, data); +} + +/* Register data reference D_REF in current kernel. CODE_GEN hold + information related to OpenCL code generation. */ + +static void +opencl_add_data_ref (opencl_main code_gen, data_reference_p d_ref) +{ + opencl_data tmp = opencl_get_data_by_data_ref (code_gen, d_ref); + + gcc_assert (tmp); + if (!DR_IS_READ (d_ref)) + { + bitmap_set_bit (code_gen->curr_meta->modified_on_device, tmp->id); + tmp->written_in_current_body = true; + tmp->ever_written_on_device = true; + code_gen->current_body->num_of_data_writes ++; + } + else + { + tmp->read_in_current_body = true; + tmp->ever_read_on_device = true; + } + if (!tmp->privatized) + tmp->used_on_device = true; + + opencl_try_data_ref (code_gen, d_ref, tmp); +} + +/* Add base objects of all data references in PBB as arguments to + current kernel. CODE_GEN holds information related to OpenCL code + generation. */ + +static void +opencl_add_data_refs_pbb (poly_bb_p pbb, opencl_main code_gen) +{ + VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb); + int i; + poly_dr_p curr; + + for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++) + { + data_reference_p d_ref = (data_reference_p) PDR_CDR (curr); + opencl_add_data_ref (code_gen, d_ref); + } +} + +/* Generate operand for tree node NODE. If LSH is true, generated + operand must be lvalue, otherwise it's rvalue. CODE_GEN holds + information related to OpenCL code generation. Also generate + definitions for variables if necessary. Variable definition is not + necessary if variable has already been defined or if it has been + defined in other sese. */ + +static int +opencl_print_operand (tree node, bool lhs, opencl_main code_gen) +{ + tree scev = scalar_evolution_in_region (code_gen->region, + code_gen->context_loop, + node); + tree new_node = chrec_apply_map (scev, code_gen->iv_map); + + if (TREE_CODE (new_node) != SCEV_NOT_KNOWN) + node = new_node; + + switch (TREE_CODE (node)) + { + case NOP_EXPR: + return opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen); + case PLUS_EXPR: + { + if (lhs) + return -1; + opencl_append_string_to_body ("(", code_gen); + opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen); + opencl_append_string_to_body (" + ", code_gen); + opencl_print_operand (TREE_OPERAND (node, 1), false, code_gen); + opencl_append_string_to_body (")", code_gen); + return 0; + } + case MULT_EXPR: + { + if (lhs) + return -1; + opencl_append_string_to_body ("(", code_gen); + opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen); + opencl_append_string_to_body (" * ", code_gen); + opencl_print_operand (TREE_OPERAND (node, 1), false, code_gen); + opencl_append_string_to_body (")", code_gen); + return 0; + } + + case SSA_NAME: + { + /* If rhs just add variable name. Otherwise + it may be necessary to add variable definition. */ + const char *tmp = opencl_get_var_name (node); + if (lhs) + opencl_add_variable (tmp, node, code_gen); + else + opencl_append_var_name (tmp, code_gen); + + /* This call adds variable declaration as formal + parameter in kernel header if it is necessary. */ + opencl_try_variable (code_gen, node); + return 0; + } + case ARRAY_REF: + { + /* []. */ + tree arr = TREE_OPERAND (node, 0); + tree offset = TREE_OPERAND (node, 1); + opencl_print_operand (arr, false, code_gen); + + opencl_append_string_to_body ("[", code_gen); + opencl_print_operand (offset, false, code_gen); + opencl_append_string_to_body ("]", code_gen); + return 0; + } + case INTEGER_CST: + { + /* Just print integer constant. */ + unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (node); + if (lhs) + return -1; + if (host_integerp (node, 0)) + opencl_append_num_to_body (code_gen, (long)low, "%ld"); + else + { + HOST_WIDE_INT high = TREE_INT_CST_HIGH (node); + char buff[100]; + buff[0] = ' '; + if (tree_int_cst_sgn (node) < 0) + { + buff[0] = '-'; + high = ~high + !low; + low = -low; + } + sprintf (buff + 1, HOST_WIDE_INT_PRINT_DOUBLE_HEX, + (unsigned HOST_WIDE_INT) high, low); + opencl_append_string_to_body (buff, code_gen); + } + return 0; + } + case REAL_CST: + { + char buff[100]; + REAL_VALUE_TYPE tmp = TREE_REAL_CST (node); + if (lhs) + return -1; + real_to_decimal (buff, &tmp, sizeof (buff), 0, 1); + opencl_append_string_to_body (buff, code_gen); + return 0; + } + case FIXED_CST: + { + char buff[100]; + if (lhs) + return -1; + fixed_to_decimal (buff, TREE_FIXED_CST_PTR (node), sizeof (buff)); + opencl_append_string_to_body (buff, code_gen); + return 0; + } + case STRING_CST: + { + opencl_append_string_to_body ("\"", code_gen); + opencl_append_string_to_body (TREE_STRING_POINTER (node), code_gen); + opencl_append_string_to_body ("\"", code_gen); + return 0; + } + case VAR_DECL: + case PARM_DECL: + { + tree decl_name = DECL_NAME (node); + const char *tmp; + gcc_assert (decl_name); + tmp = IDENTIFIER_POINTER (decl_name); + + opencl_append_var_name (tmp, code_gen); + opencl_try_variable (code_gen, node); + return 0; + } + case FIELD_DECL: + { + tree decl_name = DECL_NAME (node); + const char *tmp; + gcc_assert (decl_name); + tmp = IDENTIFIER_POINTER (decl_name); + opencl_append_var_name (tmp, code_gen); + return 0; + } + case LABEL_DECL: + { + tree decl_name = DECL_NAME (node); + if (decl_name) + { + const char *tmp = IDENTIFIER_POINTER (decl_name); + opencl_append_var_name (tmp, code_gen); + return 0; + } + + if (LABEL_DECL_UID (node) != -1) + { + opencl_append_num_to_body (code_gen, (int) LABEL_DECL_UID (node), + "L%d"); + return 0; + } + opencl_append_num_to_body (code_gen, (int) DECL_UID (node), + "D_%u"); + return 0; + } + case INDIRECT_REF: + { + opencl_append_string_to_body ("(*", code_gen); + opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen); + opencl_append_string_to_body (")", code_gen); + return 0; + } + case ADDR_EXPR: + { + opencl_append_string_to_body ("&", code_gen); + opencl_print_operand (TREE_OPERAND (node, 0), false, code_gen); + return 0; + } + case COMPONENT_REF: + { + tree op1 = TREE_OPERAND (node, 0); + tree op2 = TREE_OPERAND (node, 1); + opencl_print_operand (op1, false, code_gen); + + if (op1 && TREE_CODE (op1) == INDIRECT_REF) + opencl_append_string_to_body ("->", code_gen); + else + opencl_append_string_to_body (".", code_gen); + + opencl_print_operand (op2, false, code_gen); + return 0; + } + default: + debug_tree (node); + gcc_unreachable (); + } + + return 0; +} + +/* Append variable VAR with name VAR_NAME to current function body. + If variable has been defined in current scope, but definition for + it has not been generated - then generate it's definition and mark + variable as defined. CODE_GEN holds information related to OpenCL + code generation. */ + +static void +opencl_add_variable (const char *var_name, tree var, opencl_main code_gen) +{ + const char ** slot; + if (htab_find (code_gen->global_defined_vars, var_name)) + { + opencl_append_var_name (var_name, code_gen); + return; + } + + slot = (const char **) htab_find_slot + (code_gen->defined_vars, var_name, INSERT); + + if (! (*slot) && defined_in_sese_p (var, code_gen->region)) + { + const char *decl; + tree type = TREE_TYPE (var); + *slot = var_name; + if (TREE_CODE (type) == POINTER_TYPE + || TREE_CODE (type) == ARRAY_TYPE) + { + opencl_add_non_scalar_type_decl (var, code_gen->current_body->body, + NULL); + } + else + { + var = SSA_NAME_VAR (var); + decl = opencl_print_function_arg_with_type (var_name, type); + opencl_append_string_to_body (decl, code_gen); + } + return; + } + opencl_append_var_name (var_name, code_gen); +} + +/* Append list of names of loop iterators from CODE_GEN with same type + TYPE to current kernel. FIRST and LAST define outermost and + innermost iterators to append respectively. */ + +static void +opencl_print_local_vars (const char *fist, const char *last, + const char *type, opencl_main code_gen) +{ + char ** names = code_gen->root_names->_scattering; + int len = code_gen->root_names->_nb_scattering; + int i; + for (i = 0; i < len; i++) + { + const char *tmp = names[i]; + if (opencl_cmp_scat (fist, tmp) <= 0 + && opencl_cmp_scat (last, tmp) >= 0) + { + const char ** slot = + (const char **) htab_find_slot (code_gen->global_defined_vars, + tmp, INSERT); + *slot = tmp; + continue; + } + + if (opencl_cmp_scat (fist, tmp) > 0) + continue; + + opencl_append_string_to_body (type, code_gen); + opencl_append_string_to_body (" ", code_gen); + opencl_append_string_to_body (tmp, code_gen); + opencl_append_string_to_body (";\n", code_gen); + *((const char **)htab_find_slot (code_gen->global_defined_vars, + tmp, INSERT)) = tmp; + } +} + +/* Replace all dots to underscores in string pointed to by P. Return P. */ + +static char * +filter_dots (char *p) +{ + char *s; + for (s = p; *s; s++) + if (*s == '.') + *s = '_'; + return p; +} + +/* Return string with varibale definition. ARG_NAME is the name of + the variable and TYPE is it's type. */ + +static const char * +opencl_print_function_arg_with_type (const char *arg_name, tree type) +{ + const char *decl = gen_type_with_name (arg_name, type); + char *ddecl; + ddecl = xstrdup (decl); + return filter_dots (ddecl); +} + +/* Generate definition for non scalar variable VAR and place it to + string DEST. Use DECL_NAME as variable name. */ + +static void +opencl_add_non_scalar_type_decl (tree var, dyn_string_t dest, + const char *decl_name) +{ + tree type = TREE_TYPE (var); + const char *name = opencl_get_var_name (var); + static int counter = 0; + char type_name [30]; + char *tmp_name = xstrdup (name); + const char *new_type; + tree inner_type = TREE_TYPE (type); + + filter_dots (tmp_name); + + sprintf (type_name, "oclFTmpType%d", counter++); + + new_type = opencl_print_function_arg_with_type (type_name, inner_type); + + dyn_string_append_cstr (dest, "typedef __global "); + dyn_string_append_cstr (dest, new_type); + dyn_string_append_cstr (dest, ";\n"); + + dyn_string_append_cstr (dest, type_name); + dyn_string_append_cstr (dest, " *"); + dyn_string_append_cstr (dest, tmp_name); + if (decl_name != NULL) + { + dyn_string_append_cstr (dest, " = ("); + dyn_string_append_cstr (dest, type_name); + dyn_string_append_cstr (dest, "*)"); + dyn_string_append_cstr (dest, decl_name); + dyn_string_append_cstr (dest, ";\n"); + } + free (tmp_name); + +} + +/* Check whether variable with name NAME has been defined as global or + local variable and mark it as defined. This function returns false + if variable has already been defined, otherwise it returns true. */ + +static bool +check_and_mark_arg (opencl_main code_gen, const char *name, bool local) +{ + const char ** slot; + gcc_assert (code_gen->defined_vars || !local); + if (code_gen->defined_vars) + { + slot = (const char **)htab_find_slot (code_gen->defined_vars, + name, INSERT); + if (*slot) + return false; + if (local) + *slot = name; + } + + slot = (const char **)htab_find_slot (code_gen->global_defined_vars, + name, INSERT); + if (*slot) + return false; + if (!local) + *slot = name; + return true; +} + +/* Add variable VAR with name NAME as function argument. Append it's + declaration in finction header and add it as function parameter. + CODE_GEN holds information related to OpenCL code generation. */ + +static void +opencl_add_function_arg (opencl_main code_gen, tree var, const char *name) +{ + opencl_body body; + const char *decl; + tree type; + type = TREE_TYPE (var); + body = code_gen->current_body; + decl = opencl_print_function_arg_with_type (name, type); + dyn_string_append_cstr (body->header, decl); + dyn_string_append_cstr (body->header, ", "); + VEC_safe_push (tree, heap, body->function_args, var); +} + +/* Generate kernel function code for clast for statement F, located on + depth DEPTH. CODE_GEN holds information related to OpenCL code + generation. */ + +opencl_body +opencl_clast_to_kernel (struct clast_for * f, opencl_main code_gen, + int depth) +{ + opencl_body tmp = opencl_body_create (); + code_gen->current_body = tmp; + return opencl_print_loop (f, code_gen, depth); +} + +/* Generate code for clast statement S, located on depth DEPTH. + CODE_GEN holds information related to OpenCL code generation. */ + +static void +opencl_print_stmt_list (struct clast_stmt *s, opencl_main code_gen, int depth) +{ + for ( ; s; s = s->next) { + gcc_assert (!CLAST_STMT_IS_A (s, stmt_root)); + if (CLAST_STMT_IS_A (s, stmt_ass)) + { + opencl_print_assignment ((struct clast_assignment *) s, code_gen); + opencl_append_string_to_body (";\n", code_gen); + } + else if (CLAST_STMT_IS_A (s, stmt_user)) + opencl_print_user_stmt ((struct clast_user_stmt *) s, code_gen); + else if (CLAST_STMT_IS_A (s, stmt_for)) + opencl_print_for ((struct clast_for *) s, code_gen, depth); + else if (CLAST_STMT_IS_A (s, stmt_guard)) + opencl_print_guard ((struct clast_guard *) s, code_gen, depth); + else if (CLAST_STMT_IS_A (s, stmt_block)) + { + opencl_append_string_to_body ("{\n", code_gen); + opencl_print_stmt_list (((struct clast_block *)s)->body, code_gen, + depth); + opencl_append_string_to_body ("}\n", code_gen); + } + else + gcc_unreachable (); + } +} + +/* Generate code for clast for statement F, locate on depth LEVEL. + CODE_GEN holds information related to OpenCL code generation. */ + +static void +opencl_print_for (struct clast_for *f, opencl_main code_gen, int level) +{ + tree iv; + tree iv_type; + const char *tmp; + const char *decl; + opencl_append_string_to_body ("for (", code_gen); + if (f->LB) + { + opencl_append_string_to_body (f->iterator, code_gen); + opencl_append_string_to_body ("=", code_gen); + opencl_print_expr (f->LB, code_gen); + } + opencl_append_string_to_body (";", code_gen); + + if (f->UB) + { + opencl_append_string_to_body (f->iterator, code_gen); + opencl_append_string_to_body ("<=", code_gen); + opencl_print_expr (f->UB, code_gen); + } + opencl_append_string_to_body (";", code_gen); + + if (value_gt_si (f->stride, 1)) + { + opencl_append_string_to_body (f->iterator, code_gen); + opencl_append_string_to_body ("+=", code_gen); + opencl_append_num_to_body (code_gen, mpz_get_si (f->stride), "%d)\n{\n"); + } + else + { + opencl_append_string_to_body (f->iterator, code_gen); + opencl_append_string_to_body ("++", code_gen); + opencl_append_string_to_body (")\n{\n", code_gen); + } + iv_type = opencl_get_loop_iter_type (f, code_gen, level); + iv = create_tmp_var (iv_type, "scat_tmp_iter"); + + tmp = opencl_get_var_name (iv); + check_and_mark_arg (code_gen, tmp, false); + decl = opencl_print_function_arg_with_type (tmp, iv_type); + opencl_append_string_to_body (decl, code_gen); + + opencl_append_string_to_body (" = ", code_gen); + opencl_append_string_to_body (f->iterator, code_gen); + opencl_append_string_to_body (";\n", code_gen); + + save_clast_name_index (code_gen->newivs_index, f->iterator, + VEC_length (tree, code_gen->newivs)); + VEC_safe_push (tree, heap, code_gen->newivs, iv); + + opencl_print_stmt_list (f->body, code_gen, level + 1); + opencl_append_string_to_body ("}\n", code_gen); +} + +/* Generate code for clast conditional statement G, locate on depth DEPTH. + CODE_GEN holds information related to OpenCL code generation. */ + +static void +opencl_print_guard (struct clast_guard *g, opencl_main code_gen, int depth) +{ + int k; + opencl_append_string_to_body ("if ", code_gen); + if (g->n > 1) + opencl_append_string_to_body ("(", code_gen); + for (k = 0; k < g->n; ++k) + { + if (k > 0) + opencl_append_string_to_body (" && ", code_gen); + opencl_append_string_to_body ("(", code_gen); + opencl_print_equation (&g->eq[k], code_gen); + opencl_append_string_to_body (")", code_gen); + } + if (g->n > 1) + opencl_append_string_to_body (")", code_gen); + opencl_append_string_to_body (" {\n", code_gen); + opencl_print_stmt_list (g->then, code_gen, depth); + opencl_append_string_to_body ("}\n", code_gen); +} + + +/* Generate code for clast equation EQ. CODE_GEN holds information + related to OpenCL code generation. */ + +static void +opencl_print_equation (struct clast_equation *eq, opencl_main code_gen) +{ + opencl_print_expr (eq->LHS, code_gen); + if (eq->sign == 0) + opencl_append_string_to_body (" == ", code_gen); + else if (eq->sign > 0) + opencl_append_string_to_body (" >= ", code_gen); + else + opencl_append_string_to_body (" <= ", code_gen); + opencl_print_expr (eq->RHS, code_gen); +} + +/* Generate code for clast expression E. CODE_GEN holds information + related to OpenCL code generation. */ + +static void +opencl_print_expr (struct clast_expr *e, opencl_main code_gen) +{ + if (!e) + return; + switch (e->type) + { + case expr_term: + opencl_print_term ((struct clast_term*) e, code_gen); + break; + case expr_red: + opencl_print_reduction ((struct clast_reduction*) e, code_gen); + break; + case expr_bin: + opencl_print_binary ((struct clast_binary*) e, code_gen); + break; + default: + gcc_unreachable (); + } +} + +/* Generate code for clast term T. CODE_GEN holds information + related to OpenCL code generation. */ + +static void +opencl_print_term (struct clast_term *t, opencl_main code_gen) +{ + if (t->var) + { + const char *real_name = opencl_get_scat_real_name (code_gen, t->var); + if (value_one_p (t->val)) + opencl_append_var_name (real_name, code_gen); + else if (value_mone_p (t->val)) + { + opencl_append_string_to_body ("-", code_gen); + opencl_append_var_name (real_name, code_gen); + } + else + { + opencl_append_num_to_body (code_gen, mpz_get_si (t->val), "%d"); + opencl_append_string_to_body ("*", code_gen); + opencl_append_var_name (real_name, code_gen); + } + opencl_add_scat_as_arg (code_gen, t->var, real_name); + } + else + opencl_append_num_to_body (code_gen, mpz_get_si (t->val), "%d"); +} + +/* Generate code for clast reduction statement R. CODE_GEN holds + information related to OpenCL code generation. */ + +static void +opencl_print_reduction (struct clast_reduction *r, opencl_main code_gen) +{ + switch (r->type) + { + case clast_red_sum: + opencl_print_sum (r, code_gen); + break; + case clast_red_min: + case clast_red_max: + if (r->n == 1) + { + opencl_print_expr (r->elts[0], code_gen); + break; + } + opencl_print_minmax_c (r, code_gen); + break; + default: + gcc_unreachable (); + } +} + +/* Generate code for clast sum statement R. CODE_GEN holds information + related to OpenCL code generation. */ + +static void +opencl_print_sum (struct clast_reduction *r, opencl_main code_gen) +{ + int i; + struct clast_term *t; + + gcc_assert (r->n >= 1 && r->elts[0]->type == expr_term); + t = (struct clast_term *) r->elts[0]; + opencl_print_term (t, code_gen); + + for (i = 1; i < r->n; ++i) + { + gcc_assert (r->elts[i]->type == expr_term); + t = (struct clast_term *) r->elts[i]; + if (value_pos_p (t->val)) + opencl_append_string_to_body ("+", code_gen); + opencl_print_term (t, code_gen); + } +} + +/* Generate code for clast binary operation B. CODE_GEN holds + information related to OpenCL code generation. */ + +static void +opencl_print_binary (struct clast_binary *b, opencl_main code_gen) +{ + const char *s1 = NULL, *s2 = NULL, *s3 = NULL; + bool group = (b->LHS->type == expr_red + && ((struct clast_reduction*) b->LHS)->n > 1); + + switch (b->type) + { + case clast_bin_fdiv: + s1 = "floor ((", s2 = ")/(", s3 = "))"; + break; + case clast_bin_cdiv: + s1 = "ceil ((", s2 = ")/(", s3 = "))"; + break; + case clast_bin_div: + if (group) + s1 = "(", s2 = ")/", s3 = ""; + else + s1 = "", s2 = "/", s3 = ""; + break; + case clast_bin_mod: + if (group) + s1 = "(", s2 = ")%", s3 = ""; + else + s1 = "", s2 = "%", s3 = ""; + break; + } + + opencl_append_string_to_body (s1, code_gen); + opencl_print_expr (b->LHS, code_gen); + opencl_append_string_to_body (s2, code_gen); + opencl_append_num_to_body (code_gen, mpz_get_si (b->RHS), "%d"); + opencl_append_string_to_body (s3, code_gen); +} + +/* Generate code for clast min/max operation R. CODE_GEN holds + information related to OpenCL code generation. */ + +static void +opencl_print_minmax_c ( struct clast_reduction *r, opencl_main code_gen) +{ + int i; + for (i = 1; i < r->n; ++i) + opencl_append_string_to_body (r->type == clast_red_max ? "max (" : "min (", + code_gen); + if (r->n > 0) + { + opencl_append_string_to_body ("(unsigned int)(", code_gen); + opencl_print_expr (r->elts[0], code_gen); + opencl_append_string_to_body (")", code_gen); + } + for (i = 1; i < r->n; ++i) + { + opencl_append_string_to_body (",", code_gen); + opencl_append_string_to_body ("(unsigned int)(", code_gen); + opencl_print_expr (r->elts[i], code_gen); + opencl_append_string_to_body ("))", code_gen); + } +} + +#endif diff --git a/gcc/graphite-opencl-meta-clast.c b/gcc/graphite-opencl-meta-clast.c new file mode 100644 index 0000000..4fc39a9 --- /dev/null +++ b/gcc/graphite-opencl-meta-clast.c @@ -0,0 +1,784 @@ +/* Build meta information from clast data structures for GRAPHITE-OpenCL. + Copyright (C) 2009, 2010 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* This file implements building meta infromation about data + references, supported types and operations and dependencies. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "ggc.h" +#include "tree.h" +#include "rtl.h" +#include "basic-block.h" +#include "diagnostic.h" +#include "tree-flow.h" +#include "toplev.h" +#include "tree-dump.h" +#include "timevar.h" +#include "cfgloop.h" +#include "tree-chrec.h" +#include "tree-data-ref.h" +#include "tree-scalar-evolution.h" +#include "tree-pass.h" +#include "domwalk.h" +#include "value-prof.h" +#include "pointer-set.h" +#include "gimple.h" +#include "sese.h" +#include "output.h" +#include "hashtab.h" +#include "tree.h" +#include "gimple-pretty-print.h" + +#ifdef HAVE_cloog +#include "cloog/cloog.h" +#include "ppl_c.h" +#include "graphite-ppl.h" +#include "graphite.h" +#include "graphite-poly.h" +#include "graphite-scop-detection.h" +#include "graphite-clast-to-gimple.h" +#include "graphite-dependences.h" +#include "dyn-string.h" +#include "graphite-opencl.h" + +/* Something like C++ std::map. */ + +struct opencl_pair_def +{ + int id; + int val; +}; + +typedef struct opencl_pair_def * opencl_pair; + +/* Hash function for opencl_pair. */ + +static hashval_t +opencl_pair_to_hash (const void * data) +{ + const struct opencl_pair_def * obj = (const struct opencl_pair_def *)data; + return (hashval_t) (obj->id); +} + +/* Compare function for opencl_pair. */ + +static int +opencl_pair_cmp (const void * e1, const void * e2) +{ + const struct opencl_pair_def * obj1 = (const struct opencl_pair_def *)e1; + const struct opencl_pair_def * obj2 = (const struct opencl_pair_def *)e2; + + return obj1->id == obj2->id; +} + +/* Create new opencl_pair with NEW_ID as id and NEW_VAL as val. */ + +static opencl_pair +opencl_pair_create (int new_id, int new_val) +{ + opencl_pair tmp = XNEW (struct opencl_pair_def); + tmp->id = new_id; + tmp->val = new_val; + return tmp; +} + +/* Delete opencl_pair DATA. */ + +static void +opencl_pair_delete (opencl_pair data) +{ + free (data); +} + +/* Create new opencl_clast_meta structure with PARENT as parent, + DEPTH as out_depth. If ACCESS_INIT is true, then init access bitmaps. */ + +static opencl_clast_meta +opencl_clast_meta_create (int depth, opencl_clast_meta parent, + bool access_init) +{ + opencl_clast_meta tmp = XNEW (struct opencl_clast_meta_def); + tmp->out_depth = depth; + tmp->in_depth = 0; + tmp->next = NULL; + tmp->body = NULL; + tmp->parent = parent; + tmp->on_device = false; + tmp->modified_on_host = BITMAP_ALLOC (NULL); + tmp->modified_on_device = BITMAP_ALLOC (NULL); + tmp->access_unsupported = false; + if (access_init) + { + tmp->can_be_private = BITMAP_ALLOC (NULL); + tmp->access = BITMAP_ALLOC (NULL); + } + else + { + tmp->access = NULL; + tmp->can_be_private = NULL; + } + return tmp; +} + +/* Check whether type TYPE is supported by current graphite-opencl + implementation. If PTR or ARRAY is true, then TYPE can not be pointer + type (because only one level of pointers is supported). Also if PTR is + true, TYPE can not be array (because pointers to arrays are not + supported yet). */ + +static bool +opencl_supported_type_p (tree type, bool ptr, bool array) +{ + switch (TREE_CODE (type)) + { + case POINTER_TYPE: + { + if (ptr || array) + return false; + return opencl_supported_type_p (TREE_TYPE (type), true, false); + } + case ARRAY_TYPE: + { + if (ptr) + return false; + return opencl_supported_type_p (TREE_TYPE (type), false, true); + } + case FUNCTION_DECL: + case FUNCTION_TYPE: + case COMPLEX_TYPE: + case RECORD_TYPE: + case ENUMERAL_TYPE: + case UNION_TYPE: + case QUAL_UNION_TYPE: + case METHOD_TYPE: + case REFERENCE_TYPE: + return false; + + case BOOLEAN_TYPE: + case INTEGER_TYPE: + case REAL_TYPE: + return true; + case VOID_TYPE: + return true; + case OFFSET_TYPE: + case FIXED_POINT_TYPE: + case VECTOR_TYPE: + case LANG_TYPE: + default: + debug_tree (type); + gcc_unreachable (); + } +} + +/* Check whether expression ARG is supported by current graphite-opencl + implementation. */ + +static bool +opencl_supported_arg_p (opencl_main code_gen, tree arg) +{ + switch (TREE_CODE (arg)) + { + case SSA_NAME: + return opencl_supported_arg_p (code_gen, SSA_NAME_VAR (arg)); + + case ARRAY_REF: + case INDIRECT_REF: + case ADDR_EXPR: + return opencl_supported_arg_p (code_gen, TREE_OPERAND (arg, 0)); + + case VAR_DECL: + case PARM_DECL: + { + tree type = TREE_TYPE (arg); + if (TREE_CODE (type) == POINTER_TYPE) + if (!opencl_get_data_by_tree (code_gen, arg)) + return false; + + return opencl_supported_type_p (type, false, false); + } + + case INTEGER_CST: + case REAL_CST: + case POINTER_PLUS_EXPR: + return true; + + case FIELD_DECL: + case COMPONENT_REF: + case MEM_REF: + case REALPART_EXPR: + case IMAGPART_EXPR: + case COMPLEX_EXPR: + case CALL_EXPR: + case RESULT_DECL: + return false; + + default: + debug_tree (arg); + gcc_unreachable (); + } +} + +/* Check whether gimple assignment statement GMP is supported by current + graphite-opencl implementation. CODE_GEN holds information about non + scalar arguments. */ + +static bool +opencl_gimple_assign_with_supported_types_p (opencl_main code_gen, gimple gmp) +{ + tree curr_tree; + int num_of_ops = gimple_num_ops (gmp); + gcc_assert (gimple_code (gmp) == GIMPLE_ASSIGN); + gcc_assert (num_of_ops == 2 || num_of_ops == 3); + + curr_tree = gimple_assign_lhs (gmp); + if (!opencl_supported_arg_p (code_gen, curr_tree)) + return false; + + curr_tree = gimple_assign_rhs1 (gmp); + if (!opencl_supported_arg_p (code_gen, curr_tree)) + return false; + + if (num_of_ops == 3) + { + curr_tree = gimple_assign_rhs2 (gmp); + if (!opencl_supported_arg_p (code_gen, curr_tree)) + return false; + } + return true; +} + +/* Check whether all statements in basic block BB are supported by current + graphite-opencl implementation. CODE_GEN holds information about non + scalar arguments. */ + +static bool +opencl_supported_type_access_p (opencl_main code_gen, basic_block bb) +{ + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (!stmt) + continue; + switch (gimple_code (stmt)) + { + case GIMPLE_DEBUG: + case GIMPLE_COND: + case GIMPLE_PHI: + case GIMPLE_LABEL: + continue; + case GIMPLE_ASSIGN: + if (!opencl_gimple_assign_with_supported_types_p (code_gen, stmt)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "opencl_supported_type_access_p:" + " bad types in assignment\n"); + print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS); + } + return false; + } + continue; + case GIMPLE_CALL: + return false; + default: + debug_gimple_stmt (stmt); + gcc_unreachable (); + } + } + return true; +} + + +/* Mark variable, represented by tree OBJ as visited in bitmap VISITED. + If DEF is true and given variable can be privatized, mark it as + privatized in META. CODE_GEN holds information about non + scalar arguments. */ + +static void +opencl_def_use_data (opencl_main code_gen, tree obj, bitmap visited, + opencl_clast_meta meta, bool def) +{ + opencl_data data; + if (obj == NULL) + return; + data = opencl_get_data_by_tree (code_gen, + opencl_get_base_object_by_tree (obj)); + if (data == NULL) + return; + + if (!data->can_be_private) + return; + + if (!bitmap_set_bit (visited, data->id)) + return; + + if (!def) + return; + + bitmap_set_bit (meta->can_be_private, data->id); +} + +/* Mark data in META, corresponding to basic block BB, which can be + privatized. CODE_GEN holds information about non + scalar arguments. */ + +static void +opencl_calc_bb_privatization (opencl_main code_gen, basic_block bb, + opencl_clast_meta meta) +{ + gimple_stmt_iterator gsi; + bitmap visited = BITMAP_ALLOC (NULL); + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (gimple_code (stmt) != GIMPLE_ASSIGN) + continue; + opencl_def_use_data (code_gen, gimple_assign_lhs (stmt), + visited, meta, true); + + opencl_def_use_data (code_gen, gimple_assign_rhs1 (stmt), + visited, meta, false); + + opencl_def_use_data (code_gen, gimple_assign_rhs2 (stmt), + visited, meta, false); + } + BITMAP_FREE (visited); +} + +/* Analyse clast_user_stmt STMT and set read/write flags for each data + reference in this statement in clast meta corresponding to this + statement. If some data references in statement are unsupported, + then mark META as accessing unsupported. CODE_GEN holds + information about non scalar arguments. */ + +static void +opencl_set_meta_rw_flags (opencl_clast_meta meta, + struct clast_user_stmt * stmt, + opencl_main code_gen) +{ + CloogStatement *cs = stmt->statement; + poly_bb_p pbb = (poly_bb_p) cloog_statement_usr (cs); + VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb); + gimple_bb_p gbb = PBB_BLACK_BOX (pbb); + basic_block bb = GBB_BB (gbb); + int i; + poly_dr_p curr; + if (!opencl_supported_type_access_p (code_gen, bb)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Basic block contains unsupported " + "types in graphite-opencl\n"); + dump_bb (bb, dump_file, 0); + + } + + meta->access_unsupported = true; + } + opencl_calc_bb_privatization (code_gen, bb, meta); + for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++) + { + data_reference_p d_ref = (data_reference_p) PDR_CDR (curr); + tree data_ref_tree = dr_outermost_base_object (d_ref); + opencl_data data; + + if (!opencl_supported_arg_p (code_gen, data_ref_tree)) + { + meta->access_unsupported = true; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + "Unsupported in graphite-opencl data reference\n"); + dump_data_reference (dump_file, d_ref); + + } + continue; + } + + data = opencl_get_data_by_data_ref (code_gen, d_ref); + gcc_assert (data); + + if (!data->supported) + meta->access_unsupported = true; + + if (!graphite_outer_subscript_bound (curr, false)) + { + meta->access_unsupported = true; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Can not determine subscript bound " + "for data reference\n"); + dump_data_reference (dump_file, d_ref); + + } + + continue; + } + + if (data->size_value == NULL) + { + meta->access_unsupported = true; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + "Can not determine size for data reference\n"); + dump_data_reference (dump_file, d_ref); + } + } + bitmap_set_bit (meta->access, data->id); + } +} + +/* Update META access bitmap by union of access bitmaps of it's children. */ + +static void +opencl_collect_definitions_info (opencl_clast_meta meta) +{ + opencl_clast_meta curr = meta->body->next; + bitmap tmp_access = BITMAP_ALLOC (NULL); + bitmap_copy (tmp_access, meta->body->access); + meta->can_be_private = BITMAP_ALLOC (NULL); + bitmap_copy (meta->can_be_private, meta->body->can_be_private); + while (curr) + { + bitmap new_defs = BITMAP_ALLOC (NULL); + bitmap_and_compl (new_defs, curr->can_be_private, tmp_access); + bitmap_ior_into (tmp_access, curr->access); + bitmap_ior_into (meta->can_be_private, new_defs); + curr = curr->next; + BITMAP_FREE (new_defs); + } + meta->access = tmp_access; +} + +/* Build meta structure from clast structure. + BODY - base clast statement. + DEPTH - depth of BODY in whole clast structure. + PARENT - parent meta node. + CODE_GEN - data structure, which holds information + about non scalar arguments. */ + +opencl_clast_meta +opencl_create_meta_from_clast (opencl_main code_gen, + struct clast_stmt * body, int depth, + opencl_clast_meta parent) +{ + int max_depth = 0; + opencl_clast_meta result = NULL; + opencl_clast_meta curr = NULL; + struct clast_stmt * curr_stmt = body; + for ( ; curr_stmt; curr_stmt = curr_stmt->next) + { + opencl_clast_meta tmp_result = NULL; + if (CLAST_STMT_IS_A (curr_stmt, stmt_root)) + continue; + if (CLAST_STMT_IS_A (curr_stmt, stmt_user)) + { + tmp_result = opencl_clast_meta_create (depth, parent, true); + opencl_set_meta_rw_flags (tmp_result, + (struct clast_user_stmt*) curr_stmt, + code_gen); + } + if (CLAST_STMT_IS_A (curr_stmt, stmt_guard)) + { + struct clast_guard * if_stmt = (struct clast_guard *)curr_stmt; + /* For guard (if) statement create meta for it's body and just + append it to current list. */ + tmp_result = opencl_create_meta_from_clast (code_gen, if_stmt->then, + depth, parent); + } + if (CLAST_STMT_IS_A (curr_stmt, stmt_block)) + { + struct clast_block * bl_stmt = (struct clast_block *)curr_stmt; + tmp_result = opencl_create_meta_from_clast (code_gen, bl_stmt->body, + depth, parent); + } + if (CLAST_STMT_IS_A (curr_stmt, stmt_for)) + { + struct clast_for * for_stmt = (struct clast_for *) curr_stmt; + tmp_result = opencl_clast_meta_create (depth, parent, false); + tmp_result->body + = opencl_create_meta_from_clast (code_gen, for_stmt->body, + depth + 1, tmp_result); + + max_depth = (max_depth > tmp_result->in_depth + 1) + ? max_depth : tmp_result->in_depth + 1; + opencl_collect_definitions_info (tmp_result); + } + if (!result) + curr = result = tmp_result; + else + curr->next = tmp_result; + while (curr->next != NULL) curr = curr->next; + } + if (parent) + parent->in_depth = max_depth; + + return result; +} + +/* For all opencl_data referenced in META calculate depth of innermost + reference. DEPTH is the depth of the loop, represented be META + in current loop nest. DATE holds intermediate resuls. + Function returns false iff there is access to unsupported types in + given META. + Consider an example: + + | Meta_1 + | / \ + | / \ + | / \ + | / \ + | Meta_2 Meta_3 + | {D1,D2} / \ + | / \ + | / \ + | Meta_4 Meta_5 + | {D2,D4} | + | | + | | + | Meta_6 + | {D3} + + In this example D1_depth = 1, D2_depth = 2, D3_depth = 3, D4_depth = 2. */ + +static bool +opencl_calc_max_depth_tab (opencl_clast_meta meta, htab_t data, int depth) +{ + while (meta) + { + if (meta->body) + { + if (!opencl_calc_max_depth_tab (meta->body, data, depth + 1)) + return false; + } + else + { + /* User stmt. Analyze data access. */ + bitmap stmt_access = meta->access; + unsigned i; + bitmap_iterator bi; + if (meta->access_unsupported) + return false; + EXECUTE_IF_SET_IN_BITMAP (stmt_access, 0, i, bi) + { + opencl_pair curr_pair = opencl_pair_create (i, depth); + struct opencl_pair_def ** slot + = (struct opencl_pair_def **)htab_find_slot (data, curr_pair, + INSERT); + if (*slot == NULL) + *slot = curr_pair; + else + { + opencl_pair old_pair = *slot; + if (old_pair->val > curr_pair->val) + opencl_pair_delete (curr_pair); + else + { + *slot = curr_pair; + opencl_pair_delete (old_pair); + } + } + } + } + meta = meta->next; + } + return true; +} + +/* Check whether it's reasonable to pass data, represented by OBJ, + to device based on information from META. + This function helps to avoid kernels like this. + + | host_use_a_and_b (); + | for (int i = 0; i < N; i++) + | a[i] = b[i]; + + host_use_a_and_b (); + + We have no dependency here, but memory transfer from host to devices and + from device to host aren't reasonable here. + + We have 2 situations when memory transfer is reasonable. + + 1. + + | for (int j = 0; j < N; j++) + | { + | host_use_c (); + | for (int i = 0; i < N; i ++) + | a[i] = b [i] + j; + | } + + In this case we can put memory transfer befor first loop, so + we will avoid situation, when all benefits from parallel execution + can be eliminated by memory transfer. + + 2. + + | for (int i = 0; i < N; i ++) + | for (int j = 0; j < N; j ++) + | { + | c[i][j] = 0; + | for (int k = 0; k < N; k ++) + | c[i][j] += a[i][k] * b [k][j]; + | } + + In this case each element of a,b or c is used N time on device, + so memory transfer is reasonable. */ + +static bool +opencl_evaluate_data_access_p (opencl_data obj, opencl_clast_meta meta) +{ + int depth = obj->depth; + int data_id = obj->id; + opencl_clast_meta parent = meta->parent; + if (obj->privatized) + return false; + if (depth < obj->data_dim) + return false; + if (parent) + { + /* We have outer loop. */ + bitmap curr_bitmap = parent->modified_on_host; + /* Memory transfer for this statement has been placed outside + outer loop, so for one memory transfer will be executing more + then one kernel (first case). */ + if (!bitmap_bit_p (curr_bitmap, data_id)) + return true; + } + /* Check max depth of memory access (second case). */ + return (depth > obj->data_dim); +} + +/* Find opencl_data object by it's ID in CODE_GEN structures. */ + +static opencl_data +opencl_get_data_by_id (opencl_main code_gen, int id) +{ + VEC (opencl_data, heap) * main_data = code_gen->opencl_function_data; + opencl_data res = VEC_index (opencl_data, main_data, id); + gcc_assert (res->id == id); + return res; +} + +/* Check whether memory transfer is reasonable if clast statement, + connected with META, will be replaced by opencl kernel launch. + ACCESS holds depth of innermost data references + for all data, references in statement, represented by META. + CODE_GEN holds information about non scalar arguments. */ + +static bool +opencl_analyse_data_access_p (opencl_main code_gen, + htab_t access, + opencl_clast_meta meta) +{ + htab_iterator h_iter; + opencl_pair curr; + int max_dim = 1; + int i; + opencl_data curr_data; + VEC (opencl_data, heap) * data_objs = VEC_alloc (opencl_data, heap, + OPENCL_INIT_BUFF_SIZE); + + FOR_EACH_HTAB_ELEMENT (access, curr, opencl_pair, h_iter) + { + int id = curr->id; + opencl_data obj = opencl_get_data_by_id (code_gen, id); + VEC_safe_push (opencl_data, heap, data_objs, obj); + if (max_dim < obj->data_dim) + max_dim = obj->data_dim; + obj->depth = curr->val; + } + + for (i = 0; VEC_iterate (opencl_data, data_objs, i, curr_data); i++) + { + if (curr_data->data_dim != max_dim) + continue; + if (opencl_evaluate_data_access_p (curr_data, meta)) + return true; + } + return false; +} + +/* Main predicate which checks whether statement, represented by META and + located on depth DEPTH, should be replaced by opencl kernel launch. + CODE_GEN holds information about non scalar arguments. */ + +bool +opencl_should_be_parallel_p (opencl_main code_gen, + opencl_clast_meta meta, + int depth) +{ + int i_depth = meta->in_depth; + htab_t max_access_depth; + bool dump_p = dump_file && (dump_flags & TDF_DETAILS); + + if (dump_p) + fprintf (dump_file, "opencl_should_be_parallel_p: "); + + /* Avoid launching a lot of small kernels in a deep loop. */ + if (!flag_graphite_opencl_no_depth_check) + if (depth > i_depth + opencl_base_depth_const) + { + if (dump_p) + fprintf (dump_file, "avoiding small kernel in deep loop\n"); + return false; + } + + max_access_depth = htab_create (OPENCL_INIT_BUFF_SIZE, + opencl_pair_to_hash, + opencl_pair_cmp, free); + + /* Can't parallelize if statements in loop contain unsupported types. */ + if (!flag_graphite_opencl_no_types_check) + if (!opencl_calc_max_depth_tab (meta, max_access_depth, 0)) + { + htab_delete (max_access_depth); + if (dump_p) + fprintf (dump_file, "unsupported types\n"); + + return false; + } + + /* Can't parallelize if memory transfer is not reasonable. */ + if (!flag_graphite_opencl_no_memory_transfer_check + && !flag_graphite_opencl_cpu + && !opencl_analyse_data_access_p (code_gen, max_access_depth, meta)) + { + htab_delete (max_access_depth); + if (dump_p) + fprintf (dump_file, "avoiding large memory transfer\n"); + return false; + } + + htab_delete (max_access_depth); + + if (dump_p) + fprintf (dump_file, "ok\n"); + + return true; +} + +#endif diff --git a/gcc/graphite-opencl.c b/gcc/graphite-opencl.c new file mode 100644 index 0000000..9c28d41 --- /dev/null +++ b/gcc/graphite-opencl.c @@ -0,0 +1,2913 @@ +/* GRAPHITE-OpenCL pass. + Copyright (C) 2009, 2010 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* This pass implements transformation of perfect loop nests without + dependencies to OpenCL kernels. + + For example, loop nest like this + + | for (s_i = lb_i; s_i <= ub_i; s_i += stride_i) + | for (s_{i_1} = lb_{i+1}; s_{i+1} <= ub_{i+1}; s_{i+1} += stride_{i+1}) + | ... + | for (s_j = lb_j; s_j <= ub_j; s_j += stride_j) + | { + | STMT(s_i, s_{i+1}, ..., s_j); + | } + + will be transformed to this OpenCL kernel and all required OpenCL + calls will be created. + + | __kernel void opencl_auto_function_N + | (base_i, mod_i, step_i, first_i, + | ..., + | base_j, mod_j, step_i, first_j, + | ...) + | { + | unsigned int global_id = get_global_id (0); + | unsigned int s_i = ((global_id / base_i) % mod_i) * step_i + first_i; + | unsigned int s_j = ((global_id / base_j) % mod_j) * step_j + first_j; + | STMT(s_i, s_{i+1}, ..., s_j); + | } +*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "ggc.h" +#include "tree.h" +#include "rtl.h" +#include "basic-block.h" +#include "diagnostic.h" +#include "tree-flow.h" +#include "toplev.h" +#include "tree-dump.h" +#include "timevar.h" +#include "cfgloop.h" +#include "tree-chrec.h" +#include "tree-data-ref.h" +#include "tree-scalar-evolution.h" +#include "tree-pass.h" +#include "domwalk.h" +#include "value-prof.h" +#include "pointer-set.h" +#include "gimple.h" +#include "sese.h" +#include "output.h" +#include +#include "hashtab.h" +#include "tree.h" + +#ifdef HAVE_cloog +#include "cloog/cloog.h" +#include "ppl_c.h" +#include "graphite-ppl.h" +#include "graphite.h" +#include "graphite-poly.h" +#include "graphite-scop-detection.h" +#include "graphite-clast-to-gimple.h" +#include "graphite-dependences.h" +#include "dyn-string.h" +#include "graphite-opencl.h" + +/* Enum for all OpenCL functions used in GRAPHITE-OpenCL. */ + +enum OPENCL_FUNCTUONS + { + STATIC_INIT = 0, + CREATE_CONTEXT_FROM_TYPE = 1, + GET_CONTEXT_INFO = 2, + CREATE_COMMAND_QUEUE = 3, + CREATE_PROGRAM_WITH_SOURCE = 4, + BUILD_PROGRAM = 5, + CREATE_KERNEL = 6, + CREATE_BUFFER = 7, + SET_KERNEL_ARG = 8, + ENQUEUE_ND_RANGE_KERNEL = 9, + ENQUEUE_READ_BUFFER = 10, + ENQUEUE_WRITE_BUFFER = 11, + RELEASE_MEMORY_OBJ = 12, + RELEASE_CONTEXT = 13, + RELEASE_COMMAND_QUEUE = 14, + RELEASE_PROGRAM = 15, + RELEASE_KERNEL = 16, + GET_PLATFORM_IDS = 17, + WAIT_FOR_EVENTS = 18 + }; + +/* Constructors and destructors. */ +static opencl_main opencl_main_create (CloogNames *, sese, edge, htab_t); +static void opencl_main_delete (opencl_main); +static void opencl_clast_meta_delete (opencl_clast_meta); +static tree opencl_create_function_decl (enum OPENCL_FUNCTUONS); +static edge opencl_create_function_call (edge); +static void opencl_init_data (scop_p, opencl_main); +static int opencl_get_non_scalar_type_depth (tree); +static tree opencl_create_memory_for_pointer (opencl_data); +static void opencl_init_basic_blocks (opencl_main); +static edge opencl_set_context_properties (edge, tree); +static tree opencl_create_clCreateContextFromType (tree); +static tree opencl_create_clGetContextInfo_1 (tree); +static void opencl_create_gimple_variables (void); +static tree opencl_create_clCreateCommandQueue (tree); +static tree opencl_create_malloc_call (tree); +static edge opencl_create_init_context (edge); +static void opencl_wait_for_event (opencl_main, tree); +static void opencl_transform_stmt_list (struct clast_stmt *, opencl_main, int); +static void opencl_create_gimple_for_body (opencl_body, opencl_main); + +/* Data structure to be used in data_reference_p to opencl_data hash + table. */ +struct map_ref_to_data_def +{ + data_reference_p key; + opencl_data value; +}; + +typedef struct map_ref_to_data_def * map_ref_to_data; + +/* Calculate hash value from map_ref_to_data. */ + +static hashval_t +map_ref_to_data_to_hash (const void * data) +{ + const struct map_ref_to_data_def * obj + = (const struct map_ref_to_data_def *)data; + return htab_hash_pointer (obj->key); +} + +/* Compare to map_ref_to_data pointers. */ + +static int +map_ref_to_data_cmp (const void * v1, const void * v2) +{ + const struct map_ref_to_data_def * obj_1 + = (const struct map_ref_to_data_def *)v1; + const struct map_ref_to_data_def * obj_2 + = (const struct map_ref_to_data_def *)v2; + + return (obj_1->key == obj_2->key); +} + +/* Create new map_ref_to_data with NEW_KEY as key and NEW_VALUE as value. */ + +static map_ref_to_data +map_ref_to_data_create (data_reference_p new_key, + opencl_data new_value) +{ + map_ref_to_data tmp = XNEW (struct map_ref_to_data_def); + tmp->key = new_key; + tmp->value = new_value; + return tmp; +} + +/* Data structure to be used in tree to opencl_data hash table. */ + +struct map_tree_to_data_def +{ + tree key; + opencl_data value; +}; + +typedef struct map_tree_to_data_def * map_tree_to_data; + +/* Calculate hash value from map_tree_to_data. */ + +static hashval_t +map_tree_to_data_to_hash (const void * data) +{ + const struct map_tree_to_data_def * obj + = (const struct map_tree_to_data_def *)data; + return htab_hash_pointer (obj->key); +} + +/* Compare to map_tree_to_data pointers. */ + +static int +map_tree_to_data_cmp (const void * v1, const void * v2) +{ + const struct map_tree_to_data_def * obj_1 + = (const struct map_tree_to_data_def *)v1; + const struct map_tree_to_data_def * obj_2 + = (const struct map_tree_to_data_def *)v2; + + return (obj_1->key == obj_2->key); +} + +/* Create new map_tree_to_data with NEW_KEY as key and NEW_VALUE as value. */ + +static map_tree_to_data +map_tree_to_data_create (tree new_key, + opencl_data new_value) +{ + map_tree_to_data tmp = XNEW (struct map_tree_to_data_def); + tmp->key = new_key; + tmp->value = new_value; + return tmp; +} + +/* Create and init new temporary variable with name NAME and + type TYPE. */ + +static tree +opencl_create_tmp_var (tree type, const char * name) +{ + tree tmp = create_tmp_var (type, name); + TREE_ADDRESSABLE (tmp) = 1; + return tmp; +} + +/* Create new var in basic block DEST to store EXPR and return it. */ + +tree +opencl_tree_to_var (basic_block dest, tree expr) +{ + tree type = TREE_TYPE (expr); + tree var = opencl_create_tmp_var (type, "__ocl_general_tmp_var"); + gimple_stmt_iterator g_iter = gsi_last_bb (dest); + + tree call = build2 (MODIFY_EXPR, type, var, expr); + + force_gimple_operand_gsi (&g_iter, call, true, NULL, false, + GSI_CONTINUE_LINKING); + + return var; +} + +/* Set rw flags to false for all datas, referenced in CODE_GEN. */ + +static void +opencl_fflush_rw_flags (opencl_main code_gen) +{ + VEC (opencl_data, heap) * datas = code_gen->opencl_function_data; + int i; + opencl_data curr; + for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++) + { + curr->written_in_current_body = false; + curr->read_in_current_body = false; + curr->privatized = false; + } +} + +/* Create new basic block on CODE_GEN->main edge and update it. */ + +basic_block +opencl_create_bb (opencl_main code_gen) +{ + basic_block tmp = split_edge (code_gen->main_edge); + code_gen->main_edge = single_succ_edge (tmp); + return tmp; +} + +/* All kernels of current function. */ +static VEC(tree, heap) *opencl_function_kernels; + +/* OpenCL code for all kernels of current function. */ +static dyn_string_t main_program_src; + +/* Delete clast meta DATA. */ + +static void +opencl_clast_meta_delete (opencl_clast_meta data) +{ + if (!data) + return; + opencl_clast_meta_delete (data->body); + opencl_clast_meta_delete (data->next); + BITMAP_FREE (data->modified_on_device); + BITMAP_FREE (data->modified_on_host); + if (data->access != NULL) + BITMAP_FREE (data->access); + if (data->can_be_private) + BITMAP_FREE (data->can_be_private); + free (data); +} + +static inline void +opencl_verify (void) +{ +#ifdef ENABLE_CHECKING + verify_loop_structure (); + verify_dominators (CDI_DOMINATORS); + verify_loop_closed_ssa (true); +#endif +} + +/* OpenCL definitions. */ +#define CL_CONTEXT_PLATFORM 0x1084 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_TRUE 1 + +/* Names of all OpenCL functions, used in GRAPHITE-OpenCL. */ + +static const char * opencl_function_names[] = + { + "clCreateContextFromType", + "clGetContextInfo", + "clCreateCommandQueue", + "clCreateProgramWithSource", + "clBuildProgram", + "clCreateKernel", + "clCreateBuffer", + "clSetKernelArg", + "clEnqueueNDRangeKernel", + "clEnqueueReadBuffer", + "clEnqueueWriteBuffer", + "clReleaseMemObject", + "clReleaseContext", + "clReleaseCommandQueue", + "clReleaseProgram", + "clReleaseKernel", + "clGetPlatformIDs", + "clWaitForEvents" + }; + +#endif +/* Variable, which holds OpenCL context. */ +static GTY(()) tree h_context; + +/* Variable, which holds OpenCL comman queue. */ +static GTY(()) tree h_cmd_queue; + +/* Variable, which holds OpenCL program for current function. */ +static GTY(()) tree h_program; + +#ifdef HAVE_cloog + +/* This vector holds opencl_data, which represents arrays. + Arrays have constant sizes, so buffers for each of them can + be created only once. */ +static VEC (opencl_data, heap) * opencl_array_data; + +/* Hash table, which maps opencl_data, related to arrays, to + trees, which represents corresponding array. */ +static htab_t array_data_to_tree; + +/* Check whether VAR is a zero dimension array. */ + +static bool +zero_dim_array_p (tree var) +{ + tree type = TREE_TYPE (var); + tree domain; + tree up_bound; + + if (TREE_CODE (type) != ARRAY_TYPE) + return false; + if (TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE) + return false; + domain = TYPE_DOMAIN (type); + if (domain == NULL) + return false; + up_bound = TYPE_MAX_VALUE (domain); + if (TREE_CODE (up_bound) != INTEGER_CST) + return false; + return TREE_INT_CST_LOW (up_bound) == 0; +} + +/* Check whether NAME is the name of the artificial array, which can be + privatized. */ + +static bool +opencl_private_var_name_p (const char * name) +{ + static const char * general_reduction = "General_Reduction"; + static const char * close_phi = "Close_Phi"; + static const char * cross_bb = "Cross_BB_scalar_dependence"; + static const char * commutative = "Commutative_Associative_Reduction"; + if (!name) + return false; + return + ((strstr (name, general_reduction) == name) + || (strstr (name, close_phi) == name) + || (strstr (name, commutative) == name) + || (strstr (name, cross_bb) == name)); +} + +/* Check whether VAR is an artificial array, which can be privatized. */ + +static bool +graphite_artificial_array_p (tree var) +{ + tree name; + if (TREE_CODE (var) != VAR_DECL) + return false; + if (!zero_dim_array_p (var)) + return false; + name = DECL_NAME (var); + if (!name) + return false; + return opencl_private_var_name_p (IDENTIFIER_POINTER (name)); +} + +/* Constructors & destructors. + _create - creates a new object of such type and returns it. + _delete - delete object (like destructor). */ + +static opencl_data +opencl_data_create (tree var, tree size) +{ + opencl_data tmp = XNEW (struct opencl_data_def); + tree type = TREE_TYPE (var); + + tmp->can_be_private = graphite_artificial_array_p (var); + tmp->exact_object = var; + + tmp->supported = TREE_CODE (var) == VAR_DECL || TREE_CODE (var) == SSA_NAME; + + if (TREE_CODE (type) == ARRAY_TYPE) + var = build_addr (var, current_function_decl); + + tmp->data_dim = opencl_get_non_scalar_type_depth (type); + tmp->object = var; + + tmp->size_value = size; + tmp->size_variable + = opencl_create_tmp_var (size_type_node, "__opencl_data_size"); + + tmp->up_to_date_on_host = true; + tmp->up_to_date_on_device = true; + tmp->used_on_device = false; + tmp->ever_read_on_device = false; + tmp->ever_written_on_device = false; + return tmp; +} + +static void +opencl_data_delete (opencl_data data) +{ + free (data); +} + +static opencl_main +opencl_main_create (CloogNames * names, sese region, edge main_edge, + htab_t params_index) +{ + opencl_main tmp = XNEW (struct graphite_opencl_creator); + tmp->root_names = names; + tmp->defined_vars = NULL; + tmp->global_defined_vars = NULL; + tmp->region = region; + tmp->main_edge = main_edge; + tmp->main_program = dyn_string_new (OPENCL_INIT_BUFF_SIZE); + tmp->current_body = NULL; + tmp->clast_meta = NULL; + tmp->curr_meta = NULL; + tmp->params_index = params_index; + tmp->newivs_index = htab_create (10, clast_name_index_elt_info, + eq_clast_name_indexes, free); + tmp->ref_to_data = htab_create (10, map_ref_to_data_to_hash, + map_ref_to_data_cmp, free); + tmp->tree_to_data = htab_create (10, map_tree_to_data_to_hash, + map_tree_to_data_cmp, free); + tmp->newivs = VEC_alloc (tree, heap, 10); + tmp->context_loop = SESE_ENTRY (region)->src->loop_father; + tmp->opencl_function_data = VEC_alloc (opencl_data, heap, + OPENCL_INIT_BUFF_SIZE); + return tmp; +} + +static void +opencl_main_delete (opencl_main data) +{ + int i; + opencl_data curr; + dyn_string_delete (data->main_program); + htab_delete (data->newivs_index); + htab_delete (data->ref_to_data); + htab_delete (data->tree_to_data); + opencl_clast_meta_delete (data->clast_meta); + for (i = 0; VEC_iterate (opencl_data, data->opencl_function_data, i, curr); + i++) + if (!curr->is_static) + opencl_data_delete (curr); + VEC_free (tree, heap, data->newivs); + VEC_free (opencl_data, heap, data->opencl_function_data); + free (data); +} + +/* Get depth of type TYPE scalar (base) part. */ + +static int +opencl_get_non_scalar_type_depth (tree type) +{ + int count = 0; + while (TREE_CODE (type) == ARRAY_TYPE + || TREE_CODE (type) == POINTER_TYPE) + { + count++; + type = TREE_TYPE (type); + } + return count; +} + +/* Add function call CALL to edge SRC. If FLAG_GRAPHITE_OPENCL_DEBUG is + enabled, then add the following: + + | int result = call (); + | if (call == 0 != ZERO_RETURN) + | abort (); + + Otherwise just add CALL as function call. */ + +static edge +opencl_add_safe_call_on_edge (tree call, bool zero_return, edge src) +{ + if (!flag_graphite_opencl_debug) + { + basic_block bb = split_edge (src); + gimple_stmt_iterator g_iter = gsi_last_bb (bb); + force_gimple_operand_gsi (&g_iter, call, true, NULL, false, + GSI_CONTINUE_LINKING); + return single_succ_edge (bb); + } + else + { + tree cmp; + edge result; + basic_block abort_bb; + tree abort_funtion; + tree abort_call; + gimple_stmt_iterator g_iter; + if (zero_return) + { + tree correct_result = build1 (CONVERT_EXPR, TREE_TYPE (call), + integer_zero_node); + cmp = build2 (EQ_EXPR, boolean_type_node, + call, correct_result); + } + else + { + tree incorrect_result = build1 (CONVERT_EXPR, TREE_TYPE (call), + null_pointer_node); + + cmp = build2 (NE_EXPR, boolean_type_node, + call, incorrect_result); + } + result = create_empty_if_region_on_edge (src, cmp); + abort_bb = get_false_edge_from_guard_bb (src->dest)->dest; + abort_funtion = build_fn_decl ("abort", build_function_type_list + (void_type_node, NULL_TREE)); + + abort_call = build_call_expr (abort_funtion, 0); + + g_iter = gsi_last_bb (abort_bb); + force_gimple_operand_gsi (&g_iter, abort_call, true, NULL, false, + GSI_CONTINUE_LINKING); + return result; + } +} + +/* Add call CALL to the current edge from CODE_GEN. ZERO_RETURN indicates + whether NULL or 0 is the success return value for CALL. */ + +static void +opencl_add_safe_call (opencl_main code_gen, tree call, bool zero_return) +{ + code_gen->main_edge + = opencl_add_safe_call_on_edge (call, zero_return, code_gen->main_edge); +} + +/* Get base object for OBJ. */ + +tree +opencl_get_base_object_by_tree (tree obj) +{ + while (TREE_CODE (obj) == INDIRECT_REF + || TREE_CODE (obj) == ARRAY_REF) + obj = TREE_OPERAND (obj, 0); + return obj; +} + +/* Get base object from data reference DR. */ + +tree +dr_outermost_base_object (data_reference_p dr) +{ + tree addr = DR_BASE_ADDRESS (dr); + if (!addr) + { + /* In case, we don't know base object. For example: + + | void foo (int * a) + | { + | int * b = a + 1; + | *b = 0; + | } + + Just return the innermost object when the base address is unknown. */ + tree ref = DR_REF (dr); + return opencl_get_base_object_by_tree (ref); + } + + if (TREE_CODE (addr) == ADDR_EXPR) + addr = TREE_OPERAND (addr, 0); + return addr; +} + +/* Get correct basic block for data with DATA_ID transfer. If DEVICE + is true, then it's host to device transfer, otherwise it's device + to host transfer. CODE_GEN holds information related to code + generation. */ + +static edge +opencl_get_edge_for_init (opencl_main code_gen, int data_id, bool device) +{ + opencl_clast_meta curr = code_gen->curr_meta; + if (!curr) + return NULL; + + while (curr->parent) + { + opencl_clast_meta parent = curr->parent; + bitmap curr_bitmap + = device ? parent->modified_on_host : parent->modified_on_device; + if (bitmap_bit_p (curr_bitmap, data_id)) + break; + curr = curr->parent; + } + return curr->init_edge; +} + +/* Add host to device memory transfer. DATA - data, which must be + transfered to device. CODE_GEN holds information related to code + generation. */ + +static tree +opencl_pass_to_device (opencl_main code_gen, opencl_data data) +{ + edge init_edge; + tree function; + tree call; + + tree index_type = build_index_type (build_int_cst (NULL_TREE, 2)); + tree array_type = build_array_type (ptr_type_node, index_type); + tree var = opencl_create_tmp_var (array_type, "wait_event"); + tree event_call; + + TREE_STATIC (var) = 1; + assemble_variable (var, 1, 0, 1); + + /* If data is wrutten in device, mark it is not up to date on host. */ + if (data->written_in_current_body) + data->up_to_date_on_host = false; + + /* If data is up to date on device, but it was initialized befor + current loop, then mark it as initialized in current loop and + store it. + + Consider an example: D - device, H - host, W - write, R - read. + + | HW(1) -- LOOP + | /\ + | / \ + | / \ + | HR(2) DW(3) + + While analyzing statement (2), data will be up to date on host + because of statement (1), but while executing after (3) in loop, + (2) will read incorrect data. + + So, we have to add device to host memory transfer after statement (3). + */ + if (flag_graphite_opencl_cpu) + return data->device_object; + + if (data->up_to_date_on_device) + { + if (!data->inited_in_current_loop_on_device + && code_gen && code_gen->curr_meta + && code_gen->curr_meta->parent) + VEC_safe_push (opencl_data, heap, + code_gen->curr_meta->parent->post_pass_to_device, + data); + + data->inited_in_current_loop_on_device = true; + return data->device_object; + } + + data->inited_in_current_loop_on_device = true; + init_edge = opencl_get_edge_for_init (code_gen, data->id, true); + + /* Add gimple. */ + function = opencl_create_function_decl (ENQUEUE_WRITE_BUFFER); + + event_call = build4 (ARRAY_REF, ptr_type_node, var, + integer_zero_node, NULL_TREE, NULL_TREE); + event_call = build_addr (event_call, current_function_decl); + call = build_call_expr (function, 9, + h_cmd_queue, + data->device_object, + build_int_cst (NULL_TREE, CL_TRUE), + integer_zero_node, + data->size_variable, + data->object, + integer_zero_node, + null_pointer_node, + event_call); + if (init_edge) + opencl_add_safe_call_on_edge (call, true, init_edge); + else + opencl_add_safe_call (code_gen, call, true); + data->up_to_date_on_device = true; + opencl_wait_for_event (code_gen, event_call); + return data->device_object; +} + +/* Add device to host memory transfer. DATA - data, which must be + transfered to host. CODE_GEN holds information related to code + generation. */ + +static void +opencl_pass_to_host (opencl_main code_gen, opencl_data data) +{ + edge init_edge; + tree function; + tree curr_type; + tree curr; + tree call; + + tree index_type = build_index_type (build_int_cst (NULL_TREE, 2)); + tree array_type = build_array_type (ptr_type_node, index_type); + tree var = opencl_create_tmp_var (array_type, "wait_event"); + tree event_call; + + TREE_STATIC (var) = 1; + assemble_variable (var, 1, 0, 1); + if (data->written_in_current_body) + data->up_to_date_on_device = false; + if (data->up_to_date_on_host) + { + if (!data->inited_in_current_loop_on_host + && code_gen && code_gen->curr_meta && + code_gen->curr_meta->parent) + VEC_safe_push (opencl_data, heap, + code_gen->curr_meta->parent->post_pass_to_host, data); + + data->inited_in_current_loop_on_host = true; + return; + } + + data->inited_in_current_loop_on_host = true; + + if (flag_graphite_opencl_cpu) + return; + + if (data->privatized) + return; + + init_edge = opencl_get_edge_for_init (code_gen, data->id, false); + + function = opencl_create_function_decl (ENQUEUE_READ_BUFFER); + curr_type = TREE_TYPE (data->object); + curr = data->object; + + if (TREE_CODE (curr_type) == ARRAY_TYPE) + curr = build_addr (curr, current_function_decl); + + event_call = build4 (ARRAY_REF, ptr_type_node, var, + integer_zero_node, NULL_TREE, NULL_TREE); + event_call = build_addr (event_call, current_function_decl); + + + call = build_call_expr (function, 9, + h_cmd_queue, + data->device_object, + build_int_cst (NULL_TREE, CL_TRUE), + integer_zero_node, + data->size_variable, + curr, integer_zero_node, + null_pointer_node, + event_call); + + if (init_edge) + opencl_add_safe_call_on_edge (call, true, init_edge); + else + opencl_add_safe_call (code_gen, call, true); + opencl_wait_for_event (code_gen, event_call); + data->up_to_date_on_host = true; +} + +/* Pass all data from device to host. This function must be called when + we need all data to be up to date on host. CODE_GEN holds information + related to code generation. */ + +static void +opencl_fflush_all_device_buffers_to_host (opencl_main code_gen) +{ + VEC (opencl_data, heap) * datas = code_gen->opencl_function_data; + int i; + opencl_data curr; + tree function = opencl_create_function_decl (RELEASE_MEMORY_OBJ); + for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++) + { + curr->written_in_current_body = true; + opencl_pass_to_host (code_gen, curr); + } + for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++) + { + if (curr->used_on_device && !curr->is_static) + { + tree var = curr->device_object; + tree call = build_call_expr (function, 1, var); + opencl_add_safe_call (code_gen, call, true); + } + } +} + +/* Create memory buffers on host for all required host memory objects. + CODE_GEN holds information related to code generation. */ + +static void +opencl_init_all_device_buffers (opencl_main code_gen) +{ + VEC (opencl_data, heap) * datas = code_gen->opencl_function_data; + int i; + opencl_data curr; + edge data_init_edge = single_succ_edge (code_gen->data_init_bb); + for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++) + { + tree tmp; + if (!curr->used_on_device || curr->is_static) + continue; + + tmp = opencl_create_memory_for_pointer (curr); + tmp = build2 (MODIFY_EXPR, ptr_type_node, curr->device_object, tmp); + data_init_edge = opencl_add_safe_call_on_edge (tmp, false, + data_init_edge); + } +} + +/* Create new static void * variable with name __ocl_ + NAME. */ + +static tree +opencl_create_static_ptr_variable (const char * name) +{ + const char * id_name = concat ("__ocl_",name, NULL); + tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, + create_tmp_var_name (id_name), ptr_type_node); + TREE_STATIC (var) = 1; + TREE_PUBLIC (var) = 0; + DECL_ARTIFICIAL (var) = 1; + TREE_USED (var) = 1; + TREE_ADDRESSABLE (var) = 1; + DECL_INITIAL (var) = null_pointer_node; + assemble_variable (var, 1, 0, 1); + + return var; +} + +/* Insert several opencl calls to output program. */ + +/* | cl_program h_program; + | h_program = clCreateProgramWithSource (h_context, 1, + | sProgramSource, 0, 0); + + SRC is the program source code and DEST is the edge where + call must be inserted. */ + +static edge +opencl_insert_create_program_with_source_call (const char * src, edge base) +{ + /* Get tree with function definition. */ + tree function = opencl_create_function_decl (CREATE_PROGRAM_WITH_SOURCE); + tree code_tree = build_string_literal (strlen (src) + 1, src); + tree call; + basic_block bb = split_edge (base); + tree tmp_var = opencl_tree_to_var (bb, code_tree); + + call = build_call_expr (function, 5, h_context, + integer_one_node, + build_addr (tmp_var, current_function_decl), + null_pointer_node, + null_pointer_node); + + call = build2 (MODIFY_EXPR, ptr_type_node, + h_program, call); + return opencl_add_safe_call_on_edge (call, false, single_succ_edge (bb)); +} + +/* clBuildProgram (h_program, 0, 0, 0, 0, 0); + BASE is the edge where call must be inserted. */ + +static edge +opencl_insert_build_program_call (edge base) +{ + tree function = opencl_create_function_decl (BUILD_PROGRAM); + tree call = build_call_expr (function, 6, + h_program, + integer_zero_node, null_pointer_node, + null_pointer_node, null_pointer_node, + null_pointer_node); + return opencl_add_safe_call_on_edge (call, true, base); +} + +/* cl_kernel tmm_kernel; + tmp_kernel = clCreateKernel (h_program, func_name, 0); + FUNCTION_NAME is the name of the kernel function, + CODE_GEN holds information related to code generation. */ + +static tree +opencl_insert_create_kernel_call (opencl_main code_gen, + const char *function_name) +{ + tree new_kernel_var; + basic_block bb; + gimple_stmt_iterator g_iter; + tree function; + tree kernel_name; + tree call; + tree tmp_tree; + + new_kernel_var = opencl_create_tmp_var (ptr_type_node, function_name); + bb = split_edge (code_gen->kernel_edge); + g_iter = gsi_last_bb (bb); + function = opencl_create_function_decl (CREATE_KERNEL); + kernel_name = build_string_literal (strlen (function_name) + 1, + function_name); + call = build_call_expr (function, 3, h_program, kernel_name, + null_pointer_node); + tmp_tree = build2 (MODIFY_EXPR, ptr_type_node, + new_kernel_var, call); + + code_gen->kernel_edge = single_succ_edge (bb); + force_gimple_operand_gsi (&g_iter, tmp_tree, true, NULL, false, + GSI_CONTINUE_LINKING); + VEC_safe_push (tree, heap, opencl_function_kernels, new_kernel_var); + code_gen->kernel_edge + = opencl_add_safe_call_on_edge (new_kernel_var, false, + code_gen->kernel_edge); + return new_kernel_var; +} + +/* Init memory on device. Only one levell of pointers are suppoted. + So in case of char ** only array of char * will be created. + Function return tree, corresponding to new pointer (pointer + on device). + + | cl_mem clCreateBuffer (cl_context context, + | cl_mem_flags flags, + | size_t size, + | void *host_ptr, + | cl_int *errcode_ret) */ + +/* Calculate size of data reference, represented by REF. PTR is a + base object of data reference. */ + +static tree +opencl_get_indirect_size (tree ptr, poly_dr_p ref) +{ + ptr = TREE_TYPE (ptr); + switch (TREE_CODE (ptr)) + { + case ARRAY_TYPE: + return TYPE_SIZE_UNIT (ptr); + + case POINTER_TYPE: + { + tree inner_type = TREE_TYPE (ptr); + tree t = graphite_outer_subscript_bound (ref, false); + tree inner_type_size = TYPE_SIZE_UNIT (inner_type); + if (inner_type_size == NULL) + return NULL; + + if (DECL_P (inner_type_size)) + add_referenced_var (inner_type_size); + + gcc_assert (t); + t = fold_build2 (TRUNC_DIV_EXPR, sizetype, t, inner_type_size); + t = fold_build2 (PLUS_EXPR, sizetype, t, size_one_node); + t = fold_build2 (MULT_EXPR, sizetype, t, inner_type_size); + return t; + } + default: + return NULL_TREE; + } + gcc_unreachable (); +} + +/* Calculate correct flags for clCreateBuffer. READ means, that + buffer must be readable on device, WRITE - that buffer must be + writable on device. */ + +static int +opencl_get_mem_flags (bool read, bool write) +{ + int rw_flags; + int location_flags; + gcc_assert (read || write); + if (write && read) + rw_flags = CL_MEM_READ_WRITE; + else + { + if (read) + rw_flags = CL_MEM_READ_ONLY; + else + rw_flags = CL_MEM_WRITE_ONLY; + } + if (flag_graphite_opencl_cpu) + location_flags = CL_MEM_USE_HOST_PTR; + else + location_flags = CL_MEM_COPY_HOST_PTR; + return location_flags | rw_flags; +} + +/* Create memory on device for DATA and init it by data from host. + ptr is pointer to host memory location. Function returns tree, + corresponding to memory location on device. */ + +static tree +opencl_create_memory_for_pointer (opencl_data data) +{ + tree ptr = data->object; + tree arr_size = data->size_variable; + tree function = opencl_create_function_decl (CREATE_BUFFER); + bool ever_read = data->ever_read_on_device; + bool ever_written = data->ever_written_on_device; + tree mem_flags = build_int_cst (NULL_TREE, + opencl_get_mem_flags (ever_read, + ever_written)); + if (TREE_CODE (TREE_TYPE (ptr)) == ARRAY_TYPE) + ptr = build_addr (ptr, current_function_decl); + + if (flag_graphite_opencl_debug) + { + tree result = opencl_create_tmp_var (integer_type_node, + "__opencl_create_buffer_result"); + + return build_call_expr (function, 5, + h_context, mem_flags, + arr_size, ptr, + build1 (ADDR_EXPR, + integer_ptr_type_node, + result)); + } + else + return build_call_expr (function, 5, + h_context, mem_flags, + arr_size, ptr, null_pointer_node); +} + +/* Create variables for kernel KERNEL arguments. Each argument is + represented by new variable with it's value and it's size. If arg + is a pointer or array, it's represented by device buffer with data + from host memory. CODE_GEN holds information related to code + generation. */ + +static void +opencl_init_local_device_memory (opencl_main code_gen, opencl_body kernel) +{ + VEC (tree, heap) ** args = &kernel->function_args; + VEC (tree, heap) ** args_to_pass = &kernel->function_args_to_pass; + VEC (opencl_data, heap) ** refs = &kernel->data_refs; + tree curr; + opencl_data curr_data; + int i; + basic_block bb = opencl_create_bb (code_gen); + basic_block kernel_bb = split_edge (code_gen->kernel_edge); + code_gen->kernel_edge = single_succ_edge (kernel_bb); + for (i = 0; VEC_iterate (tree, *args, i, curr); i ++) + { + gimple_stmt_iterator g_iter = gsi_last_bb (bb); + gimple_stmt_iterator kernel_g_iter = gsi_last_bb (kernel_bb); + tree curr_type = TREE_TYPE (curr); + tree new_type; + tree tmp_var; + tree mov; + tree curr_var = opencl_create_tmp_var (curr_type, "__ocl_iv"); + if (TREE_CODE (curr) != PARM_DECL + && TREE_CODE (curr) != VAR_DECL) + { + mov = build2 (MODIFY_EXPR, curr_type, curr_var, curr); + + force_gimple_operand_gsi (&g_iter, mov, false, NULL, false, + GSI_CONTINUE_LINKING); + } + else + force_gimple_operand_gsi (&g_iter, curr, false, curr_var, false, + GSI_CONTINUE_LINKING); + curr = curr_var; + + new_type = build_pointer_type (curr_type); + tmp_var = opencl_create_tmp_var (new_type, "__opencl_scalar_arg"); + mov = build1 (ADDR_EXPR, new_type, curr); + + mov = build2 (MODIFY_EXPR, new_type, tmp_var, mov); + + force_gimple_operand_gsi (&kernel_g_iter, mov, false, NULL, false, + GSI_CONTINUE_LINKING); + VEC_safe_push (tree, heap, *args_to_pass, tmp_var); + } + for (i = 0; VEC_iterate (opencl_data, *refs, i, curr_data); i++) + { + gimple_stmt_iterator kernel_g_iter = gsi_last_bb (kernel_bb); + tree new_type; + tree tmp_var; + tree mov; + + tree curr = opencl_pass_to_device (code_gen, curr_data); + tree curr_type = ptr_type_node; + + new_type = build_pointer_type (curr_type); + tmp_var = opencl_create_tmp_var (new_type, "__opencl_non_scalar_arg"); + mov = build1 (ADDR_EXPR, new_type, curr); + + mov = build2 (MODIFY_EXPR, new_type, tmp_var, mov); + + force_gimple_operand_gsi (&kernel_g_iter, mov, false, NULL, false, + GSI_CONTINUE_LINKING); + VEC_safe_push (tree, heap, *args_to_pass, tmp_var); + + } +} + +/* cl_int clSetKernelArg (cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void *arg_value) + + Set all kernel args for OpenCL kernel, represented by KERNEL_VAR. + KERNEL holds all data, related to given kernel. + CODE_GEN holds information related to code generation. + All arguments are passed by pointer. */ + +static void +opencl_pass_kernel_arguments (opencl_main code_gen, opencl_body kernel, + tree kernel_var) +{ + VEC (tree, heap) * args_to_pass = kernel->function_args_to_pass; + tree arg; + int i; + tree function = opencl_create_function_decl (SET_KERNEL_ARG); + for (i = 0; VEC_iterate (tree, args_to_pass, i, arg); i++) + { + tree call + = build_call_expr (function, 4, kernel_var, + build_int_cst (NULL_TREE, i), + TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (arg))), + arg); + opencl_add_safe_call (code_gen, call, true); + } +} + +/* clEnqueueNDRangeKernel (h_cmd_queue, hKernel, 1, 0, + &cnDimension, 0, 0, 0, 0); + + Execute kernel, represented by KERNEL_VAR in NUM_OF_EXEC threads. + Use EVENT_VAR as event variable for asynchronous call. + CODE_GEN holds information related to code generation. */ + +static void +opencl_execute_kernel (opencl_main code_gen, tree num_of_exec, + tree kernel_var, tree event_var) +{ + tree function = opencl_create_function_decl (ENQUEUE_ND_RANGE_KERNEL); + tree num_of_threads = opencl_create_tmp_var (integer_type_node, + "__opencl_num_of_exec"); + gimple_stmt_iterator g_iter = gsi_last_bb (opencl_create_bb (code_gen)); + tree call; + + TREE_STATIC (num_of_threads) = 1; + assemble_variable (num_of_threads, 1, 0, 1); + + call = build2 (MODIFY_EXPR, integer_type_node, num_of_threads, num_of_exec); + + + force_gimple_operand_gsi (&g_iter, call, true, NULL, false, + GSI_CONTINUE_LINKING); + + call = build1 (ADDR_EXPR, integer_ptr_type_node, num_of_threads); + + call = build_call_expr (function, 9, + h_cmd_queue, + kernel_var, + integer_one_node, + null_pointer_node, + call, + null_pointer_node, + integer_zero_node, + null_pointer_node, + event_var); + + opencl_add_safe_call (code_gen, call, true); +} + +/* Place building program from single source string to edge BASE. + Current implementation performs single build per function. + String contains kernels from all scops of current function. + Functions returns true if any kernel has been created. */ + +static edge +opencl_create_function_call (edge base) +{ + edge new_edge; + const char * src; + + /* Required for addressing types with size less then 4 bytes. */ + dyn_string_prepend_cstr + (main_program_src, + "#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n"); + /* Required for double type. */ + dyn_string_prepend_cstr + (main_program_src, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n "); + src = dyn_string_buf (main_program_src); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nGenerated OpenCL code: \n"); + fprintf (dump_file, "%s", src); + } + new_edge = opencl_insert_create_program_with_source_call (src, base); + + return opencl_insert_build_program_call (new_edge); +} + +/* Mark privatizable data for current loop nest. Information where + given data can be privatized is taken from meta information of + current loop nest, which is stored in CODE_GEN. */ + +static void +opencl_mark_privatized_data (opencl_main code_gen) +{ + VEC (opencl_data, heap) * datas = code_gen->opencl_function_data; + int i; + opencl_data curr; + bitmap can_be_private = code_gen->curr_meta->can_be_private; + for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++) + curr->privatized = bitmap_bit_p (can_be_private, curr->id); + +} + +/* Store calculated sizes of all pointers or arrays to variables. + CODE_GEN holds information related to code generation. */ + +static void +opencl_set_data_size (opencl_main code_gen) +{ + VEC (opencl_data, heap) * datas = code_gen->opencl_function_data; + int i; + opencl_data curr; + gimple_stmt_iterator g_iter = gsi_last_bb (code_gen->data_init_bb); + for (i = 0; VEC_iterate (opencl_data, datas, i, curr); i ++) + { + tree call; + if (curr->is_static) + continue; + if (!curr->used_on_device) + continue; + + if (curr->size_value == NULL) + continue; + + call = build2 (MODIFY_EXPR, size_type_node, + curr->size_variable, curr->size_value); + + force_gimple_operand_gsi (&g_iter, call, true, NULL, false, + GSI_CONTINUE_LINKING); + } +} + +/* Transform clast statement DATA from scop SCOP to OpenCL calls + in region REGION. Place all calls to edge MAIN. PARAM_INDEX + holds external scop params. */ + +void +opencl_transform_clast (struct clast_stmt * data, sese region, + edge main, scop_p scop, htab_t params_index) +{ + opencl_main code_gen; + /* Create main data struture for code generation. */ + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nGenerating OpenCL code for SCoP: \n"); + print_scop (dump_file, scop, 0); + } + + code_gen = opencl_main_create (((struct clast_root *)data)->names, + region, main, params_index); + + opencl_init_basic_blocks (code_gen); + opencl_init_data (scop, code_gen); + + code_gen->clast_meta = opencl_create_meta_from_clast (code_gen, data, 1, + NULL); + code_gen->curr_meta = code_gen->clast_meta; + + opencl_transform_stmt_list (data, code_gen, 1); + if (dyn_string_length (code_gen->main_program) != 0) + { + dyn_string_append (main_program_src, code_gen->main_program); + opencl_set_data_size (code_gen); + opencl_init_all_device_buffers (code_gen); + opencl_fflush_all_device_buffers_to_host (code_gen); + } + recompute_all_dominators (); + update_ssa (TODO_update_ssa); + opencl_main_delete (code_gen); +} + +/* Prepare memory for gimple (host) statement, represented by PBB. + Copy memory from device to host if it's nessesary. + CODE_GEN holds information related to code generation. */ + +static void +opencl_prepare_memory_for_gimple_stmt (poly_bb_p pbb, opencl_main code_gen) +{ + VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb); + int i; + poly_dr_p curr; + for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++) + { + data_reference_p d_ref = (data_reference_p) PDR_CDR (curr); + opencl_data data; + bool is_read; + + /* Scalar variables can be passed directly. */ + data = opencl_get_data_by_data_ref (code_gen, d_ref); + /* Private variables should not be passed from device to host. */ + if (data->privatized) + continue; + is_read = DR_IS_READ (d_ref); + gcc_assert (data); + + data->read_in_current_body = is_read; + data->written_in_current_body = !is_read; + opencl_pass_to_host (code_gen, data); + + if (!is_read) + bitmap_set_bit (code_gen->curr_meta->modified_on_host, data->id); + } +} + +/* Add basic block from clast_user_stmt STMT to gimple. + CODE_GEN holds information related to code generation. */ + +static void +opencl_add_gimple_for_user_stmt (struct clast_user_stmt * stmt, + opencl_main code_gen) +{ + gimple_bb_p gbb; + CloogStatement *cs = stmt->statement; + poly_bb_p pbb = (poly_bb_p) cloog_statement_usr (cs); + sese region = code_gen->region; + int nb_loops = number_of_loops (); + int i; + VEC (tree, heap) * iv_map = VEC_alloc (tree, heap, nb_loops); + htab_t newivs_index = code_gen->newivs_index; + VEC (tree, heap) * newivs = code_gen->newivs; + /* Get basic block to add. */ + gbb = PBB_BLACK_BOX (pbb); + + if (GBB_BB (gbb) == ENTRY_BLOCK_PTR) + return; + + /*Reset flags. */ + opencl_fflush_rw_flags (code_gen); + + /* Pass all required memory to host. */ + opencl_prepare_memory_for_gimple_stmt (pbb, code_gen); + + for (i = 0; i < nb_loops; i++) + VEC_quick_push (tree, iv_map, NULL_TREE); + + build_iv_mapping (iv_map, region, newivs, newivs_index, + stmt, code_gen->params_index); + code_gen->main_edge + = copy_bb_and_scalar_dependences (GBB_BB (gbb), region, + code_gen->main_edge, iv_map); + VEC_free (tree, heap, iv_map); + recompute_all_dominators (); + update_ssa (TODO_update_ssa); + + opencl_verify (); +} + +/* Add if statement, represented by S to current gimple. + CODE_GEN holds information related to code generation. */ + +static void +opencl_add_gimple_for_stmt_guard (struct clast_guard * s, + opencl_main code_gen, int depth) +{ + edge last_e = graphite_create_new_guard (code_gen->region, + code_gen->main_edge, s, + code_gen->newivs, + code_gen->newivs_index, + code_gen->params_index); + + edge true_e = get_true_edge_from_guard_bb (code_gen->main_edge->dest); + code_gen->main_edge = true_e; + opencl_transform_stmt_list (s->then, code_gen, depth); + code_gen->main_edge = last_e; + + recompute_all_dominators (); + opencl_verify (); +} + +/* Reset data structures before processing loop, represented by META. + CODE_GEN holds information related to code generation. */ + +static void +opencl_init_new_loop (opencl_clast_meta meta, opencl_main code_gen) +{ + opencl_data curr; + unsigned i; + meta->post_pass_to_host + = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE); + meta->post_pass_to_device + = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE); + + for (i = 0; + VEC_iterate (opencl_data, code_gen->opencl_function_data, i, curr); + i++) + { + curr->inited_in_current_loop_on_host = false; + curr->inited_in_current_loop_on_device = false; + } +} + +/* Post loop init. Loop through all data stored in POST_PASS_TO_HOST + and POST_PASS_TO_DEVICE vectors in META. This data must be up to + date on host or device respectively at the end of current loop. + CODE_GEN holds information related to code generation. */ + +static void +opencl_postpass_data (opencl_main code_gen, opencl_clast_meta meta) +{ + opencl_data curr; + unsigned i; + for (i = 0; + VEC_iterate (opencl_data, meta->post_pass_to_host, i, curr); i++) + { + curr->written_in_current_body = false; + opencl_pass_to_host (code_gen, curr); + } + + if (!flag_graphite_opencl_cpu) + for (i = 0; + VEC_iterate (opencl_data, meta->post_pass_to_device, i, curr); i++) + { + curr->written_in_current_body = false; + opencl_pass_to_device (code_gen, curr); + } + if (meta->parent) + { + VEC (opencl_data, heap) *parent_vec_host + = meta->parent->post_pass_to_host; + VEC (opencl_data, heap) *parent_vec_device + = meta->parent->post_pass_to_device; + + for (i = 0; + VEC_iterate (opencl_data, meta->post_pass_to_host, i, curr); i++) + VEC_safe_push (opencl_data, heap, parent_vec_host, curr); + + for (i = 0; + VEC_iterate (opencl_data, meta->post_pass_to_device, i, curr); i++) + VEC_safe_push (opencl_data, heap, parent_vec_device, curr); + } + VEC_free (opencl_data, heap, meta->post_pass_to_host); + VEC_free (opencl_data, heap, meta->post_pass_to_device); +} + +/* Add loop body, of the loop, represented by S, on host. + Loop body can contain device code. + DEPTH contains depth of given loop in current loop nest. + DEPENDENCY indicates where given loop has any dependencies. + CODE_GEN holds information related to code generation. */ + +static void +opencl_add_gimple_for_loop (struct clast_for * s, opencl_main code_gen, + int depth, bool dependency) +{ + loop_p old_parent = code_gen->context_loop; + loop_p new_loop + = graphite_create_new_loop (code_gen->region, + code_gen->main_edge, + s, code_gen->context_loop, + & code_gen->newivs, + code_gen->newivs_index, + code_gen->params_index, + depth); + + edge last_e = single_exit (new_loop); + edge to_body = single_succ_edge (new_loop->header); + basic_block after = to_body->dest; + opencl_clast_meta parent = code_gen->curr_meta->parent; + last_e = single_succ_edge (split_edge (last_e)); + + code_gen->context_loop = new_loop; + code_gen->main_edge = single_succ_edge (new_loop->header); + + opencl_init_new_loop (parent, code_gen); + opencl_transform_stmt_list (s->body, code_gen, depth + 1); + + code_gen->context_loop = old_parent; + + redirect_edge_succ_nodup (code_gen->main_edge, after); + set_immediate_dominator (CDI_DOMINATORS, code_gen->main_edge->dest, + code_gen->main_edge->src); + + opencl_postpass_data (code_gen, parent); + code_gen->main_edge = last_e; + + if (flag_loop_parallelize_all && !dependency) + new_loop->can_be_parallel = true; + opencl_verify (); +} + +/* Add loop, represented by S, on host. Loop body can contain device code. + DEPTH contains depth of given loop in current loop nest. + DEPENDENCY indicates where given loop has any dependencies. + CODE_GEN holds information related to code generation. */ + +static void +opencl_add_gimple_for_stmt_for (struct clast_for * s, opencl_main code_gen, + int depth, bool dependency) +{ + edge last_e = graphite_create_new_loop_guard (code_gen->region, + code_gen->main_edge, + s, code_gen->newivs, + code_gen->newivs_index, + code_gen->params_index); + + edge true_e = get_true_edge_from_guard_bb (code_gen->main_edge->dest); + code_gen->main_edge = true_e; + + opencl_add_gimple_for_loop (s, code_gen, depth, dependency); + code_gen->main_edge = last_e; +} + +/* Calculate parent data access flags in META based on children. + parent->modified_on_host = OR_{forall children} child->modified_on_host. + parent->modified_on_device = OR_{forall children} child->modified_on_device. +*/ + +static void +opencl_fix_meta_flags (opencl_clast_meta meta) +{ + opencl_clast_meta curr = meta->body; + while (curr) + { + bitmap_ior_into (meta->modified_on_host, curr->modified_on_host); + bitmap_ior_into (meta->modified_on_device, curr->modified_on_device); + curr = curr->next; + } +} + +/* Delete opencl_body DATA. */ + +static void +opencl_body_delete (opencl_body data) +{ + dyn_string_delete (data->body); + dyn_string_delete (data->header); + dyn_string_delete (data->pre_header); + dyn_string_delete (data->non_scalar_args); + VEC_free (tree, heap, data->function_args); + VEC_free (tree, heap, data->function_args_to_pass); + VEC_free (opencl_data, heap, data->data_refs); + free (data); +} + +/* Parse clast statement list S, located on depth DEPTH in current loop nest. + This function generates gimple from clast statements, but in case of + stmt_for either host or device code can be generated. + CODE_GEN holds information related to code generation. */ + +static void +opencl_transform_stmt_list (struct clast_stmt * s, opencl_main code_gen, + int depth) +{ + bool dump_p = dump_file && (dump_flags & TDF_DETAILS); + for ( ; s; s = s->next) + { + opencl_clast_meta tmp = code_gen->curr_meta; + if (CLAST_STMT_IS_A (s, stmt_root)) + continue; + else if (CLAST_STMT_IS_A (s, stmt_user)) + { + code_gen->curr_meta->init_edge = code_gen->main_edge; + opencl_add_gimple_for_user_stmt ((struct clast_user_stmt *) s, + code_gen); + code_gen->curr_meta = code_gen->curr_meta->next; + } + else if (CLAST_STMT_IS_A (s, stmt_for)) + { + opencl_clast_meta current_clast = code_gen->curr_meta; + struct clast_for *for_stmt = (struct clast_for *) s; + bool dependency = false; + bool parallel = false; + /* If there are dependencies in loop, it can't be parallelized. */ + if (!flag_graphite_opencl_no_dep_check && + dependency_in_clast_loop_p (code_gen, current_clast, + for_stmt, depth)) + { + if (dump_p) + fprintf (dump_file, "dependency in loop\n"); + dependency = true; + } + if (!dependency) + parallel = opencl_should_be_parallel_p (code_gen, current_clast, + depth); + + /* Create init block for memory transfer befor loop. */ + current_clast->init_edge = code_gen->main_edge; + + if (parallel && !dependency) + { + opencl_body current_body; + opencl_fflush_rw_flags (code_gen); + opencl_mark_privatized_data (code_gen); + current_clast->on_device = true; + current_body + = opencl_clast_to_kernel (for_stmt, code_gen, depth); + + if (current_body->num_of_data_writes) + { + dyn_string_t header = current_body->header; + dyn_string_t pre_header = current_body->pre_header; + dyn_string_t body = current_body->body; + + dyn_string_append (code_gen->main_program, header); + dyn_string_append (code_gen->main_program, pre_header); + dyn_string_append (code_gen->main_program, body); + + opencl_create_gimple_for_body (current_body, code_gen); + } + htab_delete (code_gen->global_defined_vars); + update_ssa (TODO_update_ssa); + opencl_verify (); + opencl_body_delete (current_body); + code_gen->current_body = NULL; + } + else + { + code_gen->curr_meta = code_gen->curr_meta->body; + opencl_add_gimple_for_stmt_for (for_stmt, code_gen, + depth, dependency); + } + + opencl_fix_meta_flags (current_clast); + code_gen->curr_meta = current_clast->next; + } + else if (CLAST_STMT_IS_A (s, stmt_guard)) + opencl_add_gimple_for_stmt_guard ((struct clast_guard *) s, + code_gen, depth); + else if (CLAST_STMT_IS_A (s, stmt_block)) + opencl_transform_stmt_list (((struct clast_block *)s)->body, + code_gen, depth); + else + gcc_unreachable (); + if (tmp->parent) + opencl_fix_meta_flags (tmp->parent); + } +} + +/* Find opencl_data object by host object OBJ in CODE_GEN hash maps. */ + +opencl_data +opencl_get_data_by_tree (opencl_main code_gen, tree obj) +{ + map_tree_to_data tmp = map_tree_to_data_create (obj, NULL); + map_tree_to_data * slot + = (map_tree_to_data *) htab_find_slot (code_gen->tree_to_data, + tmp, INSERT); + if (*slot == NULL) + return NULL; + + return (*slot)->value; +} + +/* Find opencl_data object by data reference REF in CODE_GEN hash maps. */ + +opencl_data +opencl_get_data_by_data_ref (opencl_main code_gen, data_reference_p ref) +{ + map_ref_to_data tmp = map_ref_to_data_create (ref, NULL); + map_ref_to_data * slot + = (map_ref_to_data *) htab_find_slot (code_gen->ref_to_data, + tmp, INSERT); + if (*slot == NULL) + return NULL; + + return (*slot)->value; +} + +/* Register reference to DATA via data reference REF_KEY and + variable TREE_KEY in CODE_GEN structures. */ + +static void +opencl_register_data (opencl_main code_gen, opencl_data data, + tree tree_key, data_reference_p ref_key) +{ + htab_t ref_to_data = code_gen->ref_to_data; + htab_t tree_to_data = code_gen->tree_to_data; + map_ref_to_data ref_ptr = map_ref_to_data_create (ref_key, data); + map_tree_to_data tree_ptr = map_tree_to_data_create (tree_key, data); + map_ref_to_data * ref_slot; + map_tree_to_data * tree_slot; + + + ref_slot + = (map_ref_to_data *) htab_find_slot (ref_to_data, ref_ptr, INSERT); + gcc_assert (*ref_slot == NULL); + *ref_slot = ref_ptr; + + + tree_slot + = (map_tree_to_data *) htab_find_slot (tree_to_data, tree_ptr, INSERT); + gcc_assert (*tree_slot == NULL || (*tree_slot)->value == data); + *tree_slot = tree_ptr; +} + +/* Create required OpenCL variable for given DATA. */ + +static void +opencl_data_init_object (opencl_data data) +{ + if (TREE_CODE (TREE_TYPE (data->exact_object)) == POINTER_TYPE) + { + data->device_object + = opencl_create_tmp_var (ptr_type_node, "__opencl_data"); + data->is_static = false; + } + else + { + /* (TREE_CODE (TREE_TYPE (data->exact_object)) == ARRAY_TYPE) */ + map_tree_to_data tree_ptr + = map_tree_to_data_create (data->exact_object, data); + + map_tree_to_data * tree_slot = + (map_tree_to_data *) htab_find_slot (array_data_to_tree, + tree_ptr, INSERT); + gcc_assert (*tree_slot == NULL); + *tree_slot = tree_ptr; + + data->device_object + = opencl_create_static_ptr_variable ("__opencl_data"); + data->is_static = true; + data->size_variable = data->size_value; + VEC_safe_push (opencl_data, heap, opencl_array_data, data); + } +} + +/* Find opencl_data which represents array VAR. */ + +static opencl_data +opencl_get_static_data_by_tree (tree var) +{ + map_tree_to_data tmp = map_tree_to_data_create (var, NULL); + map_tree_to_data * slot + = (map_tree_to_data *) htab_find_slot (array_data_to_tree, + tmp, INSERT); + if (*slot == NULL) + return NULL; + + return (*slot)->value; + +} + +/* Analyze single data reference REF and update CODE_GEN structures. + If it access data, which has been accessed in data references + before, update it's size. Otherwise add data to array. */ + +static void +opencl_parse_single_data_ref (poly_dr_p ref, opencl_main code_gen) +{ + data_reference_p d_ref = (data_reference_p) PDR_CDR (ref); + tree data_ref_tree = dr_outermost_base_object (d_ref); + tree size = NULL_TREE; + opencl_data curr; + + curr = opencl_get_data_by_tree (code_gen, data_ref_tree); + size = opencl_get_indirect_size (data_ref_tree, ref); + if (curr) + { + if (!curr->is_static) + { + if (!size || !curr->size_value) + curr->size_value = NULL; + else + curr->size_value = fold_build2 (MAX_EXPR, sizetype, + size, curr->size_value); + } + } + else + { + curr = opencl_get_static_data_by_tree (data_ref_tree); + if (!curr) + { + curr = opencl_data_create (data_ref_tree, size); + opencl_data_init_object (curr); + } + curr->id = VEC_length (opencl_data, code_gen->opencl_function_data); + VEC_safe_push (opencl_data, heap, code_gen->opencl_function_data, curr); + } + opencl_register_data (code_gen, curr, data_ref_tree, d_ref); +} + +/* Analyse all data reference for poly basic block PBB and update CODE_GEN + structures. */ + +static void +opencl_parse_data_refs (poly_bb_p pbb, opencl_main code_gen) +{ + VEC (poly_dr_p, heap) *drs = PBB_DRS (pbb); + int i; + poly_dr_p curr; + for (i = 0; VEC_iterate (poly_dr_p, drs, i, curr); i++) + opencl_parse_single_data_ref (curr, code_gen); +} + +/* Analyse all data reference for scop M_SCOP and update + CODE_GEN structures. */ + +static void +opencl_init_data (scop_p m_scop, opencl_main code_gen) +{ + VEC (poly_bb_p, heap) * bbs = SCOP_BBS (m_scop); + int i; + poly_bb_p curr; + for (i = 0; VEC_iterate (poly_bb_p, bbs, i, curr); i++) + opencl_parse_data_refs (curr, code_gen); +} + +/* Add clWaitForEvent (1, EVENT_VAR); call to CODE_GEN->main_edge. */ + +static void +opencl_wait_for_event (opencl_main code_gen, tree event_var) +{ + tree function = opencl_create_function_decl (WAIT_FOR_EVENTS); + tree call = build_call_expr (function, 2, + integer_one_node, + event_var); + opencl_add_safe_call (code_gen, call, true); +} + +/* This calls must be placed after outermost loop processing. */ + +/* Add function calls to create and launch kernel KERNEL to + CODE_GEN->main_edge. */ + +static void +opencl_create_gimple_for_body (opencl_body kernel, opencl_main code_gen) +{ + tree num_of_exec = kernel->num_of_exec; + tree call; + + tree kernel_var + = opencl_insert_create_kernel_call (code_gen, (const char *)kernel->name); + + tree index_type = build_index_type (build_int_cst (NULL_TREE, 2)); + tree array_type = build_array_type (ptr_type_node, index_type); + tree var = opencl_create_tmp_var (array_type, "wait_event"); + TREE_STATIC (var) = 1; + assemble_variable (var, 1, 0, 1); + + call = build4 (ARRAY_REF, ptr_type_node, var, + integer_zero_node, NULL_TREE, NULL_TREE); + call = build_addr (call, current_function_decl); + + opencl_init_local_device_memory (code_gen, kernel); + opencl_pass_kernel_arguments (code_gen, kernel, kernel_var); + + opencl_execute_kernel (code_gen, num_of_exec, kernel_var, call); + opencl_wait_for_event (code_gen, call); +} + +/* Create global variables for opencl code. */ + +static void +opencl_create_gimple_variables (void) +{ + static bool opencl_var_created = false; + if (opencl_var_created) + return; + + opencl_var_created = true; + /* cl_context h_context */ + h_context = opencl_create_static_ptr_variable ("__ocl_h_context"); + + /* cl_command_queue h_cmd_queue */ + h_cmd_queue = opencl_create_static_ptr_variable ("__ocl_h_cmd_queue"); +} + +/* Return tree, which represents function selected by ID. + If ID is STATIC_INIT, init all required data. */ + +static tree +opencl_create_function_decl (enum OPENCL_FUNCTUONS id) +{ + static tree create_context_from_type_decl = NULL; + static tree get_context_info_decl = NULL; + static tree create_command_queue_decl = NULL; + static tree create_program_with_source_decl = NULL; + static tree build_program_decl = NULL; + static tree create_kernel_decl = NULL; + static tree create_buffer_decl = NULL; + static tree set_kernel_arg_decl = NULL; + static tree enqueue_nd_range_kernel_decl = NULL; + static tree enqueue_read_buffer_decl = NULL; + static tree enqueue_write_buffer_decl = NULL; + static tree release_memory_obj_decl = NULL; + static tree release_context_decl = NULL; + static tree release_command_queue_decl = NULL; + static tree release_program_decl = NULL; + static tree release_kernel_decl = NULL; + static tree get_platform_ids_decl = NULL; + static tree get_wait_for_events_decl = NULL; + switch (id) + { + case STATIC_INIT: + { + tree const_char_type = build_qualified_type (char_type_node, + TYPE_QUAL_CONST); + tree const_char_ptr = build_pointer_type (const_char_type); + tree const_char_ptr_ptr = build_pointer_type (const_char_ptr); + + tree const_size_t = build_qualified_type (size_type_node, + TYPE_QUAL_CONST); + tree const_size_t_ptr = build_pointer_type (const_size_t); + + tree size_t_ptr = build_pointer_type (size_type_node); + + tree cl_device_type = integer_type_node; + tree cl_context_info = unsigned_type_node; + tree cl_command_queue_properties = long_unsigned_type_node; + tree cl_mem_flags = long_unsigned_type_node; + + tree cl_context = ptr_type_node; + tree cl_context_properties = ptr_type_node; + tree cl_command_queue = ptr_type_node; + tree cl_device_id = ptr_type_node; + tree cl_program = ptr_type_node; + tree cl_kernel = ptr_type_node; + tree cl_event = ptr_type_node; + tree cl_mem = ptr_type_node; + + tree const_cl_event = build_qualified_type (cl_event, + TYPE_QUAL_CONST); + tree cl_event_ptr = build_pointer_type (cl_event); + tree const_cl_event_ptr = build_pointer_type (const_cl_event); + + tree const_cl_device_id = build_qualified_type (cl_device_id, + TYPE_QUAL_CONST); + tree const_cl_device_id_ptr = build_pointer_type (const_cl_device_id); + + tree cl_platford_id = long_integer_type_node; + tree cl_platford_id_ptr = build_pointer_type (cl_platford_id); + + tree function_type; + /* | cl_context + | clCreateContextFromType (cl_context_properties *properties, + | cl_device_type device_type, + | void (*pfn_notify) (const char *errinfo, + | const void *private_info, size_t cb, + | void *user_data), + | void *user_data, + | cl_int *errcode_ret) */ + function_type + = build_function_type_list (cl_context, + cl_context_properties, + cl_device_type, + ptr_type_node, + ptr_type_node, + integer_ptr_type_node, + NULL_TREE); + create_context_from_type_decl + = build_fn_decl (opencl_function_names[0], function_type); + + /* | cl_int clGetContextInfo (cl_context context, + | cl_context_info param_name, + | size_t param_value_size, + | void *param_value, + | size_t *param_value_size_ret) */ + function_type + = build_function_type_list (integer_type_node, + cl_context, + cl_context_info, + size_type_node, + ptr_type_node, + size_t_ptr, + NULL_TREE); + get_context_info_decl + = build_fn_decl (opencl_function_names[1], function_type); + + /* | cl_command_queue + | clCreateCommandQueue (cl_context context, + | cl_device_id device, + | cl_command_queue_properties properties, + | cl_int *errcode_ret) */ + function_type + = build_function_type_list (cl_command_queue, + cl_context, + cl_device_id, + cl_command_queue_properties, + integer_ptr_type_node, + NULL_TREE); + create_command_queue_decl + = build_fn_decl (opencl_function_names[2], function_type); + + /* | cl_program clCreateProgramWithSource (cl_context context, + | cl_uint count, + | const char **strings, + | const size_t *lengths, + | cl_int *errcode_ret) */ + function_type + = build_function_type_list (cl_program, + cl_context, + unsigned_type_node, + const_char_ptr_ptr, + const_size_t_ptr, + integer_ptr_type_node, + NULL_TREE); + create_program_with_source_decl + = build_fn_decl (opencl_function_names[3], function_type); + + /* | cl_int + | clBuildProgram (cl_program program, + | cl_uint num_devices, + | const cl_device_id *device_list, + | const char *options, + | void (*pfn_notify) (cl_program, void *user_data), + | void *user_data) */ + function_type + = build_function_type_list (integer_type_node, + cl_program, + unsigned_type_node, + const_cl_device_id_ptr, + const_char_ptr, + ptr_type_node, + ptr_type_node, + NULL_TREE); + build_program_decl + = build_fn_decl (opencl_function_names[4], function_type); + + /* | cl_kernel clCreateKernel (cl_program program, + | const char *kernel_name, + | cl_int *errcode_ret) */ + function_type + = build_function_type_list (cl_kernel, + cl_program, + const_char_ptr, + integer_ptr_type_node, + NULL_TREE); + + create_kernel_decl + = build_fn_decl (opencl_function_names[5], function_type); + + /* | cl_mem clCreateBuffer (cl_context context, + | cl_mem_flags flags, + | size_t size, + | void *host_ptr, + | cl_int *errcode_ret) */ + + function_type + = build_function_type_list (cl_mem, + cl_context, + cl_mem_flags, + size_type_node, + ptr_type_node, + integer_ptr_type_node, + NULL_TREE); + create_buffer_decl + = build_fn_decl (opencl_function_names[6], function_type); + + + /* | cl_int clSetKernelArg (cl_kernel kernel, + | cl_uint arg_index, + | size_t arg_size, + | const void *arg_value) */ + + function_type + = build_function_type_list (integer_type_node, + cl_kernel, + unsigned_type_node, + size_type_node, + const_ptr_type_node, + NULL_TREE); + set_kernel_arg_decl + = build_fn_decl (opencl_function_names[7], function_type); + + /* | cl_int clEnqueueNDRangeKernel (cl_command_queue command_queue, + | cl_kernel kernel, + | cl_uint work_dim, + | const size_t *global_work_offset, + | const size_t *global_work_size, + | const size_t *local_work_size, + | cl_uint num_events_in_wait_list, + | const cl_event *event_wait_list, + | cl_event *event) */ + + function_type + = build_function_type_list (integer_type_node, + cl_command_queue, + cl_kernel, + unsigned_type_node, + const_size_t_ptr, + const_size_t_ptr, + const_size_t_ptr, + unsigned_type_node, + const_cl_event_ptr, + cl_event_ptr, + NULL_TREE); + + enqueue_nd_range_kernel_decl + = build_fn_decl (opencl_function_names[8], function_type); + + /* | cl_int clEnqueueReadBuffer (cl_command_queue command_queue, + | cl_mem buffer, + | cl_bool blocking_read, + | size_t offset, + | size_t cb, + | void *ptr, + | cl_uint num_events_in_wait_list, + | const cl_event *event_wait_list, + | cl_event *event) */ + + function_type + = build_function_type_list (integer_type_node, + cl_command_queue, + cl_mem, + unsigned_type_node, + size_type_node, + size_type_node, + ptr_type_node, + unsigned_type_node, + const_cl_event_ptr, + cl_event_ptr, + NULL_TREE); + + enqueue_read_buffer_decl + = build_fn_decl (opencl_function_names[9], function_type); + + /* | cl_int clEnqueueWriteBuffer (cl_command_queue command_queue, + | cl_mem buffer, + | cl_bool blocking_write, + | size_t offset, + | size_t cb, + | const void *ptr, + | cl_uint num_events_in_wait_list, + | const cl_event *event_wait_list, + | cl_event *event) */ + + function_type + = build_function_type_list (integer_type_node, + cl_command_queue, + cl_mem, + unsigned_type_node, + size_type_node, + size_type_node, + const_ptr_type_node, + unsigned_type_node, + const_cl_event_ptr, + cl_event_ptr, + NULL_TREE); + + enqueue_write_buffer_decl + = build_fn_decl (opencl_function_names[10], function_type); + + + /* cl_int clReleaseMemObject (cl_mem memobj) */ + + function_type + = build_function_type_list (integer_type_node, cl_mem, NULL_TREE); + + release_memory_obj_decl + = build_fn_decl (opencl_function_names[11], function_type); + + + /* cl_int clReleaseContext (cl_context context) */ + function_type + = build_function_type_list (integer_type_node, cl_context, + NULL_TREE); + + release_context_decl + = build_fn_decl (opencl_function_names[12], function_type); + + /* cl_int clReleaseCommandQueue (cl_command_queue command_queue) */ + function_type + = build_function_type_list (integer_type_node, cl_command_queue, + NULL_TREE); + + release_command_queue_decl + = build_fn_decl (opencl_function_names[13], function_type); + + /* cl_int clReleaseProgram (cl_program program) */ + function_type + = build_function_type_list (integer_type_node, cl_program, + NULL_TREE); + + release_program_decl + = build_fn_decl (opencl_function_names[14], function_type); + + /* cl_int clReleaseKernel (cl_kernel kernel) */ + function_type + = build_function_type_list (integer_type_node, cl_kernel, NULL_TREE); + + release_kernel_decl + = build_fn_decl (opencl_function_names[15], function_type); + + /* | cl_int clGetPlatformIDs (cl_uint num_entries, + | cl_platform_id *platforms, + | cl_uint *num_platforms) */ + + + function_type + = build_function_type_list (integer_type_node, + unsigned_type_node, + cl_platford_id_ptr, + build_pointer_type (unsigned_type_node), + NULL_TREE); + get_platform_ids_decl + = build_fn_decl (opencl_function_names [16], function_type); + + + /* | cl_int clWaitForEvents (cl_uint num_events, + | const cl_event *event_list) */ + + function_type + = build_function_type_list (integer_type_node, + unsigned_type_node, + const_cl_event_ptr, + NULL_TREE); + + get_wait_for_events_decl + = build_fn_decl (opencl_function_names [17], function_type); + + return NULL_TREE; + } + case CREATE_CONTEXT_FROM_TYPE: return create_context_from_type_decl; + case GET_CONTEXT_INFO: return get_context_info_decl; + case CREATE_COMMAND_QUEUE: return create_command_queue_decl; + case CREATE_PROGRAM_WITH_SOURCE: return create_program_with_source_decl; + case BUILD_PROGRAM: return build_program_decl; + case CREATE_KERNEL: return create_kernel_decl; + case CREATE_BUFFER: return create_buffer_decl; + case SET_KERNEL_ARG: return set_kernel_arg_decl; + case ENQUEUE_ND_RANGE_KERNEL: return enqueue_nd_range_kernel_decl; + case ENQUEUE_READ_BUFFER: return enqueue_read_buffer_decl; + case ENQUEUE_WRITE_BUFFER: return enqueue_write_buffer_decl; + case RELEASE_MEMORY_OBJ: return release_memory_obj_decl; + case RELEASE_CONTEXT: return release_context_decl; + case RELEASE_COMMAND_QUEUE: return release_command_queue_decl; + case RELEASE_PROGRAM: return release_program_decl; + case RELEASE_KERNEL: return release_kernel_decl; + case GET_PLATFORM_IDS: return get_platform_ids_decl; + case WAIT_FOR_EVENTS: return get_wait_for_events_decl; + default: gcc_unreachable (); + } +} + +/* Create call + | clGetContextInfo (h_context, CL_CONTEXT_DEVICES, 0, 0, + | &n_context_descriptor_size); + + POINTER_TO_SIZE if &n_context_descriptor_size. */ + +static tree +opencl_create_clGetContextInfo_1 (tree pointer_to_size) +{ + tree function = opencl_create_function_decl (GET_CONTEXT_INFO); + tree zero_pointer = null_pointer_node; + tree cl_contex_devices = build_int_cst (NULL_TREE, CL_CONTEXT_DEVICES); + tree context_var = h_context; + tree call = build_call_expr (function, 5, + context_var, + cl_contex_devices, + integer_zero_node, + zero_pointer, + pointer_to_size); + return call; +} + +/* Create call + | clGetContextInfo (h_context, CL_CONTEXT_DEVICES, + | n_context_descriptor_size, A_DEVICES, 0); + + POINTER_TO_SIZE if &n_context_descriptor_size. */ + +static tree +opencl_create_clGetContextInfo_2 (tree size, tree a_devices) +{ + tree function = opencl_create_function_decl (GET_CONTEXT_INFO); + tree zero_pointer = null_pointer_node; + tree cl_contex_devices = build_int_cst (NULL_TREE, CL_CONTEXT_DEVICES); + tree context_var = h_context; + tree call = build_call_expr (function, 5, + context_var, + cl_contex_devices, + size, + a_devices, + zero_pointer); + return call; +} + +/* Create context_properties array variable. */ + +static tree +opencl_create_context_properties (void) +{ + tree cl_context_properties_type = long_integer_type_node; + + tree index_type = build_index_type (build_int_cst (NULL_TREE, 3)); + tree array_type = build_array_type (cl_context_properties_type, + index_type); + return opencl_create_tmp_var (array_type, "context_properties"); +} + +/* Place calls to obtain current platform id to INIT_EDGE. + Place obtained id to VAR. */ + +static edge +opencl_set_context_properties (edge init_edge, tree var) +{ + tree function = opencl_create_function_decl (GET_PLATFORM_IDS); + tree cl_context_properties_type = long_integer_type_node; + tree call; + tree call2; + gimple_stmt_iterator g_iter; + + basic_block bb = split_edge (init_edge); + + init_edge = single_succ_edge (bb); + + g_iter = gsi_last_bb (bb); + call = build4 (ARRAY_REF, cl_context_properties_type, + var, integer_zero_node, NULL_TREE, NULL_TREE); + call2 = build_int_cst (NULL_TREE, CL_CONTEXT_PLATFORM); + call2 = build1 (CONVERT_EXPR, cl_context_properties_type, call2); + + call = build2 (MODIFY_EXPR, cl_context_properties_type, + call, call2); + + force_gimple_operand_gsi (&g_iter, call, true, NULL, false, + GSI_CONTINUE_LINKING); + g_iter = gsi_last_bb (bb); + + call = build4 (ARRAY_REF, cl_context_properties_type, + var, integer_one_node, NULL_TREE, NULL_TREE); + call2 = build_call_expr (function, 3, + integer_one_node, + build_addr (call, current_function_decl), + null_pointer_node); + force_gimple_operand_gsi (&g_iter, call2, true, NULL, false, + GSI_CONTINUE_LINKING); + + call = build4 (ARRAY_REF, cl_context_properties_type, + var, build_int_cst (NULL_TREE, 2), NULL_TREE, NULL_TREE); + call = build2 (MODIFY_EXPR, cl_context_properties_type, + call, fold_convert (cl_context_properties_type, + integer_zero_node)); + force_gimple_operand_gsi (&g_iter, call, true, NULL, false, + GSI_CONTINUE_LINKING); + return init_edge; +} + +/* Create call + clCreateContextFromType (PROPERTIES, CL_DEVICE_TYPE_GPU, 0, 0, 0); */ + +static tree +opencl_create_clCreateContextFromType (tree properties) +{ + tree function = opencl_create_function_decl (CREATE_CONTEXT_FROM_TYPE); + tree zero_pointer = null_pointer_node; + tree device + = build_int_cst (NULL_TREE, flag_graphite_opencl_cpu + ? CL_DEVICE_TYPE_CPU : CL_DEVICE_TYPE_GPU); + tree call; + call = build_call_expr (function, 5, + build_addr (properties, current_function_decl), + device, + zero_pointer, + zero_pointer, + zero_pointer); + return call; +} + +/* Create call + clCreateCommandQueue (h_context, DEV_ID, 0, 0); */ + +static tree +opencl_create_clCreateCommandQueue (tree dev_id) +{ + + tree function = opencl_create_function_decl (CREATE_COMMAND_QUEUE); + tree zero_pointer = null_pointer_node; + tree context = h_context; + tree call = build_call_expr (function, 4, + context, + dev_id, + zero_pointer, + zero_pointer); + return call; +} + +/* Create call malloc (ARG). */ + +static tree +opencl_create_malloc_call (tree arg) +{ + tree function_type = + build_function_type_list (ptr_type_node, + integer_type_node, + NULL_TREE); + tree function = build_fn_decl ("malloc", function_type); + + tree call = build_call_expr (function, 1, arg); + return call; +} + +/* Init basic block in CODE_GEN structures. */ + +static void +opencl_init_basic_blocks (opencl_main code_gen) +{ + code_gen->data_init_bb = opencl_create_bb (code_gen); + code_gen->kernel_edge = code_gen->main_edge; +} + +/* Generate calls for opencl init functions and place them to INIT_EDGE. + Must be called only once in each function. */ + +static edge +opencl_create_init_context (edge init_edge) +{ + tree tmp_type; + tree call; + tree n_context_descriptor_size + = opencl_create_tmp_var (size_type_node, "__ocl_nContextDescriptorSize"); + tree a_devices = opencl_create_tmp_var (build_pointer_type (ptr_type_node), + "__ocl_a_devices"); + tree properties = opencl_create_context_properties (); + init_edge = opencl_set_context_properties (init_edge, properties); + call = opencl_create_clCreateContextFromType (properties); + call = build2 (MODIFY_EXPR, TREE_TYPE (h_context), + h_context, call); + init_edge = opencl_add_safe_call_on_edge (call, false, init_edge); + tmp_type = build_pointer_type + (TREE_TYPE (n_context_descriptor_size)); + call = build1 (ADDR_EXPR, tmp_type, + n_context_descriptor_size); + call = opencl_create_clGetContextInfo_1 (call); + init_edge = opencl_add_safe_call_on_edge (call, true, init_edge); + call = opencl_create_malloc_call (n_context_descriptor_size); + call = fold_convert (TREE_TYPE (a_devices), call); + call = build2 (MODIFY_EXPR, TREE_TYPE (a_devices), a_devices, call); + init_edge = opencl_add_safe_call_on_edge (call, false, init_edge); + call = opencl_create_clGetContextInfo_2 (n_context_descriptor_size, + a_devices); + init_edge = opencl_add_safe_call_on_edge (call, true, init_edge); + tmp_type = TREE_TYPE (TREE_TYPE (a_devices)); + call = build1 (INDIRECT_REF, tmp_type, a_devices); + call = opencl_create_clCreateCommandQueue (call); + call = build2 (MODIFY_EXPR, TREE_TYPE (h_cmd_queue), + h_cmd_queue, call); + init_edge = opencl_add_safe_call_on_edge (call, false, init_edge); + return init_edge; +} + +/* Fill array VEC with all poly basic blocks in clast statement ROOT. */ + +static void +build_poly_bb_vec (struct clast_stmt * root, + VEC (poly_bb_p, heap) ** vec) +{ + while (root) + { + if (CLAST_STMT_IS_A (root, stmt_user)) + { + poly_bb_p tmp + = (poly_bb_p) cloog_statement_usr + (((struct clast_user_stmt *) root)->statement); + VEC_safe_push (poly_bb_p, heap, *vec, tmp); + } + else if (CLAST_STMT_IS_A (root, stmt_for)) + build_poly_bb_vec (((struct clast_for *) root)->body, vec); + else if (CLAST_STMT_IS_A (root, stmt_guard)) + build_poly_bb_vec (((struct clast_guard *) root)->then, vec); + else if (CLAST_STMT_IS_A (root, stmt_block)) + build_poly_bb_vec (((struct clast_block *) root)->body, vec); + root = root->next; + } +} + +/* Check whether there is a dependency between PBB1 and PBB2 on level LEVEL. + CAN_BE_PRIVATE indicates which variables can be privatizated. + CODE_GEN holds information related to code generation. */ + +static bool +opencl_dependency_between_pbbs_p (opencl_main code_gen, poly_bb_p pbb1, + poly_bb_p pbb2, int level, + bitmap can_be_private) +{ + int i, j; + poly_dr_p pdr1, pdr2; + timevar_push (TV_GRAPHITE_DATA_DEPS); + for (i = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb1), i, pdr1); i++) + { + data_reference_p ref1 = (data_reference_p)PDR_CDR (pdr1); + opencl_data data_1 = opencl_get_data_by_data_ref (code_gen, ref1); + if (bitmap_bit_p (can_be_private, data_1->id)) + continue; + + for (j = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb2), j, pdr2); j++) + { + data_reference_p ref2 = (data_reference_p)PDR_CDR (pdr2); + + opencl_data data_2 = opencl_get_data_by_data_ref (code_gen, ref2); + + if (bitmap_bit_p (can_be_private, data_2->id)) + continue; + + if (graphite_carried_dependence_level_k (pdr1, pdr2, level)) + { + timevar_pop (TV_GRAPHITE_DATA_DEPS); + return true; + } + } + } + + timevar_pop (TV_GRAPHITE_DATA_DEPS); + return false; +} + +/* Returns true, if there is dependency in clast loop STMT on depth DEPTH. + CODE_GEN holds information related to code generation. */ + +bool +dependency_in_clast_loop_p (opencl_main code_gen, opencl_clast_meta meta, + struct clast_for * stmt, int depth) +{ + VEC (poly_bb_p, heap) * pbbs = VEC_alloc (poly_bb_p, heap, 10); + int level = get_scattering_level (depth); + int i; + poly_bb_p pbb1; + bitmap can_be_private; + build_poly_bb_vec (stmt->body, &pbbs); + + can_be_private = meta->can_be_private; + + for (i = 0; VEC_iterate (poly_bb_p, pbbs, i, pbb1); i++) + { + int j; + poly_bb_p pbb2; + for (j = 0; VEC_iterate (poly_bb_p, pbbs, j, pbb2); j++) + if (opencl_dependency_between_pbbs_p (code_gen, pbb1, pbb1, + level, can_be_private)) + { + VEC_free (poly_bb_p, heap, pbbs); + return true; + } + } + VEC_free (poly_bb_p, heap, pbbs); + return false; +} + +/* Init graphite-opencl pass. Must be called in each function before + any scop processing. */ + +void +graphite_opencl_init (void) +{ + opencl_create_gimple_variables (); + + /* cl_program h_program */ + h_program + = opencl_create_static_ptr_variable ("__ocl_h_program"); + + opencl_function_kernels = VEC_alloc (tree, heap, OPENCL_INIT_BUFF_SIZE); + main_program_src = dyn_string_new (100); + + opencl_array_data = VEC_alloc (opencl_data, heap, OPENCL_INIT_BUFF_SIZE); + array_data_to_tree = htab_create (10, map_tree_to_data_to_hash, + map_tree_to_data_cmp, free); + + opencl_create_function_decl (STATIC_INIT); +} + +/* Create calls to initialize static data for current function and + place them to INIT_EDGE. */ + +static edge +opencl_init_static_data (edge init_edge) +{ + int i; + opencl_data curr; + for (i = 0; VEC_iterate (opencl_data, opencl_array_data, i, curr); i ++) + { + tree tmp; + if (!curr->used_on_device) + continue; + + tmp = opencl_create_memory_for_pointer (curr); + tmp = build2 (MODIFY_EXPR, ptr_type_node, curr->device_object, tmp); + init_edge = opencl_add_safe_call_on_edge (tmp, false, init_edge); + } + return init_edge; +} + +/* Finalize graphite-opencl pass for current function. Place all required + calls to STATIC_INIT_EDGE. Must be called after all scop processing + in current function. */ + +void +graphite_opencl_finalize (edge static_init_edge) +{ + int i; + opencl_data curr; + if (dyn_string_length (main_program_src) != 0) + { + tree call = build2 (EQ_EXPR, boolean_type_node, + h_program, null_pointer_node); + basic_block buff_init_block = split_edge (static_init_edge); + edge before_init; + edge init_edge; + + static_init_edge = single_succ_edge (buff_init_block); + + create_empty_if_region_on_edge (static_init_edge, call); + + static_init_edge = opencl_create_function_call + (get_true_edge_from_guard_bb (static_init_edge->dest)); + static_init_edge = opencl_init_static_data (static_init_edge); + before_init = single_pred_edge (buff_init_block); + call = build2 (EQ_EXPR, boolean_type_node, + h_context, null_pointer_node); + + create_empty_if_region_on_edge (before_init, call); + init_edge = get_true_edge_from_guard_bb (before_init->dest); + init_edge = opencl_create_init_context (init_edge); + } + dyn_string_delete (main_program_src); + for (i = 0; VEC_iterate (opencl_data, opencl_array_data, i, curr); i++) + opencl_data_delete (curr); + + VEC_free (tree, heap, opencl_function_kernels); + VEC_free (opencl_data, heap, opencl_array_data); + recompute_all_dominators (); + update_ssa (TODO_update_ssa); +} + +/* Debug functions for deined data structures. */ + +static void +dump_flag_to_file (const char * name, bool cond, + FILE * file, int indent) +{ + indent_to (file, indent); + fprintf (file, "%s = %s", name, cond? "true" : "false"); +} + +void +dump_opencl_data (opencl_data data, FILE * file, bool verbose) +{ + fprintf (file, "Data id = %d\n", data->id); + fprintf (file, "Data dimension = %d\n", data->data_dim); + fprintf (file, "Data depth = %d\n", data->depth); + fprintf (file, "Flags"); + indent_to (file, 2); + fprintf (file, "Global"); + dump_flag_to_file ("Static", data->is_static, file, 4); + dump_flag_to_file ("Can be private", data->can_be_private, file, 4); + dump_flag_to_file ("Used on device", data->used_on_device, file, 4); + dump_flag_to_file ("Ever read on device", + data->ever_read_on_device, file, 4); + + dump_flag_to_file ("Ever written on device", + data->ever_written_on_device, file, 4); + + dump_flag_to_file ("Supported", data->supported, file, 4); + indent_to (file, 2); + fprintf (file, "Local"); + + dump_flag_to_file ("Up to date on device", + data->up_to_date_on_device, file, 4); + dump_flag_to_file ("Up to date on host", + data->up_to_date_on_host, file, 4); + + dump_flag_to_file ("Inited in current loop on host", + data->inited_in_current_loop_on_host, file, 4); + + dump_flag_to_file ("Inited in current loop on device", + data->inited_in_current_loop_on_device, file, 4); + + dump_flag_to_file ("Written in current body", + data->written_in_current_body, file, 4); + + dump_flag_to_file ("Read in current body", + data->read_in_current_body, file, 4); + dump_flag_to_file ("Privatized", data->privatized, file, 4); + + fprintf (file, "\n"); + + if (verbose) + { + fprintf (file, "\nObject\n"); + print_node_brief (file, "", data->object, 2); + + fprintf (file, "\nDevice object\n"); + print_node_brief (file, "", data->device_object, 2); + + fprintf (file, "\nSize value\n"); + print_node_brief (file, "", data->size_value, 2); + + fprintf (file, "\nSize variable\n"); + print_node_brief (file, "", data->size_variable, 2); + + fprintf (file, "\nExact object\n"); + print_node_brief (file, "", data->exact_object, 2); + } +} + +DEBUG_FUNCTION void +debug_opencl_data (opencl_data data, bool verbose) +{ + dump_opencl_data (data, stderr, verbose); +} + +void +dump_opencl_body (opencl_body body, FILE * file, bool verbose) +{ + fprintf (file, "\n%s\n\n", body->name); + fprintf (file, "First iterator: %s\n", body->first_iter); + fprintf (file, "Last iterator: %s\n", body->last_iter); + fprintf (file, "Number of data writes = %d\n\n", body->num_of_data_writes); + fprintf (file, "Function header::\n"); + fprintf (file, "%s\n\n", dyn_string_buf (body->header)); + fprintf (file, "Non scalar args::\n"); + fprintf (file, "%s\n\n", dyn_string_buf (body->non_scalar_args)); + fprintf (file, "Pre header::\n"); + fprintf (file, "%s\n\n", dyn_string_buf (body->pre_header)); + fprintf (file, "Body::\n"); + fprintf (file, "%s\n\n", dyn_string_buf (body->body)); + + fprintf (file, "Number of executions::\n"); + print_node_brief (file, "", body->num_of_exec, 2); + + if (verbose) + print_clast_stmt (file, body->clast_body); +} + +DEBUG_FUNCTION void +debug_opencl_body (opencl_body body, bool verbose) +{ + dump_opencl_body (body, stderr, verbose); +} + +void +dump_opencl_clast_meta (opencl_clast_meta meta, FILE * file, + bool verbose, int indent) +{ + if (!verbose) + /* Just print structure of meta. */ + { + while (meta) + { + indent_to (file, indent); + fprintf (file, "", + meta->in_depth, meta->out_depth, + meta->on_device?"true":"false", + meta->access_unsupported?"false":"true"); + dump_opencl_clast_meta (meta->body, file, false, indent + 4); + meta = meta->next; + } + } + else + { + fprintf (file, "", + meta->in_depth, meta->out_depth, + meta->on_device?"true":"false", + meta->access_unsupported?"false":"true"); + + fprintf (file, "\nModified on host::\n"); + debug_bitmap_file (file, meta->modified_on_host); + + fprintf (file, "\nModified on device::\n"); + debug_bitmap_file (file, meta->modified_on_device); + + fprintf (file, "\nAccess::\n"); + debug_bitmap_file (file, meta->access); + + fprintf (file, "\nCan be private::\n"); + debug_bitmap_file (file, meta->can_be_private); + } +} + +DEBUG_FUNCTION void +debug_opencl_clast_meta (opencl_clast_meta meta, bool verbose) +{ + dump_opencl_clast_meta (meta, stderr, verbose, 0); +} + +static int +print_char_p_htab (void ** h, void * v) +{ + char ** ptr = (char **)h; + FILE * file = (FILE *)v; + fprintf (file, " %s\n", *ptr); + return 1; +} + +static int +print_tree_to_data_htab (void ** h, void * v) +{ + map_tree_to_data * map = (map_tree_to_data *)h; + FILE * file = (FILE *)v; + tree key = (*map)->key; + opencl_data data = (*map)->value; + print_node_brief (file, "key = ", key, 2); + fprintf (file, " data_id = %d\n", data->id); + return 1; +} + +static int +print_ref_to_data_htab (void ** h, void * v) +{ + map_ref_to_data * map = (map_ref_to_data *)h; + FILE * file = (FILE *)v; + data_reference_p key = (*map)->key; + opencl_data data = (*map)->value; + fprintf (file, "key::\n"); + dump_data_reference (file, key); + fprintf (file, "data_id = %d\n\n", data->id); + return 1; +} + +void +dump_opencl_main (opencl_main code_gen, FILE * file, bool verbose) +{ + fprintf (file, "Current meta::\n"); + dump_opencl_clast_meta (code_gen->curr_meta, file, false, 2); + fprintf (file, "\n"); + if (code_gen->current_body) + { + fprintf (file, "Current body::\n"); + dump_opencl_body (code_gen->current_body, file, verbose); + } + fprintf (file, "\n\nData init basic block::\n"); + dump_bb (code_gen->data_init_bb, stderr, 0); + + if (code_gen->defined_vars) + { + fprintf (file, "Defined variables::\n"); + htab_traverse_noresize (code_gen->defined_vars, print_char_p_htab, + file); + } + + if (code_gen->global_defined_vars) + { + fprintf (file, "Global defined variables::\n"); + htab_traverse_noresize (code_gen->global_defined_vars, + print_char_p_htab, file); + } + fprintf (file, "Refs to data::\n"); + htab_traverse_noresize (code_gen->ref_to_data, + print_ref_to_data_htab, file); + + fprintf (file, "Trees to data::\n"); + htab_traverse_noresize (code_gen->tree_to_data, + print_tree_to_data_htab, file); + + if (verbose) + fprintf (file, "%s\n", dyn_string_buf (code_gen->main_program)); +} + +DEBUG_FUNCTION void +debug_opencl_main (opencl_main code_gen, bool verbose) +{ + dump_opencl_main (code_gen, stderr, verbose); +} + +DEBUG_FUNCTION void +debug_opencl_program (void) +{ + fprintf (stderr, "%s", dyn_string_buf (main_program_src)); +} + +#endif +#include "gt-graphite-opencl.h" diff --git a/gcc/graphite-opencl.h b/gcc/graphite-opencl.h new file mode 100644 index 0000000..4913b09 --- /dev/null +++ b/gcc/graphite-opencl.h @@ -0,0 +1,254 @@ +/* GRAPHITE-OpenCL pass. + Copyright (C) 2009, 2010 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ +#define OPENCL_INIT_BUFF_SIZE 40 + +/* Data structure which holds information about single array or pointer + in current scop. */ +struct opencl_data_def +{ + /* Base object. In case of arrays base array replaced by pointer to this + array. */ + tree object; + /* Device object, corresponding to base object. */ + tree device_object; + /* Size of data references in scop. */ + tree size_value; + + /* Variable to hold size_value. */ + tree size_variable; + + /* Unique id. */ + int id; + + /* If true, then given data represents static object. */ + bool is_static; + + /* Exact object represented by given data. */ + tree exact_object; + + /* If true, given data can be privatized in current context. */ + bool can_be_private; + /* Current state. */ + bool up_to_date_on_device; + bool up_to_date_on_host; + + bool inited_in_current_loop_on_host; + bool inited_in_current_loop_on_device; + + bool written_in_current_body; + bool read_in_current_body; + + bool privatized; + + /* Data dimension. */ + int data_dim; + int depth; + + /* Access information. */ + bool used_on_device; + bool ever_read_on_device; + bool ever_written_on_device; + + bool supported; +}; + +typedef struct opencl_data_def * opencl_data; +typedef const char * char_p; + +DEF_VEC_P (opencl_data); +DEF_VEC_ALLOC_P (opencl_data, heap); + +/* Data structure with meta information about clast statement. + | + | LOOP_1 {out_depth = 0, in_depth = 3} + | / \ + | / \ + | / \ + | / \ + | {out_depth = 1, LOOP_2 STMT_1 {in_depth = 0, + | in_depth = 2} out_depth = 1} + | | + | | + | | + | LOOP_3 {out_depth = 2, in_depth = 1} + | | + | | + | | + | STMT_2 {out_depth = 3, in_depth = 3} +*/ +struct opencl_clast_meta_def +{ + /* Outer depth. */ + int out_depth; + /* Inner depth. */ + int in_depth; + /* True means, that statement will be executed on device. */ + bool on_device; + struct opencl_clast_meta_def * next; + struct opencl_clast_meta_def * body; + struct opencl_clast_meta_def * parent; + + /* All init code for this statement will be placed on this edge. */ + edge init_edge; + + /* Data modifications in this statement (including children). */ + bitmap modified_on_device; + bitmap modified_on_host; + + /* Data, which can be private in current loop. */ + bitmap can_be_private; + + /* Access information. */ + bool access_unsupported; + bitmap access; + + VEC (opencl_data, heap) *post_pass_to_host; + VEC (opencl_data, heap) *post_pass_to_device; +}; + +typedef struct opencl_clast_meta_def *opencl_clast_meta; + +DEF_VEC_P (char_p); +DEF_VEC_ALLOC_P (char_p, heap); + +/* Single opencl kernel. */ +struct graphite_opencl_kernel_body +{ + /* Function body. */ + dyn_string_t body; + + /* Function header. */ + dyn_string_t header; + + /* Variables declarations. */ + dyn_string_t pre_header; + + /* Number of executions for kernel. */ + tree num_of_exec; + + dyn_string_t non_scalar_args; + + /* Number of write data references in kernel. */ + int num_of_data_writes; + + + /* Clast_stmt corresponding to kernel. */ + struct clast_stmt *clast_body; + + /* First iterator (scat_i). */ + const char *first_iter; + + /* First iterator (scat_{i+n}). */ + const char *last_iter; + + /* Kernel name. All kernels have names opencl_auto_function_. */ + char name[40]; + + /* Variables, which must be passed to kernel. */ + VEC (tree, heap) *function_args; + VEC (tree, heap) *function_args_to_pass; + VEC (opencl_data, heap) *data_refs; +}; + +typedef struct graphite_opencl_kernel_body *opencl_body; + + +/* Main data structure for translating clast to gimple with opencl + function calls. */ +struct graphite_opencl_creator +{ + /* Array with scat_* (iterators from clast data structures) names. */ + CloogNames *root_names; + + /* Current kernel. */ + opencl_body current_body; + + /* Current region. */ + sese region; + + /* Meta information for current scop. */ + opencl_clast_meta clast_meta; + + /* Current meta statement. */ + opencl_clast_meta curr_meta; + + /* Htab of all defined local vars (not tmp vars, generated by gimplify). */ + htab_t defined_vars; + + /* Htab of all defined global vars. */ + htab_t global_defined_vars; + + /* Current edge. */ + edge main_edge; + + /* Edge with kernels executing. */ + edge kernel_edge; + + /* Current opencl program. */ + dyn_string_t main_program; + + /* Information from clast structures. */ + htab_t newivs_index; + htab_t params_index; + VEC (tree, heap) *newivs; + + /* Current loop. */ + loop_p context_loop; + + /* Basic block with init statements for data. */ + basic_block data_init_bb; + + /* Data used in kernel. */ + VEC(opencl_data, heap) *opencl_function_data; + + VEC(tree, heap) *iv_map; + + htab_t ref_to_data; + htab_t tree_to_data; +}; + +typedef struct graphite_opencl_creator * opencl_main; + +extern opencl_body opencl_clast_to_kernel (struct clast_for *, + opencl_main, int); +extern tree dr_outermost_base_object (data_reference_p); +extern void dump_opencl_main (opencl_main, FILE *, bool); +extern void dump_opencl_body (opencl_body, FILE *, bool); +extern void dump_opencl_clast_meta (opencl_clast_meta, FILE *, bool, int); +extern void dump_opencl_data (opencl_data, FILE *, bool); +extern void debug_opencl_main (opencl_main, bool); +extern void debug_opencl_program (void); +extern void debug_opencl_body (opencl_body, bool); +extern void debug_opencl_clast_meta (opencl_clast_meta, bool); +extern void debug_opencl_data (opencl_data, bool); + +/* Find opencl_data object by host object. */ +extern opencl_data opencl_get_data_by_data_ref (opencl_main, data_reference_p); +extern opencl_data opencl_get_data_by_tree (opencl_main, tree); +extern tree opencl_tree_to_var (basic_block, tree); +extern opencl_clast_meta opencl_create_meta_from_clast (opencl_main, + struct clast_stmt *, + int, opencl_clast_meta); +extern bool opencl_should_be_parallel_p (opencl_main, opencl_clast_meta, int); + +/* Create new basic block on main edge and update main_edge. */ +extern basic_block opencl_create_bb (opencl_main); +extern bool dependency_in_clast_loop_p (opencl_main, opencl_clast_meta, + struct clast_for *, int); +extern tree opencl_get_base_object_by_tree (tree); diff --git a/gcc/graphite.c b/gcc/graphite.c index 4ce484a..64779bb 100644 --- a/gcc/graphite.c +++ b/gcc/graphite.c @@ -281,10 +281,16 @@ graphite_transform_loops (void) { build_poly_scop (scop); + if (flag_graphite_opencl) + graphite_opencl_init (); + if (POLY_SCOP_P (scop) && apply_poly_transforms (scop) && gloog (scop, bb_pbb_mapping)) need_cfg_cleanup_p = true; + + if (flag_graphite_opencl) + graphite_opencl_finalize (single_succ_edge (ENTRY_BLOCK_PTR)); } htab_delete (bb_pbb_mapping); diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c index 4b51f40..d1d7142 100644 --- a/gcc/tree-ssa-loop.c +++ b/gcc/tree-ssa-loop.c @@ -308,7 +308,8 @@ gate_graphite_transforms (void) || flag_loop_strip_mine || flag_graphite_identity || flag_loop_parallelize_all - || flag_loop_flatten) + || flag_loop_flatten + || flag_graphite_opencl) flag_graphite = 1; return flag_graphite != 0;