===================================================================
@@ -383,7 +383,8 @@
|| flag_loop_strip_mine
|| flag_graphite_identity
|| flag_loop_parallelize_all
- || flag_loop_optimize_isl)
+ || flag_loop_optimize_isl
+ || flag_loop_unroll_jam)
flag_graphite = 1;
return flag_graphite != 0;
===================================================================
@@ -1328,6 +1328,10 @@
Common Report Var(flag_loop_block) Optimization
Enable Loop Blocking transformation
+floop-unroll-jam
+Common Report Var(flag_loop_unroll_jam) Optimization
+Enable Loop Unroll Jam transformation
+
fgnu-tm
Common Report Var(flag_tm)
Enable support for GNU transactional memory
===================================================================
@@ -1302,11 +1302,12 @@
|| flag_loop_block
|| flag_loop_interchange
|| flag_loop_strip_mine
- || flag_loop_parallelize_all)
+ || flag_loop_parallelize_all
+ || flag_loop_unroll_jam)
sorry ("Graphite loop optimizations cannot be used (ISL is not available)"
"(-fgraphite, -fgraphite-identity, -floop-block, "
"-floop-interchange, -floop-strip-mine, -floop-parallelize-all, "
- "and -ftree-loop-linear)");
+ "-floop-unroll-jam, and -ftree-loop-linear)");
#endif
/* One region RA really helps to decrease the code size. */
===================================================================
@@ -847,6 +847,21 @@
"size of tiles for loop blocking",
51, 0, 0)
+/* Size of unrolling factor for unroll-and-jam. */
+
+DEFPARAM (PARAM_LOOP_UNROLL_JAM_SIZE,
+ "loop-unroll-jam-size",
+ "size of unrolling factor for unroll-and-jam",
+ 4, 0, 0)
+
+/* Size of the band formed by the strip mined dimension and the most inner one for unroll-and-jam. */
+
+DEFPARAM (PARAM_LOOP_UNROLL_JAM_DEPTH,
+ "loop-unroll-jam-depth",
+ "depth of unrolled loop for unroll-and-jam",
+ 2, 0, 0)
+
+
/* Maximal number of parameters that we allow in a SCoP. */
DEFPARAM (PARAM_GRAPHITE_MAX_NB_SCOP_PARAMS,
===================================================================
@@ -831,6 +831,159 @@
return schedule;
}
+/* Auxiliary data structure to separate the maps. */
+
+struct sep_map
+{
+ isl_map *map_arr[2];
+ int n;
+};
+
+/* Auxiliary function used to separate the maps. */
+
+static int
+separate_map (isl_basic_map *bmap, void *user)
+{
+ struct sep_map *smap = (struct sep_map *) user;
+
+ smap->map_arr[smap->n] = isl_map_from_basic_map(bmap);
+ smap->n += 1;
+
+ return 0;
+}
+
+/* Set the unroll AST built option. */
+
+static __isl_give isl_union_map *
+generate_isl_options_2 (scop_p scop, __isl_take isl_union_set *domain,
+ int dim, int cl)
+{
+ isl_map *map;
+ isl_space *space, *space_sep;
+ isl_ctx *ctx;
+ isl_union_map *mapu;
+ int nsched = get_max_schedule_dimensions (scop);
+
+ ctx = scop->ctx;
+ space_sep = isl_space_alloc(ctx, 0, 1, 1);
+ space_sep = isl_space_wrap(space_sep);
+ space_sep = isl_space_set_tuple_name(space_sep, isl_dim_set,
+ "separation_class");
+ space = isl_set_get_space (scop->context);
+ space_sep = isl_space_align_params(space_sep, isl_space_copy(space));
+ space = isl_space_map_from_domain_and_range(space, space_sep);
+ space = isl_space_add_dims (space,isl_dim_in, nsched);
+ map = isl_map_universe (space);
+ isl_map_fix_si (map,isl_dim_out,0,dim);
+ isl_map_fix_si (map,isl_dim_out,1,cl);
+
+ mapu = isl_union_map_intersect_domain (isl_union_map_from_map (map),
+ domain);
+ return (mapu);
+}
+
+/* Set the separation_class AST built option. */
+
+static __isl_give isl_union_map *
+generate_isl_options_1 (scop_p scop, const char *str, int dim)
+{
+ isl_ctx *ctx;
+ isl_map *map;
+ isl_space *space_sep;
+ isl_space *space;
+ isl_union_map *options_isl;
+ int nsched = get_max_schedule_dimensions (scop);
+
+ ctx = scop->ctx;
+ space_sep = isl_space_alloc(ctx, 0, 1, 1);
+ space = isl_set_get_space (scop->context);
+ space_sep = isl_space_align_params(space_sep, space);
+ space_sep = isl_space_add_dims (space_sep,isl_dim_in, nsched - 1);
+ space_sep = isl_space_set_tuple_name(space_sep, isl_dim_out, str);
+ map = isl_map_universe (space_sep);
+ isl_map_fix_si (map,isl_dim_out,0,dim);
+
+ options_isl = isl_union_map_from_map (map);
+
+ return options_isl;
+}
+
+/* Compute the separation class. */
+
+static __isl_give isl_union_set *
+generate_isl_domain_n (scop_p scop)
+{
+ int i;
+ poly_bb_p pbb;
+ isl_union_set *domain_isl;
+
+ domain_isl = isl_union_set_empty (isl_set_get_space (scop->context));
+
+ FOR_EACH_VEC_ELT (SCOP_BBS (scop), i, pbb)
+ {
+ isl_set *bb_domain;
+ isl_set *bb_domain_s;
+ isl_set *bb_domain_r;
+ int res;
+
+ struct sep_map smap, *psmap;
+
+ /* Dead code elimination: when the domain of a PBB is empty,
+ don't generate code for the PBB. */
+ if (isl_set_is_empty (pbb->domain))
+ continue;
+
+ bb_domain = isl_set_copy (pbb->domain);
+
+ /* Extract the original and auxiliar maps from pbb->transformed.
+ Set pbb->transformed to the original map. */
+ psmap = &smap;
+ psmap->n = 0;
+ res = isl_map_foreach_basic_map (pbb->transformed, separate_map, (void *)psmap);
+ gcc_assert (res == 0);
+
+ isl_map_free(pbb->transformed);
+ pbb->transformed = isl_map_copy(psmap->map_arr[0]);
+
+ bb_domain_s = isl_set_apply (isl_set_copy (bb_domain),
+ psmap->map_arr[1]);
+ bb_domain_r = isl_set_apply (bb_domain, psmap->map_arr[0]);
+
+ bb_domain_s = isl_set_complement (bb_domain_s);
+ bb_domain_r = isl_set_subtract(bb_domain_r,bb_domain_s);
+
+ domain_isl =
+ isl_union_set_union (domain_isl,
+ isl_union_set_from_set (bb_domain_r));
+ }
+
+ return domain_isl;
+}
+
+/* Set the AST built options for loop unroll and jam. */
+
+static __isl_give isl_union_map *
+generate_isl_options (scop_p scop)
+{
+ isl_union_set *domain_isl;
+ isl_union_map *options_isl_ss;
+ isl_union_map *options_isl =
+ isl_union_map_empty (isl_set_get_space (scop->context));
+ int dim = get_max_schedule_dimensions (scop) - 1;
+
+ if (!flag_loop_unroll_jam)
+ return options_isl;
+
+ domain_isl = generate_isl_domain_n (scop);
+
+ options_isl_ss = generate_isl_options_1 (scop, "unroll", dim);
+ options_isl = isl_union_map_union (options_isl, options_isl_ss);
+ options_isl_ss = generate_isl_options_2 (scop, domain_isl, dim, 0);
+ options_isl = isl_union_map_union (options_isl, options_isl_ss);
+
+ return options_isl;
+}
+
/* Generates a schedule, which specifies an order used to
visit elements in a domain. */
@@ -879,11 +1032,13 @@
}
/* Set the separate option for all dimensions.
- This helps to reduce control overhead. */
+ This helps to reduce control overhead.
+ Set the options for unroll and jam. */
static __isl_give isl_ast_build *
set_options (__isl_take isl_ast_build *control,
- __isl_keep isl_union_map *schedule)
+ __isl_keep isl_union_map *schedule,
+ __isl_take isl_union_map *opt_luj)
{
isl_ctx *ctx = isl_union_map_get_ctx (schedule);
isl_space *range_space = isl_space_set_alloc (ctx, 0, 1);
@@ -894,6 +1049,9 @@
isl_union_set *domain = isl_union_map_range (isl_union_map_copy (schedule));
domain = isl_union_set_universe (domain);
isl_union_map *options = isl_union_map_from_domain_and_range (domain, range);
+
+ options = isl_union_map_union (options, opt_luj);
+
return isl_ast_build_set_options (control, options);
}
@@ -907,9 +1065,14 @@
isl_options_set_ast_build_atomic_upper_bound (scop->ctx, true);
add_parameters_to_ivs_params (scop, ip);
+
+ isl_union_map *options_isl = generate_isl_options (scop);
+
isl_union_map *schedule_isl = generate_isl_schedule (scop);
isl_ast_build *context_isl = generate_isl_context (scop);
- context_isl = set_options (context_isl, schedule_isl);
+
+ context_isl = set_options (context_isl, schedule_isl, options_isl);
+
isl_union_map *dependences = NULL;
if (flag_loop_parallelize_all)
{
===================================================================
@@ -186,7 +186,7 @@
PartialSchedule = isl_band_get_partial_schedule (Band);
*Dimensions = isl_band_n_member (Band);
- if (DisableTiling)
+ if (DisableTiling || flag_loop_unroll_jam)
return PartialSchedule;
/* It does not make any sense to tile a band with just one dimension. */
@@ -305,8 +305,83 @@
isl_constraint_set_constant_si (c, VectorWidth - 1);
TilingMap = isl_map_add_constraint (TilingMap, c);
- isl_map_dump (TilingMap);
+ return TilingMap;
+}
+/* Create an auxiliary map to getPrevectorMap whose image provides the
+ separation class for full/partial tile separation of the strip-mined
+ dimension. It is used in graphite_isl_ast_to_gimple.c to set the
+ corresponding option for AST build. */
+static isl_map *
+getPrevectorMap_f (isl_ctx *ctx, int DimToVectorize,
+ int ScheduleDimensions,
+ int VectorWidth)
+{
+ isl_space *Space;
+ isl_local_space *LocalSpace, *LocalSpaceRange;
+ isl_set *Modulo;
+ isl_map *TilingMap;
+ isl_constraint *c;
+ isl_aff *Aff;
+ int PointDimension; /* ip */
+ int TileDimension; /* it */
+ isl_val *VectorWidthMP;
+ int i;
+
+ /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
+
+ Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
+ TilingMap = isl_map_universe (isl_space_copy (Space));
+ LocalSpace = isl_local_space_from_space (Space);
+ PointDimension = ScheduleDimensions;
+ TileDimension = DimToVectorize;
+
+ /* Create an identity map for everything except DimToVectorize and the
+ point loop. */
+ for (i = 0; i < ScheduleDimensions; i++)
+ {
+ if (i == DimToVectorize)
+ continue;
+
+ c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
+
+ isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
+ isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
+
+ TilingMap = isl_map_add_constraint (TilingMap, c);
+ }
+
+ /* it % 'VectorWidth' = 0 */
+ LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
+ Aff = isl_aff_zero_on_domain (LocalSpaceRange);
+ Aff = isl_aff_set_constant_si (Aff, VectorWidth);
+ Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
+
+ VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
+ Aff = isl_aff_mod_val (Aff, VectorWidthMP);
+ Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
+ TilingMap = isl_map_intersect_range (TilingMap, Modulo);
+
+ /* it + ('VectorWidth' - 1) <= i0 */
+ c = isl_inequality_alloc (isl_local_space_copy(LocalSpace));
+ isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension,-1);
+ isl_constraint_set_coefficient_si (c, isl_dim_in, TileDimension, 1);
+ isl_constraint_set_constant_si (c, -VectorWidth + 1);
+ TilingMap = isl_map_add_constraint (TilingMap, c);
+
+ /* it <= ip */
+ c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
+ isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
+ isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
+ TilingMap = isl_map_add_constraint (TilingMap, c);
+
+ /* ip <= it + ('VectorWidth' - 1) */
+ c = isl_inequality_alloc (LocalSpace);
+ isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
+ isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
+ isl_constraint_set_constant_si (c, VectorWidth - 1);
+ TilingMap = isl_map_add_constraint (TilingMap, c);
+
return TilingMap;
}
@@ -350,17 +425,31 @@
SuffixSchedule);
isl_band_list_free (Children);
}
- else if (EnablePollyVector)
+ else if (EnablePollyVector || flag_loop_unroll_jam)
{
+ int i;
+ int depth;
+
+ depth = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH);
+
for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
{
+ if (flag_loop_unroll_jam && (i != (ScheduleDimensions - depth)))
+ continue;
+
if (isl_band_member_is_zero_distance (Band, i))
{
isl_map *TileMap;
isl_union_map *TileUMap;
+ int stride;
- TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, 4);
+ stride = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_SIZE);
+
+ TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, stride);
TileUMap = isl_union_map_from_map (TileMap);
+ TileMap = getPrevectorMap_f (ctx, i, ScheduleDimensions, stride);
+ TileUMap = isl_union_map_add_map (TileUMap, TileMap);
+
TileUMap = isl_union_map_align_params
(TileUMap, isl_space_copy (Space));
PartialSchedule = isl_union_map_apply_range
===================================================================
@@ -276,7 +276,7 @@
/* This pass needs to be run at the final stage, as it does not
update the lst. */
- if (flag_loop_optimize_isl)
+ if (flag_loop_optimize_isl || flag_loop_unroll_jam)
transform_done |= optimize_isl (scop);
return transform_done;