@@ -1582,6 +1582,7 @@ OBJS = \
omp-expand.o \
omp-general.o \
omp-low.o \
+ omp-transform-loops.o \
omp-oacc-kernels-decompose.o \
omp-oacc-neuter-broadcast.o \
omp-simd-clone.o \
@@ -16470,6 +16470,15 @@ With @option{--param=openacc-privatization=quiet}, don't diagnose.
This is the current default.
With @option{--param=openacc-privatization=noisy}, do diagnose.
+@item omp-unroll-full-max-iterations
+The maximum number of iterations of a loop for which an OpenMP @samp{omp unroll}
+directive on the loop without a clause is turned into an
+@samp{omp unroll full}.
+
+@item omp-unroll-default-factor
+The unroll factor used for loops that have an OpenMP @samp{omp unroll partial}
+directive without an explicit unroll factor.
+
@end table
The following choices of @var{name} are available on AArch64 targets:
@@ -1474,6 +1474,9 @@ dump_gimple_omp_for (pretty_printer *buffer, const gomp_for *gs, int spc,
case GF_OMP_FOR_KIND_SIMD:
kind = " simd";
break;
+ case GF_OMP_FOR_KIND_TRANSFORM_LOOP:
+ kind = " unroll";
+ break;
default:
gcc_unreachable ();
}
@@ -1511,6 +1514,9 @@ dump_gimple_omp_for (pretty_printer *buffer, const gomp_for *gs, int spc,
case GF_OMP_FOR_KIND_SIMD:
pp_string (buffer, "#pragma omp simd");
break;
+ case GF_OMP_FOR_KIND_TRANSFORM_LOOP:
+ pp_string (buffer, "#pragma omp loop_transform");
+ break;
default:
gcc_unreachable ();
}
@@ -159,6 +159,7 @@ enum gf_mask {
GF_OMP_FOR_KIND_TASKLOOP = 2,
GF_OMP_FOR_KIND_OACC_LOOP = 4,
GF_OMP_FOR_KIND_SIMD = 5,
+ GF_OMP_FOR_KIND_TRANSFORM_LOOP = 6,
GF_OMP_FOR_COMBINED = 1 << 3,
GF_OMP_FOR_COMBINED_INTO = 1 << 4,
GF_OMP_TARGET_KIND_MASK = (1 << 5) - 1,
@@ -6069,6 +6069,7 @@ is_gimple_stmt (tree t)
case OACC_CACHE:
case OMP_PARALLEL:
case OMP_FOR:
+ case OMP_LOOP_TRANS:
case OMP_SIMD:
case OMP_DISTRIBUTE:
case OMP_LOOP:
@@ -12300,6 +12301,11 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
}
break;
+ case OMP_CLAUSE_UNROLL_FULL:
+ case OMP_CLAUSE_UNROLL_NONE:
+ case OMP_CLAUSE_UNROLL_PARTIAL:
+ case OMP_CLAUSE_TILE:
+ break;
case OMP_CLAUSE_NOHOST:
default:
gcc_unreachable ();
@@ -13284,6 +13290,10 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
case OMP_CLAUSE_FINALIZE:
case OMP_CLAUSE_INCLUSIVE:
case OMP_CLAUSE_EXCLUSIVE:
+ case OMP_CLAUSE_TILE:
+ case OMP_CLAUSE_UNROLL_FULL:
+ case OMP_CLAUSE_UNROLL_NONE:
+ case OMP_CLAUSE_UNROLL_PARTIAL:
break;
case OMP_CLAUSE_NOHOST:
@@ -13775,6 +13785,29 @@ find_standalone_omp_ordered (tree *tp, int *walk_subtrees, void *)
return NULL_TREE;
}
+static void omp_for_drop_tile_clauses (tree for_stmt)
+{
+ /* Drop erroneous loop transformation clauses to avoid follow up errors
+ in pass-omp_transform_loops. */
+ tree last_c = NULL_TREE;
+ for (tree c = OMP_FOR_CLAUSES (for_stmt); c;
+ c = OMP_CLAUSE_CHAIN (c))
+ {
+
+ if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_TILE)
+ continue;
+
+ if (last_c)
+ TREE_CHAIN (last_c) = TREE_CHAIN (c);
+ else
+ OMP_FOR_CLAUSES (for_stmt) = TREE_CHAIN (c);
+
+ error_at (OMP_CLAUSE_LOCATION (c),
+ "'tile' loop transformation may not appear on "
+ "non-rectangular for");
+ }
+}
+
/* Gimplify the gross structure of an OMP_FOR statement. */
static enum gimplify_status
@@ -13966,6 +13999,8 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
case OMP_FOR:
if (OMP_FOR_NON_RECTANGULAR (inner_for_stmt ? inner_for_stmt : for_stmt))
{
+ omp_for_drop_tile_clauses (for_stmt);
+
if (omp_find_clause (OMP_FOR_CLAUSES (for_stmt),
OMP_CLAUSE_SCHEDULE))
error_at (EXPR_LOCATION (for_stmt),
@@ -14010,6 +14045,10 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
case OMP_SIMD:
ort = ORT_SIMD;
break;
+ case OMP_LOOP_TRANS:
+ if (OMP_FOR_NON_RECTANGULAR (inner_for_stmt ? inner_for_stmt : for_stmt))
+ omp_for_drop_tile_clauses (for_stmt);
+ break;
default:
gcc_unreachable ();
}
@@ -14370,6 +14409,15 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
n->value &= ~GOVD_LASTPRIVATE_CONDITIONAL;
}
}
+ else if (TREE_CODE (orig_for_stmt) == OMP_LOOP_TRANS)
+ {
+ /* This loop is not going to be associated with any
+ directive after its transformation in
+ pass_omp_transform_loops. It will be lowered there
+ and the loop iteration variable will be used in the
+ context. */
+ omp_notice_variable (gimplify_omp_ctxp, decl, true);
+ }
else
omp_add_variable (gimplify_omp_ctxp, decl, GOVD_PRIVATE | GOVD_SEEN);
@@ -14412,7 +14460,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
c2 = NULL_TREE;
}
}
- else
+ else if (TREE_CODE (orig_for_stmt) != OMP_LOOP_TRANS)
omp_add_variable (gimplify_omp_ctxp, var,
GOVD_PRIVATE | GOVD_SEEN);
}
@@ -14693,6 +14741,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
case OMP_DISTRIBUTE: kind = GF_OMP_FOR_KIND_DISTRIBUTE; break;
case OMP_TASKLOOP: kind = GF_OMP_FOR_KIND_TASKLOOP; break;
case OACC_LOOP: kind = GF_OMP_FOR_KIND_OACC_LOOP; break;
+ case OMP_LOOP_TRANS: kind = GF_OMP_FOR_KIND_TRANSFORM_LOOP; break;
default:
gcc_unreachable ();
}
@@ -14877,6 +14926,14 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
gtask_clauses_ptr = &OMP_CLAUSE_CHAIN (c);
}
break;
+ /* Move loop transformations to inner loop */
+ case OMP_CLAUSE_UNROLL_FULL:
+ case OMP_CLAUSE_UNROLL_NONE:
+ case OMP_CLAUSE_UNROLL_PARTIAL:
+ case OMP_CLAUSE_TILE:
+ *gfor_clauses_ptr = c;
+ gfor_clauses_ptr = &OMP_CLAUSE_CHAIN (c);
+ break;
default:
gcc_unreachable ();
}
@@ -15317,6 +15374,11 @@ gimplify_omp_loop (tree *expr_p, gimple_seq *pre_p)
}
pc = &OMP_CLAUSE_CHAIN (*pc);
break;
+ case OMP_CLAUSE_TILE:
+ case OMP_CLAUSE_UNROLL_PARTIAL:
+ case OMP_CLAUSE_UNROLL_FULL:
+ case OMP_CLAUSE_UNROLL_NONE:
+ break;
default:
gcc_unreachable ();
}
@@ -17098,6 +17160,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
case OMP_FOR:
case OMP_DISTRIBUTE:
case OMP_TASKLOOP:
+ case OMP_LOOP_TRANS:
case OACC_LOOP:
ret = gimplify_omp_for (expr_p, pre_p);
break;
@@ -2253,6 +2253,20 @@ omp_declare_variant_remove_hook (struct cgraph_node *node, void *)
}
}
+/* Return true if C is a clause that represents an OpenMP loop transformation
+ directive, false otherwise. */
+
+bool
+omp_loop_transform_clause_p (tree c)
+{
+ if (c == NULL)
+ return false;
+
+ enum omp_clause_code code = OMP_CLAUSE_CODE (c);
+ return (code == OMP_CLAUSE_UNROLL_FULL || code == OMP_CLAUSE_UNROLL_PARTIAL
+ || code == OMP_CLAUSE_UNROLL_NONE || code == OMP_CLAUSE_TILE);
+}
+
/* Try to resolve declare variant, return the variant decl if it should
be used instead of base, or base otherwise. */
@@ -114,6 +114,7 @@ extern int omp_context_selector_matches (tree);
extern int omp_context_selector_set_compare (const char *, tree, tree);
extern tree omp_get_context_selector (tree, const char *, const char *);
extern tree omp_resolve_declare_variant (tree);
+extern bool omp_loop_transform_clause_p (tree);
extern tree oacc_launch_pack (unsigned code, tree device, unsigned op);
extern tree oacc_replace_fn_attrib_attr (tree attribs, tree dims);
extern void oacc_replace_fn_attrib (tree fn, tree dims);
new file mode 100644
@@ -0,0 +1,1815 @@
+/* OMP loop transformation pass. Transforms loops according to
+ loop transformations directives such as "omp unroll".
+
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "pretty-print.h"
+#include "diagnostic-core.h"
+#include "backend.h"
+#include "target.h"
+#include "tree.h"
+#include "tree-inline.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "tree-pass.h"
+#include "gimple-walk.h"
+#include "gimple-pretty-print.h"
+#include "gimplify.h"
+#include "ssa.h"
+#include "tree-into-ssa.h"
+#include "fold-const.h"
+#include "print-tree.h"
+#include "omp-general.h"
+
+/* Context information for walk_omp_for_loops. */
+struct walk_ctx
+{
+ /* The most recently visited gomp_for that has been transformed and
+ for which gimple_omp_for_set_combined_into_p returned true. */
+ gomp_for *inner_combined_loop;
+
+ /* The innermost bind enclosing the currently visited node. */
+ gbind *bind;
+};
+
+static unsigned int walk_omp_for_loops (gimple_seq *, walk_ctx *);
+static enum tree_code omp_adjust_neq_condition (tree v, tree step);
+
+static bool
+non_rectangular_p (const gomp_for *omp_for)
+{
+ size_t collapse = gimple_omp_for_collapse (omp_for);
+ for (size_t i = 0; i < collapse; i++)
+ {
+ if (TREE_CODE (gimple_omp_for_final (omp_for, i)) == TREE_VEC
+ || TREE_CODE (gimple_omp_for_initial (omp_for, i)) == TREE_VEC)
+ return true;
+ }
+
+ return false;
+}
+
+/* Callback for subst_var. */
+
+static tree
+subst_var_in_op (tree *t, int *subtrees ATTRIBUTE_UNUSED, void *data)
+{
+
+ auto *wi = (struct walk_stmt_info *)data;
+ auto from_to = (std::pair<tree, tree> *)wi->info;
+
+ if (*t == from_to->first)
+ {
+ *t = from_to->second;
+ wi->changed = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Substitute all occurrences of FROM in the operands of the GIMPLE statements
+ in SEQ by TO. */
+
+static void
+subst_var (gimple_seq *seq, tree from, tree to)
+{
+ gcc_assert (VAR_P (from));
+ gcc_assert (VAR_P (to));
+
+ std::pair<tree, tree> from_to (from, to);
+ struct walk_stmt_info wi;
+ memset (&wi, 0, sizeof (wi));
+ wi.info = (void *)&from_to;
+
+ walk_gimple_seq_mod (seq, NULL, subst_var_in_op, &wi);
+}
+
+/* Return the type that should be used for computing the iteration count of a
+ loop with the given index VAR and upper/lower bound FINAL according to
+ OpenMP 5.1. */
+
+tree
+gomp_for_iter_count_type (tree var, tree final)
+{
+ tree var_type = TREE_TYPE (var);
+
+ if (POINTER_TYPE_P (var_type))
+ return ptrdiff_type_node;
+
+ tree operand_type = TREE_TYPE (final);
+ if (TYPE_UNSIGNED (var_type) && !TYPE_UNSIGNED (operand_type))
+ return signed_type_for (operand_type);
+
+ return var_type;
+}
+
+extern tree
+gimple_assign_rhs_to_tree (gimple *stmt);
+
+/* Substitute all definitions from SEQ bottom-up into EXPR. This is used to
+ reconstruct a tree from a gimplified expression for determinig whether or not
+ the number of iterations of a loop is constant. */
+
+tree
+subst_defs (tree expr, gimple_seq seq)
+{
+ gimple_seq_node last = gimple_seq_last (seq);
+ gimple_seq_node first = gimple_seq_first (seq);
+ for (auto n = last; n != NULL; n = n != first ? n->prev : NULL)
+ {
+ if (!is_gimple_assign (n))
+ continue;
+
+ tree lhs = gimple_assign_lhs (n);
+ tree rhs = gimple_assign_rhs_to_tree (n);
+ std::pair<tree, tree> from_to (lhs, rhs);
+ struct walk_stmt_info wi;
+ memset (&wi, 0, sizeof (wi));
+ wi.info = (void *)&from_to;
+ walk_tree (&expr, subst_var_in_op, &wi, NULL);
+ expr = fold (expr);
+ }
+
+ return expr;
+}
+
+/* Return an expression for the number of iterations of the loop at
+ the given LEVEL of OMP_FOR.
+
+ If the expression is a negative constant, this means that the loop
+ is infinite. This can only be recognized for loops with constant
+ initial, final, and step values. In general, according to the
+ OpenMP specification, the behaviour is unspecified if the number of
+ iterations does not fit the types used for their computation, and
+ hence in particular if the loop is infinite. */
+
+tree
+gomp_for_number_of_iterations (const gomp_for *omp_for, size_t level)
+{
+ gcc_assert (!non_rectangular_p (omp_for));
+ tree init = gimple_omp_for_initial (omp_for, level);
+ tree final = gimple_omp_for_final (omp_for, level);
+ tree_code cond = gimple_omp_for_cond (omp_for, level);
+ tree index = gimple_omp_for_index (omp_for, level);
+ tree type = gomp_for_iter_count_type (index, final);
+ tree incr = gimple_omp_for_incr (omp_for, level);
+ tree step = omp_get_for_step_from_incr (gimple_location (omp_for), incr);
+
+ init = subst_defs (init, gimple_omp_for_pre_body (omp_for));
+ init = fold (init);
+ final = subst_defs (final, gimple_omp_for_pre_body (omp_for));
+ final = fold (final);
+
+ tree_code minus_code = MINUS_EXPR;
+ tree diff_type = type;
+ if (POINTER_TYPE_P (TREE_TYPE (final)))
+ {
+ minus_code = POINTER_DIFF_EXPR;
+ diff_type = ptrdiff_type_node;
+ }
+
+
+ /* Identify a simple case in which the loop does not iterate. The
+ computation below could not tell this apart from an infinite
+ loop, hence we handle this separately for better diagnostic
+ messages. */
+ gcc_assert (cond == GT_EXPR || cond == LT_EXPR);
+ if (TREE_CONSTANT (init) && TREE_CONSTANT (final)
+ && ((cond == GT_EXPR && tree_int_cst_le (init, final))
+ || (cond == LT_EXPR && tree_int_cst_le (final, init))))
+ return build_int_cst (diff_type, 0);
+
+ tree diff = fold_build2 (minus_code, diff_type, final, init);
+
+ /* Divide diff by the step.
+
+ We could always use CEIL_DIV_EXPR since only non-negative results
+ correspond to valid number of iterations and the behaviour is
+ unspecified by the spec otherwise. But we try to get the rounding
+ right for constant negative values to identify infinite loops
+ more precisely for better warnings. */
+ tree_code div_expr = CEIL_DIV_EXPR;
+ if (TREE_CONSTANT (diff) && TREE_CONSTANT (step))
+ {
+ bool diff_is_neg = tree_int_cst_lt (diff, size_zero_node);
+ bool step_is_neg = tree_int_cst_lt (step, size_zero_node);
+ if ((diff_is_neg && !step_is_neg)
+ || (!diff_is_neg && step_is_neg))
+ div_expr = FLOOR_DIV_EXPR;
+ }
+
+ diff = fold_build2 (div_expr, type, diff, step);
+ return diff;
+}
+
+/* Return true if the expression representing the number of iterations
+ for OMP_FOR is a non-negative constant and set ITERATIONS to the
+ value of that expression. Otherwise, return false. Set INFINITE to
+ true if the number of iterations was recognized to be infinite. */
+
+bool
+gomp_for_constant_iterations_p (gomp_for *omp_for,
+ unsigned HOST_WIDE_INT *iterations,
+ bool *infinite = NULL)
+{
+ tree t = gomp_for_number_of_iterations (omp_for, 0);
+ if (!TREE_CONSTANT (t))
+ return false;
+
+ if (infinite &&
+ tree_int_cst_lt (t, size_zero_node))
+ *infinite = true;
+ else if (tree_fits_uhwi_p (t))
+ {
+ *iterations = tree_to_uhwi (t);
+ return true;
+ }
+
+ return false;
+}
+
+static gimple_seq
+expand_transformed_loop (gomp_for *omp_for);
+
+/* Split a gomp_for that represents a collapsed loop-nest into single
+ loops. The result is a gomp_for of the same kind which is not collapsed
+ (i.e. gimple_omp_for_collapse (OMP_FOR) == 1) and which contains nested,
+ non-collapsed gomp_for loops whose kind is GF_OMP_FOR_KIND_TRANSFORM_LOOP
+ (i.e. they will be lowered into plain, non-omp loops by this pass) for each
+ of the loops of OMP_FOR. All loops whose depth is strictly less than
+ FROM_DEPTH are left collapsed. */
+
+static gomp_for*
+gomp_for_uncollapse (gomp_for *omp_for, int from_depth = 0, bool expand = false)
+{
+ int collapse = gimple_omp_for_collapse (omp_for);
+ gcc_assert (from_depth < collapse);
+ gcc_assert (from_depth >= 0);
+
+ if (collapse <= 1)
+ return omp_for;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS, omp_for,
+ "Uncollapsing loop:\n %G\n",
+ static_cast <gimple *> (omp_for));
+
+ gimple_seq body = gimple_omp_body (omp_for);
+ gomp_for *level_omp_for = omp_for;
+ for (int level = collapse - 1; level >= from_depth; level--)
+ {
+ level_omp_for = gimple_build_omp_for (body,
+ GF_OMP_FOR_KIND_TRANSFORM_LOOP,
+ NULL, 1, NULL);
+ gimple_omp_for_set_cond (level_omp_for, 0,
+ gimple_omp_for_cond (omp_for, level));
+ gimple_omp_for_set_initial (level_omp_for, 0,
+ gimple_omp_for_initial (omp_for, level));
+ gimple_omp_for_set_final (level_omp_for, 0,
+ gimple_omp_for_final (omp_for, level));
+ gimple_omp_for_set_incr (level_omp_for, 0,
+ gimple_omp_for_incr (omp_for, level));
+ gimple_omp_for_set_index (level_omp_for, 0,
+ gimple_omp_for_index (omp_for, level));
+
+
+ if (expand)
+ body = expand_transformed_loop (level_omp_for);
+ else
+ body = level_omp_for;
+ }
+
+ omp_for->collapse = from_depth;
+
+ if (from_depth > 0)
+ {
+ gimple_omp_set_body (omp_for, body);
+ omp_for->collapse = from_depth;
+ return omp_for;
+ }
+
+ gimple_omp_for_set_clauses (level_omp_for, gimple_omp_for_clauses (omp_for));
+ gimple_omp_for_set_pre_body (level_omp_for, gimple_omp_for_pre_body (omp_for));
+ gimple_omp_for_set_combined_into_p (level_omp_for,
+ gimple_omp_for_combined_into_p (omp_for));
+ gimple_omp_for_set_combined_p (level_omp_for,
+ gimple_omp_for_combined_p (omp_for));
+
+ return level_omp_for;
+}
+
+static tree
+build_loop_exit_cond (tree index, tree_code cond, tree final, gimple_seq *seq)
+{
+ tree exit_cond
+ = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
+ fold_build2 (cond, boolean_type_node, index, final));
+ tree res = create_tmp_var (boolean_type_node);
+ gimplify_assign (res, exit_cond, seq);
+
+ return res;
+}
+
+/* Returns a register that contains the final value of a loop as described by
+ FINAL. This is necessary for non-rectangular loops. */
+
+static tree
+build_loop_final (tree final, gimple_seq *seq)
+{
+ if (TREE_CODE (final) != TREE_VEC) /* rectangular loop-nest */
+ return final;
+
+ tree coeff = TREE_VEC_ELT (final, 0);
+ tree outer_var = TREE_VEC_ELT (final, 1);
+ tree constt = TREE_VEC_ELT (final, 2);
+
+ tree type = TREE_TYPE (outer_var);
+ tree val = fold_build2 (MULT_EXPR, type, coeff, outer_var);
+ val = fold_build2 (PLUS_EXPR, type, val, constt);
+
+ tree res = create_tmp_var (type);
+ gimplify_assign (res, val, seq);
+
+ return res;
+}
+
+/* Unroll the loop BODY UNROLL_FACTOR times, replacing the INDEX
+ variable by a local copy in each copy of the body that will be
+ incremented as specified by INCR. If BUILD_EXIT_CONDS is true,
+ insert a test of the loop exit condition given COND and FINAL
+ before each copy of the body that will exit the loop if the value
+ of the local index variable satisfies the loop exit condition.
+
+ For example, the unrolling with BUILD_EXIT_CONDS == true turns
+
+ for (i = 0; i < 3; i = i + 1)
+ {
+ BODY
+ }
+
+ into
+
+ for (i = 0; i < n; i = i + 1)
+ {
+ i.0 = i
+ if (!(i_0 < n))
+ goto exit
+ BODY_COPY_1[i/i.0] i.e. index var i replaced by i.0
+ if (!(i_1 < n))
+ goto exit
+ i.1 = i.0 + 1
+ BODY_COPY_2[i/i.1]
+ if (!(i_3 < n))
+ goto exit
+ i.2 = i.2 + 1
+ BODY_COPY_3[i/i.2]
+ exit:
+ }
+ */
+static gimple_seq
+build_unroll_body (gimple_seq body, tree unroll_factor, tree index, tree incr,
+ bool build_exit_conds = false, tree final = NULL_TREE,
+ tree_code *cond = NULL)
+{
+ gcc_assert ((!build_exit_conds && !final && !cond)
+ || (build_exit_conds && final && cond));
+
+ gimple_seq new_body = NULL;
+
+ push_gimplify_context ();
+
+ if (build_exit_conds)
+ final = build_loop_final (final, &new_body);
+
+ tree local_index = create_tmp_var (TREE_TYPE (index));
+ subst_var (&body, index, local_index);
+ tree local_incr = unshare_expr (incr);
+ TREE_OPERAND (local_incr, 0) = local_index;
+
+ tree exit_label = create_artificial_label (gimple_location (body));
+
+ unsigned HOST_WIDE_INT n = tree_to_uhwi (unroll_factor);
+ for (unsigned HOST_WIDE_INT i = 0; i < n; i++)
+ {
+ if (i == 0)
+ gimplify_assign (local_index, index, &new_body);
+ else
+ gimplify_assign (local_index, local_incr, &new_body);
+
+ tree body_copy_label = create_artificial_label (gimple_location (body));
+
+ if (build_exit_conds)
+ {
+ tree exit_cond
+ = build_loop_exit_cond (local_index, *cond, final, &new_body);
+ gimple_seq_add_stmt (
+ &new_body,
+ gimple_build_cond (EQ_EXPR, exit_cond, boolean_true_node,
+ exit_label, body_copy_label));
+ }
+
+ gimple_seq body_copy = copy_gimple_seq_and_replace_locals (body);
+ gimple_seq_add_stmt (&new_body, gimple_build_label (body_copy_label));
+ gimple_seq_add_seq (&new_body, body_copy);
+ }
+
+
+ gbind *bind = gimple_build_bind (NULL, new_body, NULL);
+ pop_gimplify_context (bind);
+
+ gimple_seq result = NULL;
+ gimple_seq_add_stmt (&result, bind);
+ gimple_seq_add_stmt (&result, gimple_build_label (exit_label));
+ return result;
+}
+
+static gimple_seq transform_gomp_for (gomp_for *, tree, walk_ctx *ctx);
+
+/* Execute the partial unrolling transformation for OMP_FOR with the given
+ UNROLL_FACTOR and return the resulting gimple bind. LOC is the location for
+ diagnostic messages.
+
+ Example
+ --------
+ --------
+
+ Original loop
+ -------------
+
+ #pragma omp for unroll_partial(3)
+ for (i = 0; i < 100; i = i + 1)
+ {
+ BODY
+ }
+
+ gets, roughly, translated to
+
+ {
+ #pragma omp for
+ for (i = 0; i < 100; i = i + 3)
+ {
+ i.0 = i
+ if i.0 > 100:
+ goto exit_label
+ BODY_COPY_1[i/i.0] i.e. index var replaced
+ i.1 = i + 1
+ if i.1 > 100:
+ goto exit_label
+ BODY_COPY_2[i/1.1]
+ i.2 = i + 2
+ if i.2 > 100:
+ goto exit_label
+ BODY_COPY_3[i/i.2]
+
+ exit_label:
+ }
+*/
+
+static gimple_seq
+partial_unroll (gomp_for *omp_for, size_t level, tree unroll_factor,
+ location_t loc, tree transformation_clauses, walk_ctx *ctx)
+{
+ gcc_assert (unroll_factor);
+ gcc_assert (
+ OMP_CLAUSE_CODE (transformation_clauses) == OMP_CLAUSE_UNROLL_PARTIAL
+ || OMP_CLAUSE_CODE (transformation_clauses) == OMP_CLAUSE_UNROLL_NONE);
+
+ /* Partial unrolling reduces the loop nest depth of a canonical loop nest to 1
+ hence outer directives cannot require a greater collapse. */
+ gcc_assert (gimple_omp_for_collapse (omp_for) <= level + 1);
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS,
+ dump_user_location_t::from_location_t (loc),
+ "Partially unrolling loop:\n %G\n",
+ static_cast<gimple *> (omp_for));
+
+ gomp_for *unrolled_for = as_a<gomp_for *> (copy_gimple_seq_and_replace_locals (omp_for));
+
+ tree final = gimple_omp_for_final (unrolled_for, level);
+ tree incr = gimple_omp_for_incr (unrolled_for, level);
+ tree index = gimple_omp_for_index (unrolled_for, level);
+ gimple_seq body = gimple_omp_body (unrolled_for);
+
+ tree_code cond = gimple_omp_for_cond (unrolled_for, level);
+ tree step = TREE_OPERAND (incr, 1);
+ gimple_omp_set_body (unrolled_for,
+ build_unroll_body (body, unroll_factor, index, incr,
+ true, final, &cond));
+
+ gbind *result_bind = gimple_build_bind (NULL, NULL, NULL);
+
+ push_gimplify_context ();
+
+ tree scaled_step
+ = fold_build2 (MULT_EXPR, TREE_TYPE (step),
+ fold_convert (TREE_TYPE (step), unroll_factor), step);
+
+ /* For combined constructs, step will be gimplified on the outer
+ gomp_for. */
+ if (!gimple_omp_for_combined_into_p (omp_for)
+ && !TREE_CONSTANT (scaled_step))
+ {
+ tree var = create_tmp_var (TREE_TYPE (step), ".omp_unroll_step");
+ gimplify_assign (var, scaled_step,
+ gimple_omp_for_pre_body_ptr (unrolled_for));
+ scaled_step = var;
+ }
+ TREE_OPERAND (incr, 1) = scaled_step;
+ gimple_omp_for_set_incr (unrolled_for, level, incr);
+
+ pop_gimplify_context (result_bind);
+
+ if (gimple_omp_for_combined_into_p (omp_for))
+ ctx->inner_combined_loop = unrolled_for;
+
+ tree remaining_clauses = OMP_CLAUSE_CHAIN (transformation_clauses);
+ gimple_seq_add_stmt (
+ gimple_bind_body_ptr (result_bind),
+ transform_gomp_for (unrolled_for, remaining_clauses, ctx));
+
+ return result_bind;
+}
+
+static gimple_seq
+full_unroll (gomp_for *omp_for, location_t loc, walk_ctx *ctx ATTRIBUTE_UNUSED)
+{
+ tree init = gimple_omp_for_initial (omp_for, 0);
+ unsigned HOST_WIDE_INT niter = 0;
+ bool infinite = false;
+ bool constant = gomp_for_constant_iterations_p (omp_for, &niter, &infinite);
+
+ if (infinite)
+ {
+ warning_at (loc, 0, "Cannot apply full unrolling to infinite loop");
+ return NULL;
+ }
+ if (!constant)
+ {
+ error_at (loc, "Cannot apply full unrolling to loop with "
+ "non-constant number of iterations");
+ return omp_for;
+ }
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS,
+ dump_user_location_t::from_location_t (loc),
+ "Fully unrolling loop with "
+ HOST_WIDE_INT_PRINT_UNSIGNED
+ " iterations :\n %G\n", niter,
+ static_cast <gimple *>(omp_for));
+
+ tree incr = gimple_omp_for_incr (omp_for, 0);
+ tree index = gimple_omp_for_index (omp_for, 0);
+ gimple_seq body = gimple_omp_body (omp_for);
+
+ tree unroll_factor = build_int_cst (TREE_TYPE (init), niter);
+
+ gimple_seq unrolled = NULL;
+ gimple_seq_add_seq (&unrolled, gimple_omp_for_pre_body (omp_for));
+ gimplify_assign (index, init, &unrolled);
+ push_gimplify_context ();
+ gimple_seq_add_seq (&unrolled,
+ build_unroll_body (body, unroll_factor, index, incr));
+
+ gbind *result_bind = gimple_build_bind (NULL, unrolled, NULL);
+ pop_gimplify_context (result_bind);
+ return result_bind;
+}
+
+/* Decides if the OMP_FOR for which the user did not specify the type of
+ unrolling to apply in the 'unroll' directive represented by the TRANSFORM
+ clause should be fully unrolled. */
+
+static bool
+assign_unroll_full_clause_p (gomp_for *omp_for, tree transform)
+{
+ gcc_assert (OMP_CLAUSE_CODE (transform) == OMP_CLAUSE_UNROLL_NONE);
+ gcc_assert (OMP_CLAUSE_CHAIN (transform) == NULL);
+
+ /* Full unrolling turns the loop into a non-loop and hence
+ the following transformations would fail. */
+ if (TREE_CHAIN (transform) != NULL_TREE)
+ return false;
+
+ unsigned HOST_WIDE_INT num_iters;
+ if (!gomp_for_constant_iterations_p (omp_for, &num_iters)
+ || num_iters
+ > (unsigned HOST_WIDE_INT)param_omp_unroll_full_max_iterations)
+ return false;
+
+ if (dump_enabled_p ())
+ {
+ auto loc = dump_user_location_t::from_location_t (
+ OMP_CLAUSE_LOCATION (transform));
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
+ "assigned %<full%> clause to %<omp unroll%> with small "
+ "constant number of iterations\n");
+ }
+
+ return true;
+}
+
+/* If the OMP_FOR for which the user did not specify the type of unrolling in
+ the 'unroll' directive in the TRANSFORM clause should be partially unrolled,
+ return the unroll factor, otherwise return null. */
+
+static tree
+assign_unroll_partial_clause_p (gomp_for *omp_for ATTRIBUTE_UNUSED,
+ tree transform)
+{
+ gcc_assert (OMP_CLAUSE_CODE (transform) == OMP_CLAUSE_UNROLL_NONE);
+
+ if (param_omp_unroll_default_factor == 0)
+ return NULL;
+
+ tree unroll_factor
+ = build_int_cst (integer_type_node, param_omp_unroll_default_factor);
+
+ if (dump_enabled_p ())
+ {
+ auto loc = dump_user_location_t::from_location_t (
+ OMP_CLAUSE_LOCATION (transform));
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
+ "added %<partial(%u)%> clause to %<omp unroll%> directive\n",
+ param_omp_unroll_default_factor);
+ }
+
+ return unroll_factor;
+}
+
+/* Generate the code for an OMP_FOR that represents the result of a
+ loop transformation which is not associated with any directive and
+ which will hence not be lowered in the omp-expansion. */
+
+static gimple_seq
+expand_transformed_loop (gomp_for *omp_for)
+{
+ gcc_assert (gimple_omp_for_kind (omp_for)
+ == GF_OMP_FOR_KIND_TRANSFORM_LOOP);
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS, omp_for,
+ "Expanding loop:\n %G\n",
+ static_cast <gimple *> (omp_for));
+
+ push_gimplify_context ();
+
+ omp_for = gomp_for_uncollapse (omp_for);
+
+ tree incr = gimple_omp_for_incr (omp_for, 0);
+ tree index = gimple_omp_for_index (omp_for, 0);
+ tree init = gimple_omp_for_initial (omp_for, 0);
+ tree final = gimple_omp_for_final (omp_for, 0);
+ tree_code cond = gimple_omp_for_cond (omp_for, 0);
+ gimple_seq body = gimple_omp_body (omp_for);
+ gimple_seq pre_body = gimple_omp_for_pre_body (omp_for);
+
+ gimple_seq loop = NULL;
+
+ tree exit_label = create_artificial_label (UNKNOWN_LOCATION);
+ tree cycle_label = create_artificial_label (UNKNOWN_LOCATION);
+ tree body_label = create_artificial_label (UNKNOWN_LOCATION);
+
+ gimple_seq_add_seq (&loop, pre_body);
+ gimplify_assign (index, init, &loop);
+ tree final_var = final;
+ if (TREE_CODE (final) != VAR_DECL)
+ {
+ final_var = create_tmp_var (TREE_TYPE (final));
+ gimplify_assign (final_var, final, &loop);
+ }
+
+ gimple_seq_add_stmt (&loop, gimple_build_label (cycle_label));
+ gimple_seq_add_stmt (&loop, gimple_build_cond (cond, index, final_var,
+ body_label, exit_label));
+ gimple_seq_add_stmt (&loop, gimple_build_label (body_label));
+ gimple_seq_add_seq (&loop, body);
+ gimplify_assign (index, incr, &loop);
+ gimple_seq_add_stmt (&loop, gimple_build_goto (cycle_label));
+ gimple_seq_add_stmt (&loop, gimple_build_label (exit_label));
+
+ gbind *bind = gimple_build_bind (NULL, loop, NULL);
+ pop_gimplify_context (bind);
+
+ return bind;
+}
+
+static enum tree_code
+omp_adjust_neq_condition (tree v, tree step)
+{
+ gcc_assert (TREE_CODE (step) == INTEGER_CST);
+ if (TREE_CODE (TREE_TYPE (v)) == INTEGER_TYPE)
+ {
+ if (integer_onep (step))
+ return LT_EXPR;
+ else
+ {
+ gcc_assert (integer_minus_onep (step));
+ return GT_EXPR;
+ }
+ }
+ else
+ {
+ tree unit = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (v)));
+ gcc_assert (TREE_CODE (unit) == INTEGER_CST);
+ if (tree_int_cst_equal (unit, step))
+ return LT_EXPR;
+ else
+ {
+ gcc_assert (wi::neg (wi::to_widest (unit))
+ == wi::to_widest (step));
+ return GT_EXPR;
+ }
+ }
+}
+
+/* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or GT_EXPR,
+ given that V is the loop index variable and STEP is loop step.
+
+ This function has been derived from omp_adjust_for_condition.
+ In contrast to the original function it does not add 1 or
+ -1 to the the final value when converting <=,>= to <,>
+ for a pointer-type index variable. Instead, this function
+ adds or subtracts the type size in bytes. This is necessary
+ to determine the number of iterations correctly. */
+
+void
+omp_adjust_for_condition2 (location_t loc, enum tree_code *cond_code, tree *n2,
+ tree v, tree step)
+{
+ switch (*cond_code)
+ {
+ case LT_EXPR:
+ case GT_EXPR:
+ break;
+
+ case NE_EXPR:
+ *cond_code = omp_adjust_neq_condition (v, step);
+ break;
+
+ case LE_EXPR:
+ if (POINTER_TYPE_P (TREE_TYPE (*n2)))
+ {
+ tree unit = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (v)));
+ HOST_WIDE_INT type_unit = tree_to_shwi (unit);
+
+ *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, type_unit);
+ }
+ else
+ *n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2,
+ build_int_cst (TREE_TYPE (*n2), 1));
+ *cond_code = LT_EXPR;
+ break;
+ case GE_EXPR:
+ if (POINTER_TYPE_P (TREE_TYPE (*n2)))
+ {
+ tree unit = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (v)));
+ HOST_WIDE_INT type_unit = tree_to_shwi (unit);
+ *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1 * type_unit);
+ }
+ else
+ *n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2,
+ build_int_cst (TREE_TYPE (*n2), 1));
+ *cond_code = GT_EXPR;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Transform the condition of OMP_FOR to either LT_EXPR or GT_EXPR and adjust
+ the final value as necessary. */
+
+static bool
+canonicalize_conditions (gomp_for *omp_for)
+{
+ size_t collapse = gimple_omp_for_collapse (omp_for);
+ location_t loc = gimple_location (omp_for);
+ bool new_decls = false;
+
+ gimple_seq *pre_body = gimple_omp_for_pre_body_ptr (omp_for);
+ for (size_t l = 0; l < collapse; l++)
+ {
+ enum tree_code cond = gimple_omp_for_cond (omp_for, l);
+
+ if (cond == LT_EXPR || cond == GT_EXPR)
+ continue;
+
+ tree incr = gimple_omp_for_incr (omp_for, l);
+ tree step = omp_get_for_step_from_incr (loc, incr);
+ tree index = gimple_omp_for_index (omp_for, l);
+ tree final = gimple_omp_for_final (omp_for, l);
+ tree orig_final = final;
+ /* If final refers to the index variable of an outer level, i.e.
+ the loop nest is non-rectangular, only convert NE_EXPR. This
+ is necessary for unrolling. Unrolling needs to multiply the
+ step by the unrolling factor, but non-constant step values
+ are impossible with NE_EXPR. */
+ if (TREE_CODE (final) == TREE_VEC)
+ {
+ cond = omp_adjust_neq_condition (TREE_VEC_ELT (final, 1),
+ TREE_OPERAND (incr, 1));
+ gimple_omp_for_set_cond (omp_for, l, cond);
+ continue;
+ }
+
+ omp_adjust_for_condition2 (loc, &cond, &final, index, step);
+
+ gimple_omp_for_set_cond (omp_for, l, cond);
+ if (final == orig_final)
+ continue;
+
+ /* If this is a combined construct, gimplify the final on the
+ outer construct. */
+ if (TREE_CODE (final) != INTEGER_CST
+ && !gimple_omp_for_combined_into_p (omp_for))
+ {
+ tree new_final = create_tmp_var (TREE_TYPE (final));
+ gimplify_assign (new_final, final, pre_body);
+ final = new_final;
+ new_decls = true;
+ }
+
+ gimple_omp_for_set_final (omp_for, l, final);
+ }
+
+ return new_decls;
+}
+
+/* Execute the tiling transformation for OMP_FOR with the given TILE_SIZES and
+ return the resulting gimple bind. TILE_SIZES must be a non-empty tree chain
+ of integer constants and the collapse of OMP_FOR must be at least the length
+ of TILE_SIZES. TRANSFORMATION_CLAUSES are the loop transformations that
+ must be applied to OMP_FOR. Those are applied on the result of the tiling
+ transformation. LOC is the location for diagnostic messages.
+
+ Example 1
+ ---------
+ ---------
+
+ Original loop
+ -------------
+
+ #pragma omp for
+ #pragma omp tile sizes(3)
+ for (i = 1; i <= n; i = i + 1)
+ {
+ body;
+ }
+
+ Internally, the tile directive is represented as a clause on the
+ omp for, i.e. as #pragma omp for tile_sizes(3).
+
+ Transformed loop
+ ----------------
+
+ #pragma omp for
+ for (.omp_tile_index = 1; .omp_tile_index < ceil(n/3); .omp_tile_index = .omp_tile_index + 3)
+ {
+ D.4287 = .omp_tile_index + 3 + 1
+ #pragma omp loop_transform
+ for (i = .omp_tile_index; i < D.4287; i = i + 1)
+ {
+ if (i.0 > n)
+ goto L.0
+ body;
+ }
+ L_0:
+ }
+
+ The outer loop is the "floor loop" and the inner loop is the "tile
+ loop". The tile loop is never in canonical loop nest form and
+ hence it cannot be associated with any loop construct. The
+ GCC-internal "omp loop transform" construct will be lowered after
+ the tiling transformation.
+ */
+
+static gimple_seq
+tile (gomp_for *omp_for, location_t loc, size_t start_level, tree tile_sizes,
+ tree transformation_clauses, walk_ctx *ctx)
+{
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS,
+ dump_user_location_t::from_location_t (loc),
+ "Executing tile transformation %T:\n %G\n",
+ transformation_clauses, static_cast<gimple *> (omp_for));
+
+ gimple_seq tile_loops = copy_gimple_seq_and_replace_locals (omp_for);
+ gimple_seq floor_loops = copy_gimple_seq_and_replace_locals (omp_for);
+
+ size_t collapse = gimple_omp_for_collapse (omp_for);
+ size_t tiling_depth = list_length (tile_sizes);
+ tree clauses = gimple_omp_for_clauses (omp_for);
+ size_t clause_collapse = 1;
+ tree collapse_clause = NULL;
+
+ if (tree c = omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
+ {
+ error_at (OMP_CLAUSE_LOCATION (c),
+ "%<ordered%> invalid in conjunction with %<omp tile%>");
+ return omp_for;
+ }
+
+ if (tree c = omp_find_clause (clauses, OMP_CLAUSE_COLLAPSE))
+ {
+ tree expr = OMP_CLAUSE_COLLAPSE_EXPR (c);
+ clause_collapse = tree_to_uhwi (expr);
+ collapse_clause = c;
+ }
+
+ /* The tiled loop nest is a canonical loop nest with nesting depth
+ tiling_depth. The tile loops below that level are not in
+ canonical loop nest form and hence cannot be associated with a
+ loop construct. */
+ if (clause_collapse > tiling_depth + start_level)
+ {
+ error_at (OMP_CLAUSE_LOCATION (collapse_clause),
+ "collapse cannot extend below the floor loops "
+ "generated by the %<omp tile%> construct");
+ OMP_CLAUSE_COLLAPSE_EXPR (collapse_clause)
+ = build_int_cst (unsigned_type_node, start_level + tiling_depth);
+ return transform_gomp_for (omp_for, NULL, ctx);
+ }
+
+ if (start_level + tiling_depth > collapse)
+ return transform_gomp_for (omp_for, NULL, ctx);
+
+ gcc_assert (collapse >= clause_collapse);
+
+ push_gimplify_context ();
+
+ /* Create the index variables for iterating the tiles in the floor
+ loops which will be the loops at levels start_level
+ ... start_level + tiling_depth of the transformed loop nest. The
+ loops at level 0 ... start_level - 1 are left unchanged. */
+ gimple_seq floor_loops_pre_body = NULL;
+ size_t tile_level = 0;
+ auto_vec<tree> sizes_vec;
+ for (tree el = tile_sizes; el; el = TREE_CHAIN (el), tile_level++)
+ {
+ size_t nest_level = start_level + tile_level;
+ tree index = gimple_omp_for_index (omp_for, nest_level);
+ tree init = gimple_omp_for_initial (omp_for, nest_level);
+ tree incr = gimple_omp_for_incr (omp_for, nest_level);
+ tree step = TREE_OPERAND (incr, 1);
+
+ /* Initialize original index variables in the pre-body. The
+ loop lowering will not initialize them because of the changed
+ index variables. */
+ gimplify_assign (index, init, &floor_loops_pre_body);
+
+ tree tile_size = fold_convert (TREE_TYPE (step), TREE_VALUE (el));
+ sizes_vec.safe_push (tile_size);
+ tree tile_index = create_tmp_var (TREE_TYPE (index), ".omp_tile_index");
+ gimplify_assign (tile_index, init, &floor_loops_pre_body);
+
+ /* Floor loops */
+ step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, tile_size);
+ tree tile_step = step;
+ /* For combined constructs, step will be gimplified on the outer
+ gomp_for. */
+ if (!gimple_omp_for_combined_into_p (omp_for) && !TREE_CONSTANT (step))
+ {
+ tile_step = create_tmp_var (TREE_TYPE (step), ".omp_tile_step");
+ gimplify_assign (tile_step, step, &floor_loops_pre_body);
+ }
+ incr = fold_build2 (TREE_CODE (incr), TREE_TYPE (incr), tile_index,
+ tile_step);
+ gimple_omp_for_set_incr (floor_loops, nest_level, incr);
+ gimple_omp_for_set_index (floor_loops, nest_level, tile_index);
+ }
+
+ gbind *result_bind = gimple_build_bind (NULL, NULL, NULL);
+ pop_gimplify_context (result_bind);
+ gimple_seq_add_seq (gimple_omp_for_pre_body_ptr (floor_loops),
+ floor_loops_pre_body);
+
+ /* The tiling loops will not form a perfect loop nest because the
+ loop for each tiling dimension needs to check if the current tile
+ is incomplete and this check is intervening code. Since OpenMP
+ 5.1 does not allow the collapse of the loop-nest to extend beyond
+ the floor loops, this is not a problem.
+
+ "Uncollapse" the tiling loop nest, i.e. split the loop nest into
+ nested separate gomp_for structures for each level. This allows
+ to add the incomplete tile checks to each level loop. */
+
+ tile_loops = gomp_for_uncollapse (as_a <gomp_for *> (tile_loops));
+ for (size_t i = 0; i < start_level; i++)
+ tile_loops = gimple_omp_body (tile_loops);
+
+ gimple_omp_for_set_kind (as_a<gomp_for *> (tile_loops),
+ GF_OMP_FOR_KIND_TRANSFORM_LOOP);
+ gimple_omp_for_set_clauses (tile_loops, NULL_TREE);
+ gimple_omp_for_set_pre_body (tile_loops, NULL);
+
+ /* Transform the loop bodies of the "uncollapsed" tiling loops and
+ add them to the body of the floor loops. At this point, the
+ loop nest consists of perfectly nested gimple_omp_for constructs,
+ each representing a single loop. */
+ gimple_seq floor_loops_body = NULL;
+ gimple *level_loop = tile_loops;
+ gimple_seq_add_stmt (&floor_loops_body, tile_loops);
+ gimple_seq *surrounding_seq = &floor_loops_body;
+
+ push_gimplify_context ();
+
+ tree break_label = create_artificial_label (UNKNOWN_LOCATION);
+ gimple_seq_add_stmt (surrounding_seq, gimple_build_label (break_label));
+ for (size_t tile_level = 0; tile_level < tiling_depth; tile_level++)
+ {
+ gimple_seq level_preamble = NULL;
+ gimple_seq level_body = gimple_omp_body (level_loop);
+ auto gsi = gsi_start (level_body);
+
+ int nest_level = start_level + tile_level;
+ tree original_index = gimple_omp_for_index (omp_for, nest_level);
+ tree original_final = gimple_omp_for_final (omp_for, nest_level);
+
+ tree tile_index
+ = gimple_omp_for_index (floor_loops, nest_level);
+ tree tile_size = sizes_vec[tile_level];
+ tree type = TREE_TYPE (tile_index);
+ tree plus_type = type;
+
+ tree incr = gimple_omp_for_incr (omp_for, nest_level);
+ tree step = omp_get_for_step_from_incr (gimple_location (omp_for), incr);
+
+ gimple_seq *pre_body = gimple_omp_for_pre_body_ptr (level_loop);
+ gcc_assert (gimple_omp_for_collapse (level_loop) == 1);
+ tree_code original_cond = gimple_omp_for_cond (omp_for, nest_level);
+
+ gimple_omp_for_set_initial (level_loop, 0, tile_index);
+
+ tree tile_final = create_tmp_var (type);
+ tree scaled_tile_size
+ = fold_build2 (MULT_EXPR, TREE_TYPE (tile_size), tile_size, step);
+
+ tree_code plus_code = PLUS_EXPR;
+ if (POINTER_TYPE_P (TREE_TYPE (tile_index)))
+ {
+ plus_code = POINTER_PLUS_EXPR;
+ int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scaled_tile_size));
+ plus_type
+ = signed_or_unsigned_type_for (unsignedp, ptrdiff_type_node);
+ }
+
+ scaled_tile_size = fold_convert (plus_type, scaled_tile_size);
+ gimplify_assign (
+ tile_final,
+ fold_build2 (plus_code, type, tile_index, scaled_tile_size),
+ pre_body);
+ gimple_omp_for_set_final (level_loop, 0, tile_final);
+
+ push_gimplify_context ();
+
+ tree body_label = create_artificial_label (UNKNOWN_LOCATION);
+
+ /* Handle partial tiles, i.e. add a check that breaks from the tile loop
+ if the new index value does not belong to the iteration space of the
+ original loop. */
+ gimple_seq_add_stmt (&level_preamble,
+ gimple_build_cond (original_cond, original_index,
+ original_final, body_label,
+ break_label));
+ gimple_seq_add_stmt (&level_preamble, gimple_build_label (body_label));
+
+ gsi_insert_seq_before (&gsi, level_preamble, GSI_SAME_STMT);
+ gbind *level_bind = gimple_build_bind (NULL, NULL, NULL);
+ pop_gimplify_context (level_bind);
+ gimple_bind_set_body (level_bind, level_body);
+ gimple_omp_set_body (level_loop, level_bind);
+
+ surrounding_seq = &level_body;
+ level_loop = gsi_stmt (gsi);
+
+ /* The label for jumping out of the loop at the next
+ nesting level. For the outermost level, the label is put
+ after the loop-nest, for the last one it is not necessary. */
+ if (tile_level != tiling_depth - 1)
+ {
+ break_label = create_artificial_label (UNKNOWN_LOCATION);
+ gsi_insert_after (&gsi, gimple_build_label (break_label),
+ GSI_NEW_STMT);
+ }
+ }
+
+ gbind *tile_loops_bind;
+ tile_loops_bind = gimple_build_bind (NULL, tile_loops, NULL);
+ pop_gimplify_context (tile_loops_bind);
+
+ gimple_omp_set_body (floor_loops, tile_loops_bind);
+
+ tree remaining_clauses = OMP_CLAUSE_CHAIN (transformation_clauses);
+
+ /* Collapsing of the OMP_FOR is used both for the "omp tile"
+ implementation and for the actual "collapse" clause. If the
+ tiling depth was greater than the collapse depth required by the
+ clauses on OMP_FOR, the collapse of OMP_FOR must be adjusted to
+ the latter value and all loops below the new collapse depth must
+ be transformed to GF_OMP_FOR_KIND_TRANSFORM_LOOP to ensure their
+ lowering in this pass. */
+ size_t new_collapse = clause_collapse;
+
+ /* Keep the omp_for collapsed if there are further transformations */
+ if (remaining_clauses)
+ {
+ size_t next_transform_depth = 1;
+ if (OMP_CLAUSE_CODE (remaining_clauses) == OMP_CLAUSE_TILE)
+ next_transform_depth
+ = list_length (OMP_CLAUSE_TILE_SIZES (remaining_clauses));
+
+ size_t next_level
+ = tree_to_uhwi (OMP_CLAUSE_TRANSFORM_LEVEL (remaining_clauses));
+ /* The current "omp tile" transformation reduces the nesting depth
+ of the canonical loop-nest to TILING_DEPTH.
+ Hence the following "omp tile" transformation is invalid if
+ it requires a greater nesting depth. */
+ gcc_assert (next_level + next_transform_depth <=
+ start_level + tiling_depth);
+ if (next_level + next_transform_depth > new_collapse)
+ new_collapse = next_level + next_transform_depth;
+ }
+
+ if (collapse > new_collapse)
+ floor_loops = gomp_for_uncollapse (as_a<gomp_for *> (floor_loops),
+ new_collapse, true);
+
+ /* Lower the uncollapsed tile loops. */
+ walk_omp_for_loops (gimple_bind_body_ptr (tile_loops_bind), ctx);
+
+ gcc_assert (remaining_clauses || !collapse_clause
+ || gimple_omp_for_collapse (floor_loops)
+ == (size_t)clause_collapse);
+
+ if (gimple_omp_for_combined_into_p (omp_for))
+ ctx->inner_combined_loop = as_a<gomp_for *> (floor_loops);
+
+ /* Apply remaining transformation clauses and assemble the transformation
+ result. */
+ gimple_bind_set_body (result_bind,
+ transform_gomp_for (as_a<gomp_for *> (floor_loops),
+ remaining_clauses, ctx));
+
+ return result_bind;
+}
+
+/* Combined distribute or taskloop constructs are represented by two
+ or more nested gomp_for constructs which are created during
+ gimplification. Loop transformations on the combined construct are
+ executed on the innermost gomp_for. This function adjusts the loop
+ header of an outer OMP_FOR loop to the changes made by the
+ transformations on the inner loop which is provided by the CTX. */
+static gimple_seq
+adjust_combined_loop (gomp_for *omp_for, walk_ctx *ctx)
+{
+ gcc_assert (gimple_omp_for_combined_p (omp_for));
+ gcc_assert (ctx->inner_combined_loop);
+
+ gomp_for *inner_omp_for = ctx->inner_combined_loop;
+ size_t collapse = gimple_omp_for_collapse (inner_omp_for);
+
+ int kind = gimple_omp_for_kind (omp_for);
+ if (kind == GF_OMP_FOR_KIND_DISTRIBUTE || kind == GF_OMP_FOR_KIND_TASKLOOP)
+ {
+ for (size_t level = 0; level < collapse; ++level)
+ {
+ tree outer_incr = gimple_omp_for_incr (omp_for, level);
+ tree inner_incr = gimple_omp_for_incr (inner_omp_for, level);
+ gcc_assert (TREE_TYPE (inner_incr) == TREE_TYPE (outer_incr));
+
+ tree inner_final = gimple_omp_for_final (inner_omp_for, level);
+ enum tree_code inner_cond
+ = gimple_omp_for_cond (inner_omp_for, level);
+ gimple_omp_for_set_cond (omp_for, level, inner_cond);
+
+ tree inner_step = TREE_OPERAND (inner_incr, 1);
+ /* If this omp_for is the outermost loop belonging to a
+ combined construct, gimplify the step into its
+ prebody. Otherwise, just gimplify the step on the inner
+ gomp_for and move the ungimplified step expression
+ here. */
+ if (!gimple_omp_for_combined_into_p (omp_for)
+ && !TREE_CONSTANT (inner_step))
+ {
+ push_gimplify_context ();
+ tree step = create_tmp_var (TREE_TYPE (inner_incr),
+ ".omp_combined_step");
+ gimplify_assign (step, inner_step,
+ gimple_omp_for_pre_body_ptr (omp_for));
+ pop_gimplify_context (ctx->bind);
+ TREE_OPERAND (outer_incr, 1) = step;
+ }
+ else
+ TREE_OPERAND (outer_incr, 1) = inner_step;
+
+ if (!gimple_omp_for_combined_into_p (omp_for)
+ && !TREE_CONSTANT (inner_final))
+ {
+ push_gimplify_context ();
+ tree final = create_tmp_var (TREE_TYPE (inner_final),
+ ".omp_combined_final");
+ gimplify_assign (final, inner_final,
+ gimple_omp_for_pre_body_ptr (omp_for));
+ pop_gimplify_context (ctx->bind);
+ gimple_omp_for_set_final (omp_for, level, final);
+ }
+ else
+ gimple_omp_for_set_final (omp_for, level, inner_final);
+
+ /* Gimplify the step on the inner loop of the combined construct. */
+ if (!TREE_CONSTANT (inner_step))
+ {
+ push_gimplify_context ();
+ tree step = create_tmp_var (TREE_TYPE (inner_incr),
+ ".omp_combined_step");
+ gimplify_assign (step, inner_step,
+ gimple_omp_for_pre_body_ptr (inner_omp_for));
+ TREE_OPERAND (inner_incr, 1) = step;
+ pop_gimplify_context (ctx->bind);
+
+ tree private_clause = build_omp_clause (
+ gimple_location (omp_for), OMP_CLAUSE_PRIVATE);
+ OMP_CLAUSE_DECL (private_clause) = step;
+ tree *clauses = gimple_omp_for_clauses_ptr (inner_omp_for);
+ *clauses = chainon (*clauses, private_clause);
+ }
+
+ /* Gimplify the final on the inner loop of the combined construct. */
+ if (!TREE_CONSTANT (inner_final))
+ {
+ push_gimplify_context ();
+ tree final = create_tmp_var (TREE_TYPE (inner_incr),
+ ".omp_combined_final");
+ gimplify_assign (final, inner_final,
+ gimple_omp_for_pre_body_ptr (inner_omp_for));
+ gimple_omp_for_set_final (inner_omp_for, level, final);
+ pop_gimplify_context (ctx->bind);
+
+ tree private_clause = build_omp_clause (
+ gimple_location (omp_for), OMP_CLAUSE_PRIVATE);
+ OMP_CLAUSE_DECL (private_clause) = final;
+ tree *clauses = gimple_omp_for_clauses_ptr (inner_omp_for);
+ *clauses = chainon (*clauses, private_clause);
+ }
+ }
+ }
+
+ if (gimple_omp_for_combined_into_p (omp_for))
+ ctx->inner_combined_loop = omp_for;
+ else
+ ctx->inner_combined_loop = NULL;
+
+ return omp_for;
+}
+
+/* Transform OMP_FOR recursively according to the clause chain
+ TRANSFORMATION. Return the resulting sequence of gimple statements.
+
+ This function dispatches OMP_FOR to the handler function for the
+ TRANSFORMATION clause. The handler function is responsible for invoking this
+ function recursively for executing the remaining transformations. */
+
+static gimple_seq
+transform_gomp_for (gomp_for *omp_for, tree transformation, walk_ctx *ctx)
+{
+ if (!transformation)
+ {
+ if (gimple_omp_for_kind (omp_for) == GF_OMP_FOR_KIND_TRANSFORM_LOOP)
+ return expand_transformed_loop (omp_for);
+
+ return omp_for;
+ }
+
+ push_gimplify_context ();
+
+ bool added_decls = canonicalize_conditions (omp_for);
+
+ gimple_seq result = NULL;
+ location_t loc = OMP_CLAUSE_LOCATION (transformation);
+ auto dump_loc = dump_user_location_t::from_location_t (loc);
+ size_t level = tree_to_uhwi (OMP_CLAUSE_TRANSFORM_LEVEL (transformation));
+ switch (OMP_CLAUSE_CODE (transformation))
+ {
+ case OMP_CLAUSE_UNROLL_FULL:
+ gcc_assert (TREE_CHAIN (transformation) == NULL);
+ gcc_assert (level == 0);
+ result = full_unroll (omp_for, loc, ctx);
+ break;
+ case OMP_CLAUSE_UNROLL_NONE:
+ gcc_assert (TREE_CHAIN (transformation) == NULL);
+ gcc_assert (level == 0);
+ if (assign_unroll_full_clause_p (omp_for, transformation))
+ {
+ result = full_unroll (omp_for, loc, ctx);
+ }
+ else if (tree unroll_factor
+ = assign_unroll_partial_clause_p (omp_for, transformation))
+ {
+ result = partial_unroll (omp_for, level, unroll_factor, loc,
+ transformation, ctx);
+ }
+ else {
+ if (dump_enabled_p ())
+ {
+ /* TODO Try to inform the unrolling pass that the user
+ wants to unroll this loop. This could relax some
+ restrictions there, e.g. on the code size? */
+ dump_printf_loc (
+ MSG_MISSED_OPTIMIZATION, dump_loc,
+ "not unrolling loop with %<omp unroll%> directive. Add "
+ "clause to specify unrolling type or invoke the "
+ "compiler with --param=omp-unroll-default-factor=n for some"
+ "constant integer n");
+ }
+ result = transform_gomp_for (omp_for, NULL, ctx);
+ }
+
+ break;
+ case OMP_CLAUSE_UNROLL_PARTIAL:
+ {
+ tree unroll_factor = OMP_CLAUSE_UNROLL_PARTIAL_EXPR (transformation);
+ if (!unroll_factor)
+ {
+ // TODO Use target architecture dependent constants?
+ unsigned factor = param_omp_unroll_default_factor > 0
+ ? param_omp_unroll_default_factor
+ : 5;
+ unroll_factor = build_int_cst (integer_type_node, factor);
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, dump_loc,
+ "%<partial%> clause without unrolling "
+ "factor turned into %<partial(%u)%> clause\n",
+ factor);
+ }
+
+ result = partial_unroll (omp_for, level,
+ unroll_factor, loc, transformation, ctx);
+ }
+ break;
+ case OMP_CLAUSE_TILE:
+ result = tile (omp_for, loc, level,
+ OMP_CLAUSE_TILE_SIZES (transformation),
+ transformation, ctx);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (added_decls && gimple_code (result) != GIMPLE_BIND)
+ result = gimple_build_bind (NULL, result, NULL);
+ pop_gimplify_context (added_decls ? result : NULL); /* for decls from canonicalize_loops */
+
+ return result;
+}
+
+/* Remove all loop transformation clauses from the clauses of OMP_FOR and
+ return a new tree chain containing just those clauses.
+
+ The clauses correspond to transformation *directives* associated with the
+ OMP_FOR's loop. The returned clauses are ordered from the innermost
+ directive to the outermost, i.e. in the order in which the transformations
+ should execute.
+
+ Example:
+ --------
+ --------
+
+ The loop
+
+ #pragma omp for nowait
+ #pragma omp unroll partial(5)
+ #pragma omp tile sizes(2,2)
+ LOOP
+
+ is represented as
+
+ #pragma omp for nowait unroll_partial(5) tile_sizes(2,2)
+ LOOP
+
+ Gimplification may add clauses after the transformation clauses added
+ by the front ends. This function will leave only the "nowait" clause on
+ OMP_FOR and return the clauses "tile_sizes(2,2) unroll_partial(5)". */
+
+static tree
+gomp_for_remove_transformation_clauses (gomp_for *omp_for)
+{
+ tree *clauses = gimple_omp_for_clauses_ptr (omp_for);
+ tree trans_clauses = NULL;
+ tree last_other_clause = NULL;
+
+ for (tree c = gimple_omp_for_clauses (omp_for); c != NULL_TREE;)
+ {
+ tree chain_tail = OMP_CLAUSE_CHAIN (c);
+ if (omp_loop_transform_clause_p (c))
+ {
+ if (last_other_clause)
+ OMP_CLAUSE_CHAIN (last_other_clause) = chain_tail;
+ else
+ *clauses = OMP_CLAUSE_CHAIN (c);
+
+ OMP_CLAUSE_CHAIN (c) = NULL;
+ trans_clauses = chainon (trans_clauses, c);
+ }
+ else
+ {
+ /* There should be no other clauses between loop transformations ... */
+ gcc_assert (!trans_clauses || !last_other_clause
+ || TREE_CHAIN (last_other_clause) == c);
+ /* ... and hence stop if transformations were found before the
+ non-transformation clause C. */
+ if (trans_clauses)
+ break;
+ last_other_clause = c;
+ }
+
+ c = chain_tail;
+ }
+
+ return nreverse (trans_clauses);
+}
+
+static void
+print_optimized_unroll_partial_msg (tree c)
+{
+ gcc_assert (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_UNROLL_PARTIAL);
+ location_t loc = OMP_CLAUSE_LOCATION (c);
+ dump_user_location_t dump_loc;
+ dump_loc = dump_user_location_t::from_location_t (loc);
+
+ tree unroll_factor = OMP_CLAUSE_UNROLL_PARTIAL_EXPR (c);
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, dump_loc,
+ "replaced consecutive %<omp unroll%> directives by "
+ "%<omp unroll partial(" HOST_WIDE_INT_PRINT_UNSIGNED
+ ")%>\n", tree_to_uhwi (unroll_factor));
+}
+
+/* Optimize CLAUSES by removing and merging redundant clauses. Return the
+ optimized clause chain. */
+
+static tree
+optimize_transformation_clauses (tree clauses)
+{
+ if (!clauses)
+ return NULL_TREE;
+
+ /* The last unroll_partial clause seen in clauses, if any,
+ or the last merged unroll partial clause. */
+ tree unroll_partial = NULL;
+ /* The last clause was not a unroll_partial clause, if any.
+ unroll_full and unroll_none are not relevant because
+ they appear only at the end of a chain. */
+ tree last_non_unroll = NULL;
+ /* Indicates that at least two unroll_partial clauses have been merged
+ since last_non_unroll was seen. */
+ bool merged_unroll_partial = false;
+
+ size_t level = tree_to_uhwi (OMP_CLAUSE_TRANSFORM_LEVEL (clauses));
+ for (tree c = clauses; c != NULL_TREE; c = OMP_CLAUSE_CHAIN (c))
+ {
+ enum omp_clause_code code = OMP_CLAUSE_CODE (c);
+
+ switch (code)
+ {
+ case OMP_CLAUSE_UNROLL_NONE:
+ /* 'unroll' without a clause cannot be followed by any
+ transformations because its result does not have canonical loop
+ nest form. */
+ gcc_assert (OMP_CLAUSE_CHAIN (c) == NULL);
+ unroll_partial = NULL;
+ merged_unroll_partial = false;
+ break;
+ case OMP_CLAUSE_UNROLL_FULL:
+ /* 'unroll full' cannot be followed by any transformations because
+ its result does not have canonical loop nest form. */
+ gcc_assert (OMP_CLAUSE_CHAIN (c) == NULL);
+
+ /* Previous 'unroll partial' directives are useless. */
+ if (unroll_partial)
+ {
+ if (last_non_unroll)
+ OMP_CLAUSE_CHAIN (last_non_unroll) = c;
+ else
+ clauses = c;
+
+ if (dump_enabled_p ())
+ {
+ location_t loc = OMP_CLAUSE_LOCATION (c);
+ dump_user_location_t dump_loc;
+ dump_loc = dump_user_location_t::from_location_t (loc);
+
+ dump_printf_loc (
+ MSG_OPTIMIZED_LOCATIONS, dump_loc,
+ "removed useless %<omp unroll partial%> directives "
+ "preceding 'omp unroll full'\n");
+ }
+ }
+ unroll_partial = NULL;
+ merged_unroll_partial = false;
+ break;
+ case OMP_CLAUSE_UNROLL_PARTIAL:
+ {
+ /* Merge a sequence of consecutive 'unroll partial' directives.
+ Note that it impossible for 'unroll full' or 'unroll' to
+ appear inbetween the 'unroll partial' clauses because they
+ remove the loop-nest. */
+ if (unroll_partial)
+ {
+ tree factor = OMP_CLAUSE_UNROLL_PARTIAL_EXPR (unroll_partial);
+ tree c_factor = OMP_CLAUSE_UNROLL_PARTIAL_EXPR (c);
+ if (factor && c_factor)
+ factor = fold_build2 (MULT_EXPR, TREE_TYPE (factor), factor,
+ c_factor);
+ else if (!factor && c_factor)
+ factor = c_factor;
+
+ gcc_assert (!factor || TREE_CODE (factor) == INTEGER_CST);
+
+ OMP_CLAUSE_UNROLL_PARTIAL_EXPR (unroll_partial) = factor;
+ OMP_CLAUSE_CHAIN (unroll_partial) = OMP_CLAUSE_CHAIN (c);
+ OMP_CLAUSE_LOCATION (unroll_partial) = OMP_CLAUSE_LOCATION (c);
+ merged_unroll_partial = true;
+ }
+ else
+ unroll_partial = c;
+ }
+ break;
+ case OMP_CLAUSE_TILE:
+ {
+ /* No optimization for those clauses yet, but they end any chain of
+ "unroll partial" clauses. */
+ if (merged_unroll_partial && dump_enabled_p ())
+ print_optimized_unroll_partial_msg (unroll_partial);
+
+ if (unroll_partial)
+ OMP_CLAUSE_CHAIN (unroll_partial) = c;
+
+ unroll_partial = NULL;
+ merged_unroll_partial = false;
+ last_non_unroll = c;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* The transformations are ordered by the level of the loop-nest to which
+ they apply in decreasing order. Handle the different levels separately
+ as long as we do not implement optimizations across the levels. */
+ tree next_c = OMP_CLAUSE_CHAIN (c);
+ if (!next_c)
+ break;
+
+ size_t next_level = tree_to_uhwi (OMP_CLAUSE_TRANSFORM_LEVEL (next_c));
+ if (next_level != level)
+ {
+ gcc_assert (next_level < level);
+ tree tail = optimize_transformation_clauses (next_c);
+ OMP_CLAUSE_CHAIN (c) = tail;
+ break;
+ }
+ else level = next_level;
+
+ }
+
+ if (merged_unroll_partial && dump_enabled_p ())
+ print_optimized_unroll_partial_msg (unroll_partial);
+
+ return clauses;
+}
+
+/* Visit the current statement in GSI_P in the walk_omp_for_loops walk and
+ execute all loop transformations found on it. */
+
+void
+process_omp_for (gomp_for *omp_for, gimple_seq *containing_seq, walk_ctx *ctx)
+{
+ auto gsi_p = gsi_for_stmt (omp_for, containing_seq);
+ tree transform_clauses = gomp_for_remove_transformation_clauses (omp_for);
+
+ /* Do not attempt to transform broken code which might violate the
+ assumptions of the loop transformation implementations.
+
+ Transformation clauses must be dropped first because following
+ passes do not handle them. */
+ if (seen_error ())
+ return;
+
+ transform_clauses = optimize_transformation_clauses (transform_clauses);
+
+ gimple *transformed = omp_for;
+ if (gimple_omp_for_combined_p (omp_for)
+ && ctx->inner_combined_loop)
+ transformed = adjust_combined_loop (omp_for, ctx);
+ else
+ transformed = transform_gomp_for (omp_for, transform_clauses, ctx);
+
+ if (transformed == omp_for)
+ return;
+
+ gsi_replace_with_seq (&gsi_p, transformed, true);
+
+ if (!dump_enabled_p () || !(dump_flags & TDF_DETAILS))
+ return;
+
+ if (transformed)
+ dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS, transformed,
+ "Transformed loop: %G\n\n", transformed);
+}
+
+/* Traverse SEQ in depth-first order and apply the loop transformation
+ found on gomp_for statements. */
+
+static unsigned int
+walk_omp_for_loops (gimple_seq *seq, walk_ctx *ctx)
+{
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start (*seq); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ switch (gimple_code (stmt))
+ {
+ case GIMPLE_OMP_CRITICAL:
+ case GIMPLE_OMP_MASTER:
+ case GIMPLE_OMP_MASKED:
+ case GIMPLE_OMP_TASKGROUP:
+ case GIMPLE_OMP_ORDERED:
+ case GIMPLE_OMP_SCAN:
+ case GIMPLE_OMP_SECTION:
+ case GIMPLE_OMP_PARALLEL:
+ case GIMPLE_OMP_TASK:
+ case GIMPLE_OMP_SCOPE:
+ case GIMPLE_OMP_SECTIONS:
+ case GIMPLE_OMP_SINGLE:
+ case GIMPLE_OMP_TARGET:
+ case GIMPLE_OMP_TEAMS:
+ case GIMPLE_OMP_STRUCTURED_BLOCK:
+ {
+ gbind *bind = ctx->bind;
+ walk_omp_for_loops (gimple_omp_body_ptr (stmt), ctx);
+ ctx->bind = bind;
+ break;
+ }
+ case GIMPLE_OMP_FOR:
+ {
+ gbind *bind = ctx->bind;
+ walk_omp_for_loops (gimple_omp_for_pre_body_ptr (stmt), ctx);
+ walk_omp_for_loops (gimple_omp_body_ptr (stmt), ctx);
+ ctx->bind = bind;
+ process_omp_for (as_a<gomp_for *> (stmt), seq, ctx);
+ break;
+ }
+ case GIMPLE_BIND:
+ {
+ gbind *bind = as_a<gbind *> (stmt);
+ ctx->bind = bind;
+ walk_omp_for_loops (gimple_bind_body_ptr (bind), ctx);
+ ctx->bind = bind;
+ break;
+ }
+ case GIMPLE_TRY:
+ {
+ gbind *bind = ctx->bind;
+ walk_omp_for_loops (gimple_try_eval_ptr (as_a<gtry *> (stmt)),
+ ctx);
+ walk_omp_for_loops (gimple_try_cleanup_ptr (as_a<gtry *> (stmt)),
+ ctx);
+ ctx->bind = bind;
+ break;
+ }
+
+ case GIMPLE_CATCH:
+ {
+ gbind *bind = ctx->bind;
+ walk_omp_for_loops (
+ gimple_catch_handler_ptr (as_a<gcatch *> (stmt)), ctx);
+ ctx->bind = bind;
+ break;
+ }
+
+ case GIMPLE_EH_FILTER:
+ {
+ gbind *bind = ctx->bind;
+ walk_omp_for_loops (gimple_eh_filter_failure_ptr (stmt), ctx);
+ ctx->bind = bind;
+ break;
+ }
+
+ case GIMPLE_EH_ELSE:
+ {
+ gbind *bind = ctx->bind;
+ geh_else *eh_else_stmt = as_a<geh_else *> (stmt);
+ walk_omp_for_loops (gimple_eh_else_n_body_ptr (eh_else_stmt), ctx);
+ walk_omp_for_loops (gimple_eh_else_e_body_ptr (eh_else_stmt), ctx);
+ ctx->bind = bind;
+ break;
+ }
+ break;
+
+ case GIMPLE_WITH_CLEANUP_EXPR:
+ {
+ gbind *bind = ctx->bind;
+ walk_omp_for_loops (gimple_wce_cleanup_ptr (stmt), ctx);
+ ctx->bind = bind;
+ break;
+ }
+
+ case GIMPLE_TRANSACTION:
+ {
+ gbind *bind = ctx->bind;
+ auto trans = as_a<gtransaction *> (stmt);
+ walk_omp_for_loops (gimple_transaction_body_ptr (trans), ctx);
+ ctx->bind = bind;
+ break;
+ }
+
+ case GIMPLE_ASSUME:
+ break;
+
+ default:
+ gcc_assert (!gimple_has_substatements (stmt));
+ continue;
+ }
+ }
+
+ return true;
+}
+
+static unsigned int
+execute_omp_transform_loops ()
+{
+ gimple_seq body = gimple_body (current_function_decl);
+ walk_ctx ctx;
+ ctx.inner_combined_loop = NULL;
+ ctx.bind = NULL;
+ walk_omp_for_loops (&body, &ctx);
+
+ return 0;
+}
+
+namespace
+{
+
+const pass_data pass_data_omp_transform_loops = {
+ GIMPLE_PASS, /* type */
+ "omp_transform_loops", /* name */
+ OPTGROUP_OMP, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_gimple_any, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_omp_transform_loops : public gimple_opt_pass
+{
+public:
+ pass_omp_transform_loops (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_omp_transform_loops, ctxt)
+ {
+ }
+
+ /* opt_pass methods: */
+ virtual unsigned int
+ execute (function *)
+ {
+ return execute_omp_transform_loops ();
+ }
+ virtual bool
+ gate (function *)
+ {
+ return flag_openmp || flag_openmp_simd;
+ }
+
+}; // class pass_omp_transform_loops
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_omp_transform_loops (gcc::context *ctxt)
+{
+ return new pass_omp_transform_loops (ctxt);
+}
@@ -812,6 +812,14 @@ Enum(openacc_privatization) String(quiet) Value(OPENACC_PRIVATIZATION_QUIET)
EnumValue
Enum(openacc_privatization) String(noisy) Value(OPENACC_PRIVATIZATION_NOISY)
+-param=omp-unroll-full-max-iterations=
+Common Joined UInteger Var(param_omp_unroll_full_max_iterations) Init(5) Param Optimization
+The maximum number of iterations of a loop for which an 'omp unroll' directive on the loop without a clause is turned into an 'omp unroll full'.
+
+-param=omp-unroll-default-factor=
+Common Joined UInteger Var(param_omp_unroll_default_factor) Init(0) Param Optimization
+The unroll factor used for loops that have an 'omp unroll partial' directive without an explicit unroll factor.
+
-param=parloops-chunk-size=
Common Joined UInteger Var(param_parloops_chunk_size) Param Optimization
Chunk size of omp schedule for loops parallelized by parloops.
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_diagnose_omp_blocks);
NEXT_PASS (pass_diagnose_tm_blocks);
NEXT_PASS (pass_omp_oacc_kernels_decompose);
+ NEXT_PASS (pass_omp_transform_loops);
NEXT_PASS (pass_lower_omp);
NEXT_PASS (pass_lower_cf);
NEXT_PASS (pass_lower_tm);
@@ -524,6 +524,18 @@ enum omp_clause_code {
/* OpenACC clause: nohost. */
OMP_CLAUSE_NOHOST,
+
+ /* Internal representation for an "omp unroll full" directive. */
+ OMP_CLAUSE_UNROLL_FULL,
+
+ /* Internal representation for an "omp unroll" directive without a clause. */
+ OMP_CLAUSE_UNROLL_NONE,
+
+ /* Internal representation for an "omp unroll partial" directive. */
+ OMP_CLAUSE_UNROLL_PARTIAL,
+
+ /* Represents a "tile" directive internally. */
+ OMP_CLAUSE_TILE
};
#undef DEFTREESTRUCT
@@ -1493,6 +1493,13 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE__SCANTEMP_:
break;
+ /* Clauses related to loop transforms. */
+ case OMP_CLAUSE_TILE:
+ case OMP_CLAUSE_UNROLL_FULL:
+ case OMP_CLAUSE_UNROLL_PARTIAL:
+ case OMP_CLAUSE_UNROLL_NONE:
+ break;
+
/* The following clause belongs to the OpenACC cache directive, which
is discarded during gimplification. */
case OMP_CLAUSE__CACHE_:
@@ -2290,6 +2297,13 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE__SCANTEMP_:
break;
+ /* Clauses related to loop transforms. */
+ case OMP_CLAUSE_TILE:
+ case OMP_CLAUSE_UNROLL_FULL:
+ case OMP_CLAUSE_UNROLL_PARTIAL:
+ case OMP_CLAUSE_UNROLL_NONE:
+ break;
+
/* The following clause belongs to the OpenACC cache directive, which
is discarded during gimplification. */
case OMP_CLAUSE__CACHE_:
@@ -428,6 +428,7 @@ extern gimple_opt_pass *make_pass_lower_switch_O0 (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_lower_vector (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_lower_vector_ssa (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_omp_transform_loops (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_lower_omp (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
@@ -505,6 +505,54 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
case OMP_CLAUSE_EXCLUSIVE:
name = "exclusive";
goto print_remap;
+ case OMP_CLAUSE_UNROLL_FULL:
+ pp_string (pp, "unroll_full");
+ if (OMP_CLAUSE_TRANSFORM_LEVEL (clause))
+ {
+ pp_string (pp, "@");
+ dump_generic_node (pp, OMP_CLAUSE_TRANSFORM_LEVEL (clause),
+ spc, flags, false);
+ }
+ break;
+ case OMP_CLAUSE_UNROLL_NONE:
+ pp_string (pp, "unroll_none");
+ if (OMP_CLAUSE_TRANSFORM_LEVEL (clause))
+ {
+ pp_string (pp, "@");
+ dump_generic_node (pp, OMP_CLAUSE_TRANSFORM_LEVEL (clause),
+ spc, flags, false);
+ }
+ break;
+ case OMP_CLAUSE_UNROLL_PARTIAL:
+ pp_string (pp, "unroll_partial");
+ if (OMP_CLAUSE_UNROLL_PARTIAL_EXPR (clause))
+ {
+ pp_left_paren (pp);
+ dump_generic_node (pp, OMP_CLAUSE_UNROLL_PARTIAL_EXPR (clause), spc, flags,
+ false);
+ pp_right_paren (pp);
+ }
+ if (OMP_CLAUSE_TRANSFORM_LEVEL (clause))
+ {
+ pp_string (pp, "@");
+ dump_generic_node (pp, OMP_CLAUSE_TRANSFORM_LEVEL (clause),
+ spc, flags, false);
+ }
+ break;
+ case OMP_CLAUSE_TILE:
+ pp_string (pp, "tile sizes");
+ pp_left_paren (pp);
+ gcc_assert (OMP_CLAUSE_TILE_SIZES (clause));
+ dump_generic_node (pp, OMP_CLAUSE_TILE_SIZES (clause), spc, flags,
+ false);
+ pp_right_paren (pp);
+ if (OMP_CLAUSE_TRANSFORM_LEVEL (clause))
+ {
+ pp_string (pp, "@");
+ dump_generic_node (pp, OMP_CLAUSE_TRANSFORM_LEVEL (clause),
+ spc, flags, false);
+ }
+ break;
case OMP_CLAUSE__LOOPTEMP_:
name = "_looptemp_";
goto print_remap;
@@ -3633,6 +3681,10 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags,
pp_string (pp, "#pragma omp distribute");
goto dump_omp_loop;
+ case OMP_LOOP_TRANS:
+ pp_string (pp, "#pragma omp loop_transform");
+ goto dump_omp_loop;
+
case OMP_TASKLOOP:
pp_string (pp, "#pragma omp taskloop");
goto dump_omp_loop;
@@ -326,6 +326,10 @@ unsigned const char omp_clause_num_ops[] =
0, /* OMP_CLAUSE_IF_PRESENT */
0, /* OMP_CLAUSE_FINALIZE */
0, /* OMP_CLAUSE_NOHOST */
+ 1, /* OMP_CLAUSE_UNROLL_FULL */
+ 1, /* OMP_CLAUSE_UNROLL_NONE */
+ 2, /* OMP_CLAUSE_UNROLL_PARTIAL */
+ 2, /* OMP_CLAUSE_TILE */
};
const char * const omp_clause_code_name[] =
@@ -417,6 +421,10 @@ const char * const omp_clause_code_name[] =
"if_present",
"finalize",
"nohost",
+ "unroll_full",
+ "unroll_none",
+ "unroll_partial",
+ "tile",
};
/* Unless specific to OpenACC, we tend to internally maintain OpenMP-centric
@@ -1214,6 +1214,12 @@ DEFTREECODE (OMP_TASK, "omp_task", tcc_statement, 2)
DEFTREECODE (OMP_FOR, "omp_for", tcc_statement, 7)
+/* OpenMP - A loop nest to which a loop transformation such as #pragma omp
+ unroll should be applied, but which is not associated with another directive
+ such as #pragma omp for. The kind of loop transformations to be applied are
+ internally represented by clauses. Operands like for OMP_FOR. */
+DEFTREECODE (OMP_LOOP_TRANS, "omp_loop_trans", tcc_statement, 7)
+
/* OpenMP - #pragma omp simd [clause1 ... clauseN]
Operands like for OMP_FOR. */
DEFTREECODE (OMP_SIMD, "omp_simd", tcc_statement, 7)
@@ -1836,6 +1836,17 @@ class auto_suppress_location_wrappers
#define OMP_CLAUSE_USE_DEVICE_PTR_IF_PRESENT(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_USE_DEVICE_PTR)->base.public_flag)
+/* The level of a collapsed loop nest at which the tranformation represented
+ by this clause should be applied. */
+#define OMP_CLAUSE_TRANSFORM_LEVEL(NODE) \
+ OMP_CLAUSE_OPERAND (NODE, 0)
+
+#define OMP_CLAUSE_UNROLL_PARTIAL_EXPR(NODE) \
+ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_UNROLL_PARTIAL), 1)
+
+#define OMP_CLAUSE_TILE_SIZES(NODE) \
+ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_TILE), 1)
+
#define OMP_CLAUSE_PROC_BIND_KIND(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PROC_BIND)->omp_clause.subcode.proc_bind_kind)
From: Frederik Harwath <frederik@codesourcery.com> This patch adds support for the OMP_LOOP_TRANS tree node, internal OpenMP clauses representing loop transformations, and the omp_transform_loops pass to lower them. gcc/ChangeLog: * Makefile.in (OBJS): Add omp-transform-loops.o. * gimple-pretty-print.cc (dump_gimple_omp_for): Handle GF_OMP_FOR_KIND_TRANSFORM_LOOP. * gimple.h (enum gf_mask): Add GF_FOR_KIND_TRANSFORM_LOOP. * gimplify.cc (is_gimple_stmt): Add OMP_LOOP_TRANS. (gimplify_scan_omp_clauses): Handle loop transform clauses. (gimplify_adjust_omp_clauses): Likewise. (omp_for_drop_tile_clauses): New function. (gimplify_omp_for): Call omp_for_drop_tile_clauses. Handle OMP_LOOP_TRANS and loop transform clauses. (gimplify_omp_loop): Handle loop transform clauses. (gimplify_expr): Handle OMP_LOOP_TRANS. * omp-general.cc (omp_loop_transform_clause_p): New function. * omp-general.h (omp_loop_transform_clause_p): Declare. * omp-transform-loops.cc: New file. * params.opt (omp-unroll-full-max-iterations): New. (omp-unroll-default-factor): New. * passes.def: Add pass_omp_transform_loops before pass_lower_omp. * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE_UNROLL_FULL, OMP_CLAUSE_UNROLL_NONE, OMP_CLAUSE_UNROLL_PARTIAL, and OMP_CLAUSE_TILE. * tree-nested.cc (convert_nonlocal_omp_clauses): Add support for loop transform clauses. (convert_local_omp_clauses): Likewise. * tree-pass.h (make_pass_omp_transform_loops): Declare. * tree-pretty-print.cc (dump_omp_clause): Add support for loop transform clauses. (dump_generic_node): Handle OMP_LOOP_TRANS. * tree.cc (omp_clause_num_ops): Add entries for loop transforms. (omp_clause_code_name): Likewise. * tree.def (OMP_LOOP_TRANS): New. * tree.h (OMP_CLAUSE_TRANSFORM_LEVEL): New. (OMP_CLAUSE_UNROLL_PARTIAL_EXPR): New. (OMP_CLAUSE_TILE_SIZES): New. * doc/invoke.texi (Optimize Options): Document the new parameters. Co-Authored-By: Sandra Loosemore <sandra@codesourcery.com> --- gcc/Makefile.in | 1 + gcc/doc/invoke.texi | 9 + gcc/gimple-pretty-print.cc | 6 + gcc/gimple.h | 1 + gcc/gimplify.cc | 65 +- gcc/omp-general.cc | 14 + gcc/omp-general.h | 1 + gcc/omp-transform-loops.cc | 1815 ++++++++++++++++++++++++++++++++++++ gcc/params.opt | 8 + gcc/passes.def | 1 + gcc/tree-core.h | 12 + gcc/tree-nested.cc | 14 + gcc/tree-pass.h | 1 + gcc/tree-pretty-print.cc | 52 ++ gcc/tree.cc | 8 + gcc/tree.def | 6 + gcc/tree.h | 11 + 17 files changed, 2024 insertions(+), 1 deletion(-) create mode 100644 gcc/omp-transform-loops.cc