@@ -153,7 +153,7 @@ struct gimplify_omp_ctx
splay_tree variables;
hash_set<tree> *privatized_types;
/* Iteration variables in an OMP_FOR. */
- vec<tree> iter_vars;
+ vec<tree> loop_iter_var;
location_t location;
enum omp_clause_default_kind default_kind;
enum omp_region_type region_type;
@@ -392,7 +392,7 @@ delete_omp_context (struct gimplify_omp_ctx *c)
{
splay_tree_delete (c->variables);
delete c->privatized_types;
- c->iter_vars.release ();
+ c->loop_iter_var.release ();
XDELETE (c);
}
@@ -7490,8 +7490,15 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
== TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt)));
gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
== TREE_VEC_LENGTH (OMP_FOR_INCR (for_stmt)));
- gimplify_omp_ctxp->iter_vars.create (TREE_VEC_LENGTH
- (OMP_FOR_INIT (for_stmt)));
+
+ tree c = find_omp_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_ORDERED);
+ bool is_doacross = false;
+ if (c && OMP_CLAUSE_ORDERED_EXPR (c))
+ {
+ is_doacross = true;
+ gimplify_omp_ctxp->loop_iter_var.create (TREE_VEC_LENGTH
+ (OMP_FOR_INIT (for_stmt)));
+ }
for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
{
t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
@@ -7500,11 +7507,14 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
gcc_assert (DECL_P (decl));
gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (decl))
|| POINTER_TYPE_P (TREE_TYPE (decl)));
- if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
- gimplify_omp_ctxp->iter_vars.quick_push
- (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
- else
- gimplify_omp_ctxp->iter_vars.quick_push (decl);
+ if (is_doacross)
+ {
+ if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
+ gimplify_omp_ctxp->loop_iter_var.quick_push
+ (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
+ else
+ gimplify_omp_ctxp->loop_iter_var.quick_push (decl);
+ }
/* Make sure the iteration variable is private. */
tree c = NULL_TREE;
@@ -8387,6 +8397,56 @@ gimplify_transaction (tree *expr_p, gimple_seq *pre_p)
return GS_ALL_DONE;
}
+/* Gimplify an OMP_ORDERED construct. EXPR is the tree version. BODY
+ is the OMP_BODY of the original EXPR (which has already been
+ gimplified so it's not present in the EXPR).
+
+ Return the gimplified GIMPLE_OMP_ORDERED tuple. */
+
+static gimple
+gimplify_omp_ordered (tree expr, gimple_seq body)
+{
+ tree c, decls;
+ int failures = 0;
+ unsigned int i;
+
+ if (gimplify_omp_ctxp)
+ for (c = OMP_ORDERED_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
+ && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
+ {
+ bool fail = false;
+ for (decls = OMP_CLAUSE_DECL (c), i = 0;
+ decls && TREE_CODE (decls) == TREE_LIST;
+ decls = TREE_CHAIN (decls), ++i)
+ if (i < gimplify_omp_ctxp->loop_iter_var.length ()
+ && TREE_VALUE (decls) != gimplify_omp_ctxp->loop_iter_var[i])
+ {
+ error_at (OMP_CLAUSE_LOCATION (c),
+ "variable %qE is not an iteration "
+ "of outermost loop %d, expected %qE",
+ TREE_VALUE (decls), i + 1,
+ gimplify_omp_ctxp->loop_iter_var[i]);
+ fail = true;
+ failures++;
+ }
+ /* Avoid being too redundant. */
+ if (!fail && i != gimplify_omp_ctxp->loop_iter_var.length ())
+ {
+ error_at (OMP_CLAUSE_LOCATION (c),
+ "number of variables in depend(sink) "
+ "clause does not match number of "
+ "iteration variables");
+ fail = true;
+ failures++;
+ }
+ }
+
+ if (failures)
+ return gimple_build_nop ();
+ return gimple_build_omp_ordered (body, OMP_ORDERED_CLAUSES (expr));
+}
+
/* Convert the GENERIC expression tree *EXPR_P to GIMPLE. If the
expression produces a value to be used as an operand inside a GIMPLE
statement, the value will be stored back in *EXPR_P. This value will
@@ -9200,38 +9260,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
}
break;
case OMP_ORDERED:
- if (gimplify_omp_ctxp)
- for (tree c = OMP_ORDERED_CLAUSES (*expr_p);
- c; c = OMP_CLAUSE_CHAIN (c))
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
- && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
- {
- unsigned int n = 0;
- bool fail = false;
- for (tree decls = OMP_CLAUSE_DECL (c);
- decls && TREE_CODE (decls) == TREE_LIST;
- decls = TREE_CHAIN (decls), ++n)
- if (n < gimplify_omp_ctxp->iter_vars.length ()
- && TREE_VALUE (decls)
- != gimplify_omp_ctxp->iter_vars[n])
- {
- error_at (OMP_CLAUSE_LOCATION (c),
- "variable %qE is not an iteration "
- "of outermost loop %d, expected %qE",
- TREE_VALUE (decls), n + 1,
- gimplify_omp_ctxp->iter_vars[n]);
- fail = true;
- }
- /* Avoid being too redundant. */
- if (!fail
- && n != gimplify_omp_ctxp->iter_vars.length ())
- error_at (OMP_CLAUSE_LOCATION (c),
- "number of variables in depend(sink) clause "
- "does not match number of iteration variables");
- }
-
- g = gimple_build_omp_ordered (body,
- OMP_ORDERED_CLAUSES (*expr_p));
+ g = gimplify_omp_ordered (*expr_p, body);
break;
case OMP_CRITICAL:
gimplify_scan_omp_clauses (&OMP_CRITICAL_CLAUSES (*expr_p),
@@ -11969,6 +11969,219 @@ lower_omp_taskgroup (gimple_stmt_iterator *gsi_p, omp_context *ctx)
}
+/* Fold the OMP_ORDERED_CLAUSES for the OMP_ORDERED in STMT if possible. */
+
+static void
+lower_omp_ordered_clauses (gomp_ordered *ord_stmt, omp_context *ctx)
+{
+ struct omp_for_data fd;
+ if (!ctx->outer || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR)
+ return;
+
+ unsigned int len = gimple_omp_for_collapse (ctx->outer->stmt);
+ if (!len)
+ return;
+ struct omp_for_data_loop *loops
+ = (struct omp_for_data_loop *)
+ alloca (len * sizeof (struct omp_for_data_loop));
+ extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, loops);
+ if (!fd.ordered)
+ return;
+
+ /* Canonicalize sink dependence clauses into one folded clause if
+ possible.
+
+ The basic algorithm is to create a sink vector whose first
+ element is the GCD of all the first elements, and whose remaining
+ elements are the minimum of the subsequent columns.
+
+ We ignore dependence vectors whose first element is zero because
+ such dependencies are known to be executed by the same thread.
+
+ We take into account the direction of the loop, so a minimum
+ becomes a maximum if the loop is iterating backwards. We also
+ ignore sink clauses where the loop direction is unknown, or where
+ the offsets are clearly invalid because they are not a multiple
+ of the loop increment.
+
+ For example:
+
+ #pragma omp for ordered(2)
+ for (i=0; i < N; ++i)
+ for (j=0; j < M; ++j)
+ {
+ #pragma omp ordered \
+ depend(sink:i-8,j-1) \
+ depend(sink:i,j-2) \ // Completely ignored because i+0.
+ depend(sink:i-4,j+3) \
+ depend(sink:i-6,j+2)
+ #pragma omp ordered depend(source)
+ }
+
+ Folded clause is:
+
+ depend(sink:-gcd(8,4,6),min(-1,3,2))
+ -or-
+ depend(sink:-2,-1)
+ */
+
+ /* FIXME: Computing GCD's where the first element is zero is
+ non-trivial in the presence of collapsed loops. Do this later. */
+ gcc_assert (fd.collapse <= 1);
+
+ vec<wide_int> folded_deps;
+ folded_deps.create (len);
+ folded_deps.quick_grow_cleared (len);
+ /* Bitmap representing dimensions in the final dependency vector that
+ have been set. */
+ sbitmap folded_deps_used = sbitmap_alloc (len);
+ bitmap_clear (folded_deps_used);
+ /* TRUE if the first dimension's offset is negative. */
+ bool neg_offset_p = false;
+
+ /* ?? We need to save the original iteration variables stored in the
+ depend clauses, because those in fd.loops[].v have already been
+ gimplified. Perhaps we should use the gimplified versions. ?? */
+ tree *iter_vars = (tree *) alloca (sizeof (tree) * len);
+ memset (iter_vars, 0, sizeof (tree) * len);
+
+ tree *list_p = gimple_omp_ordered_clauses_ptr (ord_stmt);
+ tree c;
+ unsigned int i;
+ while ((c = *list_p) != NULL)
+ {
+ bool remove = false;
+
+ if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND
+ || OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_SINK)
+ goto next_ordered_clause;
+
+ tree decls;
+ for (decls = OMP_CLAUSE_DECL (c), i = 0;
+ decls && TREE_CODE (decls) == TREE_LIST;
+ decls = TREE_CHAIN (decls), ++i)
+ {
+ gcc_assert (i < len);
+
+ /* extract_omp_for_data has canonicalized the condition. */
+ gcc_assert (fd.loops[i].cond_code == LT_EXPR
+ || fd.loops[i].cond_code == LE_EXPR
+ || fd.loops[i].cond_code == GT_EXPR
+ || fd.loops[i].cond_code == GE_EXPR);
+ bool forward = fd.loops[i].cond_code == LT_EXPR
+ || fd.loops[i].cond_code == LE_EXPR;
+
+ /* While the committee makes up its mind, bail if we have any
+ non-constant steps. */
+ if (TREE_CODE (fd.loops[i].step) != INTEGER_CST)
+ goto lower_omp_ordered_ret;
+
+ wide_int offset = TREE_PURPOSE (decls);
+ if (!iter_vars[i])
+ iter_vars[i] = TREE_VALUE (decls);
+
+ /* Ignore invalid offsets that are not multiples of the step. */
+ if (!wi::multiple_of_p
+ (wi::abs (offset), wi::abs ((wide_int) fd.loops[i].step),
+ UNSIGNED))
+ {
+ warning_at (OMP_CLAUSE_LOCATION (c), 0,
+ "ignoring sink clause with offset that is not "
+ "a multiple of the loop step");
+ remove = true;
+ goto next_ordered_clause;
+ }
+
+ /* Calculate the first dimension. The first dimension of
+ the folded dependency vector is the GCD of the first
+ elements, while ignoring any first elements whose offset
+ is 0. */
+ if (i == 0)
+ {
+ /* Ignore dependence vectors whose first dimension is 0. */
+ if (offset == 0)
+ {
+ remove = true;
+ goto next_ordered_clause;
+ }
+ else
+ {
+ neg_offset_p =
+ wi::neg_p (offset,
+ TYPE_SIGN (TREE_TYPE (TREE_PURPOSE (decls))));
+ if ((forward && !neg_offset_p)
+ || (!forward && neg_offset_p))
+ {
+ error_at (OMP_CLAUSE_LOCATION (c),
+ "first offset must be in opposite direction "
+ "of loop iterations");
+ goto lower_omp_ordered_ret;
+ }
+ /* Initialize the first time around. */
+ if (!bitmap_bit_p (folded_deps_used, 0))
+ {
+ bitmap_set_bit (folded_deps_used, 0);
+ folded_deps[0] = wi::abs (offset);
+ }
+ else
+ folded_deps[i] = wi::gcd (folded_deps[0], offset, UNSIGNED);
+ }
+ }
+ /* Calculate minimum for the remaining dimensions. */
+ else
+ {
+ if (!bitmap_bit_p (folded_deps_used, i))
+ {
+ bitmap_set_bit (folded_deps_used, i);
+ folded_deps[i] = offset;
+ }
+ else if ((forward && wi::lts_p (offset, folded_deps[i]))
+ || (!forward && wi::gts_p (offset, folded_deps[i])))
+ folded_deps[i] = offset;
+ }
+ }
+
+ remove = true;
+
+ next_ordered_clause:
+ if (remove)
+ *list_p = OMP_CLAUSE_CHAIN (c);
+ else
+ list_p = &OMP_CLAUSE_CHAIN (c);
+ }
+
+ for (i = 0; i < len; ++i)
+ if (!bitmap_bit_p (folded_deps_used, i))
+ break;
+ if (i == len)
+ {
+ if (neg_offset_p)
+ folded_deps[0] = -folded_deps[0];
+
+ tree vec = NULL;
+ i = len;
+ do
+ {
+ i--;
+ vec = tree_cons (wide_int_to_tree (TREE_TYPE (fd.loops[i].v),
+ folded_deps[i]),
+ iter_vars[i], vec);
+ }
+ while (i);
+
+ c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_DEPEND);
+ OMP_CLAUSE_DEPEND_KIND (c) = OMP_CLAUSE_DEPEND_SINK;
+ OMP_CLAUSE_DECL (c) = vec;
+ OMP_CLAUSE_CHAIN (c) = gimple_omp_ordered_clauses (ord_stmt);
+ *gimple_omp_ordered_clauses_ptr (ord_stmt) = c;
+ }
+
+ lower_omp_ordered_ret:
+ sbitmap_free (folded_deps_used);
+ folded_deps.release ();
+}
+
+
/* Expand code for an OpenMP ordered directive. */
static void
@@ -11979,6 +12192,8 @@ lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gcall *x;
gbind *bind;
+ lower_omp_ordered_clauses (as_a <gomp_ordered *> (stmt), ctx);
+
push_gimplify_context ();
block = make_node (BLOCK);
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-fopenmp -fdump-tree-gimple" } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
/* Test that we adjust pointer offsets for sink variables
correctly. */
@@ -22,4 +22,4 @@ funk (foo *begin, foo *end)
}
}
-/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+400\\)" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+400.\\)" 1 "omplower" } } */
@@ -12,6 +12,7 @@ void baz ()
{
#pragma omp ordered depend(sink:i-3,j)
bar (i, j, 0);
+#pragma omp ordered depend(source)
}
}
@@ -8,7 +8,7 @@ typedef struct {
char stuff[400];
} foo;
-foo *p, *q, *r;
+foo *end, *begin, *p;
template<int N>
void
@@ -16,7 +16,7 @@ funk ()
{
int i,j;
#pragma omp parallel for ordered(1)
- for (p=q; p < q; p--)
+ for (p=end; p > begin; p--)
{
#pragma omp ordered depend(sink:p+1)
void bar ();
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
+
+/* Test depend(sink) clause folding. */
+
+int i,j, N;
+
+extern void bar();
+
+void
+funk ()
+{
+#pragma omp parallel for ordered(2)
+ for (i=0; i < N; i++)
+ for (j=0; j < N; ++j)
+ {
+/* We remove the (sink:i,j-2) by virtue of it the i+0. The remaining
+ clauses get folded with a GCD of -2 for `i' and a minimum of -1 for
+ 'j'. */
+#pragma omp ordered \
+ depend(sink:i-8,j-1) \
+ depend(sink:i, j-2) \
+ depend(sink:i-4,j+3) \
+ depend(sink:i-6,j+2)
+ bar();
+#pragma omp ordered depend(source)
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "omp ordered depend\\(sink:i-2,j-1\\)" 1 "omplower" } } */
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+
+int i,j, N;
+
+extern void bar();
+
+void
+funk ()
+{
+#pragma omp parallel for ordered(2)
+ for (i=0; i < N; i += 3)
+ for (j=0; j < N; ++j)
+ {
+#pragma omp ordered depend(sink:i-8,j-1) /* { dg-warning "ignoring sink clause with offset that is not a multiple" } */
+#pragma omp ordered depend(sink:i+3,j-1) /* { dg-error "first offset must be in opposite direction" } */
+ bar();
+#pragma omp ordered depend(source)
+ }
+}
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
+
+/* Test that we fold sink offsets correctly while taking into account
+ pointer sizes. */
+
+typedef struct {
+ char stuff[400];
+} foo;
+
+void
+funk (foo *begin, foo *end)
+{
+ foo *p;
+#pragma omp parallel for ordered(1)
+ for (p=end; p > begin; p--)
+ {
+#pragma omp ordered depend(sink:p+2) depend(sink:p+4)
+ void bar ();
+ bar();
+#pragma omp ordered depend(source)
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+800B\\)" 1 "omplower" } } */
@@ -514,6 +514,7 @@ namespace wi
BINARY_FUNCTION div_round (const T1 &, const T2 &, signop, bool * = 0);
BINARY_FUNCTION divmod_trunc (const T1 &, const T2 &, signop,
WI_BINARY_RESULT (T1, T2) *);
+ BINARY_FUNCTION gcd (const T1 &, const T2 &, signop = UNSIGNED);
BINARY_FUNCTION mod_trunc (const T1 &, const T2 &, signop, bool * = 0);
BINARY_FUNCTION smod_trunc (const T1 &, const T2 &);
BINARY_FUNCTION umod_trunc (const T1 &, const T2 &);
@@ -2653,6 +2654,27 @@ wi::divmod_trunc (const T1 &x, const T2 &y, signop sgn,
return quotient;
}
+/* Compute the greatest common divisor of two numbers A and B using
+ Euclid's algorithm. */
+template <typename T1, typename T2>
+inline WI_BINARY_RESULT (T1, T2)
+wi::gcd (const T1 &a, const T2 &b, signop sgn)
+{
+ T1 x, y, z;
+
+ x = wi::abs (a);
+ y = wi::abs (b);
+
+ while (gt_p (x, 0, sgn))
+ {
+ z = mod_trunc (y, x, sgn);
+ y = x;
+ x = z;
+ }
+
+ return y;
+}
+
/* Compute X / Y, rouding towards 0, and return the remainder.
Treat X and Y as having the signedness given by SGN. Indicate
in *OVERFLOW if the division overflows. */