[gomp4.1] fold ordered depend(sink) clauses
diff mbox

Message ID 55BBC2E1.4070508@redhat.com
State New
Headers show

Commit Message

Aldy Hernandez July 31, 2015, 6:48 p.m. UTC
On 07/31/2015 09:38 AM, Jakub Jelinek wrote:
> On Thu, Jul 30, 2015 at 07:27:51PM -0700, Aldy Hernandez wrote:
>> +static gimple
>> +gimplify_omp_ordered (tree expr, gimple_seq body)
>> +{
>> +  tree c, decls;
>> +  int failures = 0;
>> +  unsigned int i;
>> +
>> +  if (gimplify_omp_ctxp)
>> +    for (c = OMP_ORDERED_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
>> +      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
>> +	  && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
>> +	{
>> +	  bool fail = false;
>> +	  for (decls = OMP_CLAUSE_DECL (c), i = 0;
>> +	       decls && TREE_CODE (decls) == TREE_LIST;
>> +	       decls = TREE_CHAIN (decls), ++i)
>> +	    if (i < gimplify_omp_ctxp->loop_iter_var.length ()
>> +		&& TREE_VALUE (decls) != gimplify_omp_ctxp->loop_iter_var[i])
>> +	      {
>> +		error_at (OMP_CLAUSE_LOCATION (c),
>> +			  "variable %qE is not an iteration "
>> +			  "of outermost loop %d, expected %qE",
>> +			  TREE_VALUE (decls), i + 1,
>> +			  gimplify_omp_ctxp->loop_iter_var[i]);
>> +		fail = true;
>> +		failures++;
>> +	      }
>> +	  /* Avoid being too redundant.  */
>> +	  if (!fail && i != gimplify_omp_ctxp->loop_iter_var.length ())
>> +	    {
>> +	      error_at (OMP_CLAUSE_LOCATION (c),
>> +			"number of variables in depend(sink) "
>> +			"clause does not match number of "
>> +			"iteration variables");
>> +	      failures++;
>> +	    }
>
> failures seems to be a write only variable.
> Perhaps if fail is true (set it to true after this error too),
> don't create the ordered at all?  Or drop the bogus clauses.

Oops.  Left over from when we were doing everything in the gimplifier.

I think we can just emit a NOP when error here.  There's no sense in 
removing individual clauses and all that since lower_omp_1 is going to 
ignore sources with error anyhow:

   /* If we have issued syntax errors, avoid doing any heavy lifting.
      Just replace the OMP directives with a NOP to avoid
      confusing RTL expansion.  */
   if (seen_error () && is_gimple_omp (stmt))
     {
       gsi_replace (gsi_p, gimple_build_nop (), true);
       return;
     }

>
>> +
>> +  /* ?? This is stupid.  We need to call extract_omp_for_data just
>> +     to get the number of ordered loops... */
>> +  extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, NULL);
>> +  if (!fd.ordered)
>> +    return;
>> +  struct omp_for_data_loop *loops
>> +    = (struct omp_for_data_loop *)
>> +    alloca (fd.ordered * sizeof (struct omp_for_data_loop));
>
> You can do just what expand_omp_for does:
>    struct omp_for_data fd;
>    struct omp_for_data_loop *loops;
>
>    loops
>      = (struct omp_for_data_loop *)
>        alloca (gimple_omp_for_collapse (ctx->outer->stmt)
>                * sizeof (struct omp_for_data_loop));
>    extract_omp_for_data (as_a <gomp_for *> (ctx->outer_stmt), &fd, loops);

Ah!  The collapse of the the outer statement!  Done.

>
>> +     For example:
>> +
>> +	     for (i=0; i < N; ++i)
>> +		depend(sink:i-8,j-1)
>> +		depend(sink:i,j-2)	// Completely ignored because i+0.
>> +		depend(sink:i-4,j+3)
>> +		depend(sink:i-6,j+2)
>
> Even when writing comments, it is better to make it valid:
> 	#pragma omp for ordered(2)
> 	for (i=0; i < N; ++i)
> 	  for (j=0; j < M; ++j)
> 	    {
> 	      #pragma omp ordered \
> 		depend(sink:i-8,j-1) \
> 		depend(sink:i,j-2) \	// Completely ignored because i+0.
> 		depend(sink:i-4,j+3) \
> 		depend(sink:i-6,j+2)
> 	      #pragma omp ordered depend(source)
> 	    }

You have _got_ to be kidding?  Really?  Oh well...done.

>
>> +
>> +     Folded clause is:
>> +
>> +        depend(sink:-gcd(8,4,6),min(-1,3,2))
>
> Spaces here instead of desirable tab.

Done.

>
>> +	  -or-
>> +	depend(sink:-2,-1)
>> +  */
>> +
>> +  /* FIXME: Computing GCD's where the first element is zero is
>> +     non-trivial in the presence of collapsed loops.  Do this later.  */
>> +  if (fd.collapse > 1)
>> +    return;
>
> Better ad an gcc_assert for now.

Done.

>
>> +	  enum {
>> +	    DIR_UNKNOWN,
>> +	    DIR_FORWARD,
>> +	    DIR_BACKWARD
>> +	  } loop_dir;
>> +	  switch (fd.loops[i].cond_code)
>> +	    {
>> +	    case LT_EXPR:
>> +	    case LE_EXPR:
>> +	      loop_dir = DIR_FORWARD;
>> +	      break;
>> +	    case GT_EXPR:
>> +	    case GE_EXPR:
>> +	      loop_dir = DIR_BACKWARD;
>> +	      break;
>> +	    default:
>> +	      loop_dir = DIR_UNKNOWN;
>> +	      gcc_unreachable ();
>> +	    }
>
> I think there is no point in doing this, extract_omp_for_data
> canonicalizes cond_code already, so it is always
> LT_EXPR or GT_EXPR.  So just gcc_assert it is these two and

Well, [LG]E_EXPR can sneak through for i >= fd->collapse in 
extract_omp_for_data:

	case LE_EXPR:
	  if (i >= fd->collapse)
	    break;
	  if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
	    loop->n2 = fold_build_pointer_plus_hwi_loc (loc, loop->n2, 1);
	  else
	    loop->n2 = fold_build2_loc (loc,
				    PLUS_EXPR, TREE_TYPE (loop->n2), loop->n2,
				    build_int_cst (TREE_TYPE (loop->n2), 1));
	  loop->cond_code = LT_EXPR;
	  break;

How about we assert that it's LE/LT/GT/GE and set a `bool forward' as 
suggested?

> compare it directly, or add bool forward = fd.loops[i].cond_code == LT_EXPR;
> ?
>> +
>> +	  /* While the committee makes up its mind, bail if we have any
>> +	     non-constant steps.  */
>> +	  if (TREE_CODE (fd.loops[i].step) != INTEGER_CST)
>> +	    goto gimplify_omp_ordered_ret;
>
> Misnamed label.  lower instead?

Fixed.

>
>> +	  wide_int offset = TREE_PURPOSE (decls);
>> +	  if (!iter_vars[i])
>> +	    iter_vars[i] = TREE_VALUE (decls);
>> +
>> +	  /* Ignore invalid offsets that are not multiples of the step.  */
>> +	  if (!wi::multiple_of_p
>> +	      (wi::abs (offset), wi::abs ((wide_int) fd.loops[i].step),
>> +	       UNSIGNED))
>
> What does wi::abs do with very large unsigned integers?
>    #pragma omp ordered(2)
>    for (unsigned int i = 64; i > 32; i += -1)
>      for (unsigned int j = 0; j < 32; j++)
> ?
> step of i is 0xffffffff and cond_code is GT_EXPR.  I suppose for unsigned
> steps you don't want to use wi::abs, but just negate the step if
> iterating downward?

Seems to work just fine for:

    #pragma omp parallel for ordered(2)
     for (unsigned int i = N; i > 32; i += -1)
           for (unsigned int j = 0; j < 32; j++)
     {
#pragma omp ordered depend(sink:i+1,j-2) depend(sink:i+2,j-3)
...
...

(gdb) call debug_generic_stmt(fd.loops[i].step)
4294967295

(gdb) p/x 4294967295
$17 = 0xffffffff

The conversion from tree -> wide-int correctly interprets this:

(gdb) print x
$18 = (const generic_wide_int<wide_int_storage> &) @0x7fffffffd5d0: 
{<wide_int_storage> = {val = {-1, 29799175824, 140737488344576}, len = 1,
     precision = 32}, static is_sign_extended = <optimized out>}
(gdb) call x.dump()
[0xffffffffffffffff], precision = 32

..and then negates it correctly.  For the i+1,j-2, we end up with 
(correctly):

(gdb) call x.dump()
[0x2], precision = 32
(gdb) call y.dump()
[0x1], precision = 32

Then the folded dependence is depend(sink:i+1,j+4294967293), which is -3 
and what I would expect.  Unless I'm missing something...

How does the attached look?

Aldy
commit 92e33750f31c7954b8a92763d20630f4e1af9b6b
Author: Aldy Hernandez <aldyh@redhat.com>
Date:   Wed Jul 29 13:39:06 2015 -0700

    	* wide-int.h (wi::gcd): New.
    	* gimplify.c (struct gimplify_omp_ctx): Rename iter_vars to
    	loop_iter_var.
    	(delete_omp_context): Same.
    	(gimplify_expr): Move code handling OMP_ORDERED into...
    	(gimplify_omp_ordered): ...here.  New.
    	* omp-low.c (lower_omp_ordered_clauses): New.
    	(lower_omp_ordered): Call lower_omp_ordered_clauses.
    testsuite/
    	* gcc.dg/gomp/sink-fold-1.c: New.
    	* gcc.dg/gomp/sink-fold-2.c: New.
    	* gcc.dg/gomp/sink-fold-3.c: New.
    	* c-c++-common/gomp/sink-4.c: Look in omplower dump file instead.
    	* g++.dg/gomp/sink-3.C: Have sink offset be in the opposite
    	direction.  Make variables more readable.

Comments

Jakub Jelinek July 31, 2015, 7:28 p.m. UTC | #1
On Fri, Jul 31, 2015 at 11:48:01AM -0700, Aldy Hernandez wrote:
> commit 92e33750f31c7954b8a92763d20630f4e1af9b6b
> Author: Aldy Hernandez <aldyh@redhat.com>
> Date:   Wed Jul 29 13:39:06 2015 -0700
> 
>     	* wide-int.h (wi::gcd): New.
>     	* gimplify.c (struct gimplify_omp_ctx): Rename iter_vars to
>     	loop_iter_var.
>     	(delete_omp_context): Same.
>     	(gimplify_expr): Move code handling OMP_ORDERED into...
>     	(gimplify_omp_ordered): ...here.  New.
>     	* omp-low.c (lower_omp_ordered_clauses): New.
>     	(lower_omp_ordered): Call lower_omp_ordered_clauses.
>     testsuite/
>     	* gcc.dg/gomp/sink-fold-1.c: New.
>     	* gcc.dg/gomp/sink-fold-2.c: New.
>     	* gcc.dg/gomp/sink-fold-3.c: New.
>     	* c-c++-common/gomp/sink-4.c: Look in omplower dump file instead.
>     	* g++.dg/gomp/sink-3.C: Have sink offset be in the opposite
>     	direction.  Make variables more readable.

Ok for branch, thanks.

	Jakub

Patch
diff mbox

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 2331001..250442f 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -153,7 +153,7 @@  struct gimplify_omp_ctx
   splay_tree variables;
   hash_set<tree> *privatized_types;
   /* Iteration variables in an OMP_FOR.  */
-  vec<tree> iter_vars;
+  vec<tree> loop_iter_var;
   location_t location;
   enum omp_clause_default_kind default_kind;
   enum omp_region_type region_type;
@@ -392,7 +392,7 @@  delete_omp_context (struct gimplify_omp_ctx *c)
 {
   splay_tree_delete (c->variables);
   delete c->privatized_types;
-  c->iter_vars.release ();
+  c->loop_iter_var.release ();
   XDELETE (c);
 }
 
@@ -7490,8 +7490,15 @@  gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 	      == TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt)));
   gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
 	      == TREE_VEC_LENGTH (OMP_FOR_INCR (for_stmt)));
-  gimplify_omp_ctxp->iter_vars.create (TREE_VEC_LENGTH
-				       (OMP_FOR_INIT (for_stmt)));
+
+  tree c = find_omp_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_ORDERED);
+  bool is_doacross = false;
+  if (c && OMP_CLAUSE_ORDERED_EXPR (c))
+    {
+      is_doacross = true;
+      gimplify_omp_ctxp->loop_iter_var.create (TREE_VEC_LENGTH
+					       (OMP_FOR_INIT (for_stmt)));
+    }
   for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
     {
       t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
@@ -7500,11 +7507,14 @@  gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
       gcc_assert (DECL_P (decl));
       gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (decl))
 		  || POINTER_TYPE_P (TREE_TYPE (decl)));
-      if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
-	gimplify_omp_ctxp->iter_vars.quick_push
-	  (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
-      else
-	gimplify_omp_ctxp->iter_vars.quick_push (decl);
+      if (is_doacross)
+	{
+	  if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
+	    gimplify_omp_ctxp->loop_iter_var.quick_push
+	      (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
+	  else
+	    gimplify_omp_ctxp->loop_iter_var.quick_push (decl);
+	}
 
       /* Make sure the iteration variable is private.  */
       tree c = NULL_TREE;
@@ -8387,6 +8397,56 @@  gimplify_transaction (tree *expr_p, gimple_seq *pre_p)
   return GS_ALL_DONE;
 }
 
+/* Gimplify an OMP_ORDERED construct.  EXPR is the tree version.  BODY
+   is the OMP_BODY of the original EXPR (which has already been
+   gimplified so it's not present in the EXPR).
+
+   Return the gimplified GIMPLE_OMP_ORDERED tuple.  */
+
+static gimple
+gimplify_omp_ordered (tree expr, gimple_seq body)
+{
+  tree c, decls;
+  int failures = 0;
+  unsigned int i;
+
+  if (gimplify_omp_ctxp)
+    for (c = OMP_ORDERED_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
+	  && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
+	{
+	  bool fail = false;
+	  for (decls = OMP_CLAUSE_DECL (c), i = 0;
+	       decls && TREE_CODE (decls) == TREE_LIST;
+	       decls = TREE_CHAIN (decls), ++i)
+	    if (i < gimplify_omp_ctxp->loop_iter_var.length ()
+		&& TREE_VALUE (decls) != gimplify_omp_ctxp->loop_iter_var[i])
+	      {
+		error_at (OMP_CLAUSE_LOCATION (c),
+			  "variable %qE is not an iteration "
+			  "of outermost loop %d, expected %qE",
+			  TREE_VALUE (decls), i + 1,
+			  gimplify_omp_ctxp->loop_iter_var[i]);
+		fail = true;
+		failures++;
+	      }
+	  /* Avoid being too redundant.  */
+	  if (!fail && i != gimplify_omp_ctxp->loop_iter_var.length ())
+	    {
+	      error_at (OMP_CLAUSE_LOCATION (c),
+			"number of variables in depend(sink) "
+			"clause does not match number of "
+			"iteration variables");
+	      fail = true;
+	      failures++;
+	    }
+	}
+
+  if (failures)
+    return gimple_build_nop ();
+  return gimple_build_omp_ordered (body, OMP_ORDERED_CLAUSES (expr));
+}
+
 /* Convert the GENERIC expression tree *EXPR_P to GIMPLE.  If the
    expression produces a value to be used as an operand inside a GIMPLE
    statement, the value will be stored back in *EXPR_P.  This value will
@@ -9200,38 +9260,7 @@  gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 		}
 		break;
 	      case OMP_ORDERED:
-		if (gimplify_omp_ctxp)
-		  for (tree c = OMP_ORDERED_CLAUSES (*expr_p);
-		       c; c = OMP_CLAUSE_CHAIN (c))
-		    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
-			&& OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
-		      {
-			unsigned int n = 0;
-			bool fail = false;
-			for (tree decls = OMP_CLAUSE_DECL (c);
-			     decls && TREE_CODE (decls) == TREE_LIST;
-			     decls = TREE_CHAIN (decls), ++n)
-			  if (n < gimplify_omp_ctxp->iter_vars.length ()
-			      && TREE_VALUE (decls)
-			      != gimplify_omp_ctxp->iter_vars[n])
-			    {
-			      error_at (OMP_CLAUSE_LOCATION (c),
-					"variable %qE is not an iteration "
-					"of outermost loop %d, expected %qE",
-					TREE_VALUE (decls), n + 1,
-					gimplify_omp_ctxp->iter_vars[n]);
-			      fail = true;
-			    }
-			/* Avoid being too redundant.  */
-			if (!fail
-			    && n != gimplify_omp_ctxp->iter_vars.length ())
-			  error_at (OMP_CLAUSE_LOCATION (c),
-			     "number of variables in depend(sink) clause "
-			     "does not match number of iteration variables");
-		      }
-
-		g = gimple_build_omp_ordered (body,
-					      OMP_ORDERED_CLAUSES (*expr_p));
+		g = gimplify_omp_ordered (*expr_p, body);
 		break;
 	      case OMP_CRITICAL:
 		gimplify_scan_omp_clauses (&OMP_CRITICAL_CLAUSES (*expr_p),
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index e7d21ea..489e939 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -11969,6 +11969,219 @@  lower_omp_taskgroup (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 }
 
 
+/* Fold the OMP_ORDERED_CLAUSES for the OMP_ORDERED in STMT if possible.  */
+
+static void
+lower_omp_ordered_clauses (gomp_ordered *ord_stmt, omp_context *ctx)
+{
+  struct omp_for_data fd;
+  if (!ctx->outer || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR)
+    return;
+
+  unsigned int len = gimple_omp_for_collapse (ctx->outer->stmt);
+  if (!len)
+    return;
+  struct omp_for_data_loop *loops
+    = (struct omp_for_data_loop *)
+    alloca (len * sizeof (struct omp_for_data_loop));
+  extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, loops);
+  if (!fd.ordered)
+    return;
+
+  /* Canonicalize sink dependence clauses into one folded clause if
+     possible.
+
+     The basic algorithm is to create a sink vector whose first
+     element is the GCD of all the first elements, and whose remaining
+     elements are the minimum of the subsequent columns.
+
+     We ignore dependence vectors whose first element is zero because
+     such dependencies are known to be executed by the same thread.
+
+     We take into account the direction of the loop, so a minimum
+     becomes a maximum if the loop is iterating backwards.  We also
+     ignore sink clauses where the loop direction is unknown, or where
+     the offsets are clearly invalid because they are not a multiple
+     of the loop increment.
+
+     For example:
+
+	#pragma omp for ordered(2)
+	for (i=0; i < N; ++i)
+	  for (j=0; j < M; ++j)
+	    {
+	      #pragma omp ordered \
+		depend(sink:i-8,j-1) \
+		depend(sink:i,j-2) \	// Completely ignored because i+0.
+		depend(sink:i-4,j+3) \
+		depend(sink:i-6,j+2)
+	      #pragma omp ordered depend(source)
+	    }
+
+     Folded clause is:
+
+	depend(sink:-gcd(8,4,6),min(-1,3,2))
+	  -or-
+	depend(sink:-2,-1)
+  */
+
+  /* FIXME: Computing GCD's where the first element is zero is
+     non-trivial in the presence of collapsed loops.  Do this later.  */
+  gcc_assert (fd.collapse <= 1);
+
+  vec<wide_int> folded_deps;
+  folded_deps.create (len);
+  folded_deps.quick_grow_cleared (len);
+  /* Bitmap representing dimensions in the final dependency vector that
+     have been set.  */
+  sbitmap folded_deps_used = sbitmap_alloc (len);
+  bitmap_clear (folded_deps_used);
+  /* TRUE if the first dimension's offset is negative.  */
+  bool neg_offset_p = false;
+
+  /* ?? We need to save the original iteration variables stored in the
+     depend clauses, because those in fd.loops[].v have already been
+     gimplified.  Perhaps we should use the gimplified versions. ??  */
+  tree *iter_vars = (tree *) alloca (sizeof (tree) * len);
+  memset (iter_vars, 0, sizeof (tree) * len);
+
+  tree *list_p = gimple_omp_ordered_clauses_ptr (ord_stmt);
+  tree c;
+  unsigned int i;
+  while ((c = *list_p) != NULL)
+    {
+      bool remove = false;
+
+      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND
+	  || OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_SINK)
+	goto next_ordered_clause;
+
+      tree decls;
+      for (decls = OMP_CLAUSE_DECL (c), i = 0;
+	   decls && TREE_CODE (decls) == TREE_LIST;
+	   decls = TREE_CHAIN (decls), ++i)
+	{
+	  gcc_assert (i < len);
+
+	  /* extract_omp_for_data has canonicalized the condition.  */
+	  gcc_assert (fd.loops[i].cond_code == LT_EXPR
+		      || fd.loops[i].cond_code == LE_EXPR
+		      || fd.loops[i].cond_code == GT_EXPR
+		      || fd.loops[i].cond_code == GE_EXPR);
+	  bool forward = fd.loops[i].cond_code == LT_EXPR
+	    || fd.loops[i].cond_code == LE_EXPR;
+
+	  /* While the committee makes up its mind, bail if we have any
+	     non-constant steps.  */
+	  if (TREE_CODE (fd.loops[i].step) != INTEGER_CST)
+	    goto lower_omp_ordered_ret;
+
+	  wide_int offset = TREE_PURPOSE (decls);
+	  if (!iter_vars[i])
+	    iter_vars[i] = TREE_VALUE (decls);
+
+	  /* Ignore invalid offsets that are not multiples of the step.  */
+	  if (!wi::multiple_of_p
+	      (wi::abs (offset), wi::abs ((wide_int) fd.loops[i].step),
+	       UNSIGNED))
+	    {
+	      warning_at (OMP_CLAUSE_LOCATION (c), 0,
+			  "ignoring sink clause with offset that is not "
+			  "a multiple of the loop step");
+	      remove = true;
+	      goto next_ordered_clause;
+	    }
+
+	  /* Calculate the first dimension.  The first dimension of
+	     the folded dependency vector is the GCD of the first
+	     elements, while ignoring any first elements whose offset
+	     is 0.  */
+	  if (i == 0)
+	    {
+	      /* Ignore dependence vectors whose first dimension is 0.  */
+	      if (offset == 0)
+		{
+		  remove = true;
+		  goto next_ordered_clause;
+		}
+	      else
+		{
+		  neg_offset_p =
+		    wi::neg_p (offset,
+			       TYPE_SIGN (TREE_TYPE (TREE_PURPOSE (decls))));
+		  if ((forward && !neg_offset_p)
+		      || (!forward && neg_offset_p))
+		    {
+		      error_at (OMP_CLAUSE_LOCATION (c),
+				"first offset must be in opposite direction "
+				"of loop iterations");
+		      goto lower_omp_ordered_ret;
+		    }
+		  /* Initialize the first time around.  */
+		  if (!bitmap_bit_p (folded_deps_used, 0))
+		    {
+		      bitmap_set_bit (folded_deps_used, 0);
+		      folded_deps[0] = wi::abs (offset);
+		    }
+		  else
+		    folded_deps[i] = wi::gcd (folded_deps[0], offset, UNSIGNED);
+		}
+	    }
+	  /* Calculate minimum for the remaining dimensions.  */
+	  else
+	    {
+	      if (!bitmap_bit_p (folded_deps_used, i))
+		{
+		  bitmap_set_bit (folded_deps_used, i);
+		  folded_deps[i] = offset;
+		}
+	      else if ((forward && wi::lts_p (offset, folded_deps[i]))
+		       || (!forward && wi::gts_p (offset, folded_deps[i])))
+		folded_deps[i] = offset;
+	    }
+	}
+
+      remove = true;
+
+    next_ordered_clause:
+      if (remove)
+	*list_p = OMP_CLAUSE_CHAIN (c);
+      else
+	list_p = &OMP_CLAUSE_CHAIN (c);
+    }
+
+  for (i = 0; i < len; ++i)
+    if (!bitmap_bit_p (folded_deps_used, i))
+      break;
+  if (i == len)
+    {
+      if (neg_offset_p)
+	folded_deps[0] = -folded_deps[0];
+
+      tree vec = NULL;
+      i = len;
+      do
+	{
+	  i--;
+	  vec = tree_cons (wide_int_to_tree (TREE_TYPE (fd.loops[i].v),
+					     folded_deps[i]),
+			   iter_vars[i], vec);
+	}
+      while (i);
+
+      c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_DEPEND);
+      OMP_CLAUSE_DEPEND_KIND (c) = OMP_CLAUSE_DEPEND_SINK;
+      OMP_CLAUSE_DECL (c) = vec;
+      OMP_CLAUSE_CHAIN (c) = gimple_omp_ordered_clauses (ord_stmt);
+      *gimple_omp_ordered_clauses_ptr (ord_stmt) = c;
+    }
+
+ lower_omp_ordered_ret:
+  sbitmap_free (folded_deps_used);
+  folded_deps.release ();
+}
+
+
 /* Expand code for an OpenMP ordered directive.  */
 
 static void
@@ -11979,6 +12192,8 @@  lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   gcall *x;
   gbind *bind;
 
+  lower_omp_ordered_clauses (as_a <gomp_ordered *> (stmt), ctx);
+
   push_gimplify_context ();
 
   block = make_node (BLOCK);
diff --git a/gcc/testsuite/c-c++-common/gomp/sink-4.c b/gcc/testsuite/c-c++-common/gomp/sink-4.c
index 7934de2..111178b 100644
--- a/gcc/testsuite/c-c++-common/gomp/sink-4.c
+++ b/gcc/testsuite/c-c++-common/gomp/sink-4.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-fopenmp -fdump-tree-gimple" } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
 
 /* Test that we adjust pointer offsets for sink variables
    correctly.  */
@@ -22,4 +22,4 @@  funk (foo *begin, foo *end)
     }
 }
 
-/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+400\\)" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+400.\\)" 1 "omplower" } } */
diff --git a/gcc/testsuite/g++.dg/gomp/sink-1.C b/gcc/testsuite/g++.dg/gomp/sink-1.C
index a83adbf..982d36c 100644
--- a/gcc/testsuite/g++.dg/gomp/sink-1.C
+++ b/gcc/testsuite/g++.dg/gomp/sink-1.C
@@ -12,6 +12,7 @@  void baz ()
       {
 #pragma omp ordered depend(sink:i-3,j)
 	bar (i, j, 0);
+#pragma omp ordered depend(source)
       }
 }
 
diff --git a/gcc/testsuite/g++.dg/gomp/sink-3.C b/gcc/testsuite/g++.dg/gomp/sink-3.C
index 83a742e..4271d66 100644
--- a/gcc/testsuite/g++.dg/gomp/sink-3.C
+++ b/gcc/testsuite/g++.dg/gomp/sink-3.C
@@ -8,7 +8,7 @@  typedef struct {
     char stuff[400];
 } foo;
 
-foo *p, *q, *r;
+foo *end, *begin, *p;
 
 template<int N>
 void
@@ -16,7 +16,7 @@  funk ()
 {
   int i,j;
 #pragma omp parallel for ordered(1)
-  for (p=q; p < q; p--)
+  for (p=end; p > begin; p--)
     {
 #pragma omp ordered depend(sink:p+1)
       void bar ();
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c
new file mode 100644
index 0000000..f2961b9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c
@@ -0,0 +1,30 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
+
+/* Test depend(sink) clause folding.  */
+
+int i,j, N;
+
+extern void bar();
+
+void
+funk ()
+{
+#pragma omp parallel for ordered(2)
+  for (i=0; i < N; i++)
+    for (j=0; j < N; ++j)
+    {
+/* We remove the (sink:i,j-2) by virtue of it the i+0.  The remaining
+   clauses get folded with a GCD of -2 for `i' and a minimum of -1 for
+   'j'.  */
+#pragma omp ordered \
+  depend(sink:i-8,j-1) \
+  depend(sink:i, j-2) \
+  depend(sink:i-4,j+3) \
+  depend(sink:i-6,j+2)
+        bar();
+#pragma omp ordered depend(source)
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "omp ordered depend\\(sink:i-2,j-1\\)" 1 "omplower" } } */
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c
new file mode 100644
index 0000000..b3b4ac7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+
+int i,j, N;
+
+extern void bar();
+
+void
+funk ()
+{
+#pragma omp parallel for ordered(2)
+  for (i=0; i < N; i += 3)
+    for (j=0; j < N; ++j)
+    {
+#pragma omp ordered depend(sink:i-8,j-1) /* { dg-warning "ignoring sink clause with offset that is not a multiple" } */
+#pragma omp ordered depend(sink:i+3,j-1) /* { dg-error "first offset must be in opposite direction" } */
+        bar();
+#pragma omp ordered depend(source)
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c
new file mode 100644
index 0000000..4d6293c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
+
+/* Test that we fold sink offsets correctly while taking into account
+   pointer sizes.  */
+
+typedef struct {
+    char stuff[400];
+} foo;
+
+void
+funk (foo *begin, foo *end)
+{
+  foo *p;
+#pragma omp parallel for ordered(1)
+  for (p=end; p > begin; p--)
+    {
+#pragma omp ordered depend(sink:p+2) depend(sink:p+4)
+      void bar ();
+        bar();
+#pragma omp ordered depend(source)
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+800B\\)" 1 "omplower" } } */
diff --git a/gcc/wide-int.h b/gcc/wide-int.h
index d8f7b46..c20db61 100644
--- a/gcc/wide-int.h
+++ b/gcc/wide-int.h
@@ -514,6 +514,7 @@  namespace wi
   BINARY_FUNCTION div_round (const T1 &, const T2 &, signop, bool * = 0);
   BINARY_FUNCTION divmod_trunc (const T1 &, const T2 &, signop,
 				WI_BINARY_RESULT (T1, T2) *);
+  BINARY_FUNCTION gcd (const T1 &, const T2 &, signop = UNSIGNED);
   BINARY_FUNCTION mod_trunc (const T1 &, const T2 &, signop, bool * = 0);
   BINARY_FUNCTION smod_trunc (const T1 &, const T2 &);
   BINARY_FUNCTION umod_trunc (const T1 &, const T2 &);
@@ -2653,6 +2654,27 @@  wi::divmod_trunc (const T1 &x, const T2 &y, signop sgn,
   return quotient;
 }
 
+/* Compute the greatest common divisor of two numbers A and B using
+   Euclid's algorithm.  */
+template <typename T1, typename T2>
+inline WI_BINARY_RESULT (T1, T2)
+wi::gcd (const T1 &a, const T2 &b, signop sgn)
+{
+  T1 x, y, z;
+
+  x = wi::abs (a);
+  y = wi::abs (b);
+
+  while (gt_p (x, 0, sgn))
+    {
+      z = mod_trunc (y, x, sgn);
+      y = x;
+      x = z;
+    }
+
+  return y;
+}
+
 /* Compute X / Y, rouding towards 0, and return the remainder.
    Treat X and Y as having the signedness given by SGN.  Indicate
    in *OVERFLOW if the division overflows.  */