Patchwork [committed] Fix expansion of some #pragma omp for loops

login
register
mail settings
Submitter Jakub Jelinek
Date May 16, 2013, 10:52 a.m.
Message ID <20130516105229.GE1377@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/244261/
State New
Headers show

Comments

Jakub Jelinek - May 16, 2013, 10:52 a.m.
Hi!

As the testcases show, if in a schedule(static) (the default schedule)
or schedule(static,N) non-collapsed loop with an unsigned integral iterator
the loop condition is false upon entering the loop,
but (n2 + step-1 - n1) / step is not 0, we could run the loop body, possibly
many times, rather than never.
Similarly for collapsed loops: if any of the collapsed loops had its
condition false initially, we could still compute a non-zero number of
iterations and run the body.
For non-collapsed loops with signed iterators or pointer iterators this
isn't a problem, because (n2 + step-1 - n1) / step is then evaluated in a
signed integer type, so when the condition is initially false that count
is zero or negative and we never loop.  Similarly, for the cases where we
call the runtime, the runtime already checks for this case properly.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk
and 4.8 branch.

2013-05-16  Jakub Jelinek  <jakub@redhat.com>

	* omp-low.c (extract_omp_for_data): For collapsed loops,
	if at least one of the loops is known at compile time to
	iterate zero times, set count to 0.
	(expand_omp_regimplify_p): New function.
	(expand_omp_for_generic): For collapsed loops, if at least
	one of the loops isn't known to iterate at least once,
	add runtime check with setting count to 0.
	(expand_omp_for_static_nochunk, expand_omp_for_static_chunk):
	For unsigned types if it isn't known at compile time that
	the loop will iterate at least once, add runtime check to bypass
	the whole loop if initial condition isn't true.

	* testsuite/libgomp.c/loop-13.c: New test.
	* testsuite/libgomp.c/loop-14.c: New test.
	* testsuite/libgomp.c/loop-15.c: New test.
	* testsuite/libgomp.c++/loop-13.C: New test.
	* testsuite/libgomp.c++/loop-14.C: New test.
	* testsuite/libgomp.c++/loop-15.C: New test.


	Jakub

Patch

--- gcc/omp-low.c.jj	2013-04-30 10:45:10.000000000 +0200
+++ gcc/omp-low.c	2013-05-16 08:43:49.535590890 +0200
@@ -398,11 +398,16 @@  extract_omp_for_data (gimple for_stmt, s
 
       if (collapse_count && *collapse_count == NULL)
 	{
-	  if ((i == 0 || count != NULL_TREE)
-	      && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
-	      && TREE_CONSTANT (loop->n1)
-	      && TREE_CONSTANT (loop->n2)
-	      && TREE_CODE (loop->step) == INTEGER_CST)
+	  t = fold_binary (loop->cond_code, boolean_type_node,
+			   fold_convert (TREE_TYPE (loop->v), loop->n1),
+			   fold_convert (TREE_TYPE (loop->v), loop->n2));
+	  if (t && integer_zerop (t))
+	    count = build_zero_cst (long_long_unsigned_type_node);
+	  else if ((i == 0 || count != NULL_TREE)
+		   && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
+		   && TREE_CONSTANT (loop->n1)
+		   && TREE_CONSTANT (loop->n2)
+		   && TREE_CODE (loop->step) == INTEGER_CST)
 	    {
 	      tree itype = TREE_TYPE (loop->v);
 
@@ -435,7 +440,7 @@  extract_omp_for_data (gimple for_stmt, s
 	      if (TREE_CODE (count) != INTEGER_CST)
 		count = NULL_TREE;
 	    }
-	  else
+	  else if (count && !integer_zerop (count))
 	    count = NULL_TREE;
 	}
     }
@@ -3387,6 +3392,25 @@  optimize_omp_library_calls (gimple entry
       }
 }
 
+/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
+   regimplified.  */
+
+static tree
+expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
+{
+  tree t = *tp;
+
+  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
+  if (TREE_CODE (t) == VAR_DECL && DECL_HAS_VALUE_EXPR_P (t))
+    return t;
+
+  if (TREE_CODE (t) == ADDR_EXPR)
+    recompute_tree_invariant_for_addr_expr (t);
+
+  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
+  return NULL_TREE;
+}
+
 /* Expand the OpenMP parallel or task directive starting at REGION.  */
 
 static void
@@ -3662,22 +3686,29 @@  expand_omp_taskreg (struct omp_region *r
 
     we generate pseudocode
 
+	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
 	if (cond3 is <)
 	  adj = STEP3 - 1;
 	else
 	  adj = STEP3 + 1;
 	count3 = (adj + N32 - N31) / STEP3;
+	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
 	if (cond2 is <)
 	  adj = STEP2 - 1;
 	else
 	  adj = STEP2 + 1;
 	count2 = (adj + N22 - N21) / STEP2;
+	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
 	if (cond1 is <)
 	  adj = STEP1 - 1;
 	else
 	  adj = STEP1 + 1;
 	count1 = (adj + N12 - N11) / STEP1;
 	count = count1 * count2 * count3;
+	goto Z1;
+    Z0:
+	count = 0;
+    Z1:
 	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
 	if (more) goto L0; else goto L3;
     L0:
@@ -3785,6 +3816,9 @@  expand_omp_for_generic (struct omp_regio
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
   if (fd->collapse > 1)
     {
+      basic_block zero_iter_bb = NULL;
+      int first_zero_iter = -1;
+
       /* collapsed loops need work for expansion in SSA form.  */
       gcc_assert (!gimple_in_ssa_p (cfun));
       counts = (tree *) alloca (fd->collapse * sizeof (tree));
@@ -3792,6 +3826,51 @@  expand_omp_for_generic (struct omp_regio
 	{
 	  tree itype = TREE_TYPE (fd->loops[i].v);
 
+	  if (SSA_VAR_P (fd->loop.n2)
+	      && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
+				    fold_convert (itype, fd->loops[i].n1),
+				    fold_convert (itype, fd->loops[i].n2)))
+		  == NULL_TREE || !integer_onep (t)))
+	    {
+	      tree n1, n2;
+	      n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
+	      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
+					     true, GSI_SAME_STMT);
+	      n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
+	      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
+					     true, GSI_SAME_STMT);
+	      stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
+					NULL_TREE, NULL_TREE);
+	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+	      if (walk_tree (gimple_cond_lhs_ptr (stmt),
+			     expand_omp_regimplify_p, NULL, NULL)
+		  || walk_tree (gimple_cond_rhs_ptr (stmt),
+				expand_omp_regimplify_p, NULL, NULL))
+		{
+		  gsi = gsi_for_stmt (stmt);
+		  gimple_regimplify_operands (stmt, &gsi);
+		}
+	      e = split_block (entry_bb, stmt);
+	      if (zero_iter_bb == NULL)
+		{
+		  first_zero_iter = i;
+		  zero_iter_bb = create_empty_bb (entry_bb);
+		  if (current_loops)
+		    add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
+		  gsi = gsi_after_labels (zero_iter_bb);
+		  stmt = gimple_build_assign (fd->loop.n2,
+					      build_zero_cst (type));
+		  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+		  set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
+					   entry_bb);
+		}
+	      ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
+	      ne->probability = REG_BR_PROB_BASE / 2000 - 1;
+	      e->flags = EDGE_TRUE_VALUE;
+	      e->probability = REG_BR_PROB_BASE - ne->probability;
+	      entry_bb = e->dest;
+	      gsi = gsi_last_bb (entry_bb);
+	    }
 	  if (POINTER_TYPE_P (itype))
 	    itype = signed_type_for (itype);
 	  t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
@@ -3836,6 +3915,23 @@  expand_omp_for_generic (struct omp_regio
 	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
 	    }
 	}
+      if (zero_iter_bb)
+	{
+	  /* Some counts[i] vars might be uninitialized if
+	     some loop has zero iterations.  But the body shouldn't
+	     be executed in that case, so just avoid uninit warnings.  */
+	  for (i = first_zero_iter; i < fd->collapse; i++)
+	    if (SSA_VAR_P (counts[i]))
+	      TREE_NO_WARNING (counts[i]) = 1;
+	  gsi_prev (&gsi);
+	  e = split_block (entry_bb, gsi_stmt (gsi));
+	  entry_bb = e->dest;
+	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
+	  gsi = gsi_last_bb (entry_bb);
+	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
+				   get_immediate_dominator (CDI_DOMINATORS,
+							    zero_iter_bb));
+	}
     }
   if (in_combined_parallel)
     {
@@ -4169,6 +4265,7 @@  expand_omp_for_generic (struct omp_regio
 
    where COND is "<" or ">", we generate pseudocode
 
+	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
 	if (cond is <)
 	  adj = STEP - 1;
 	else
@@ -4229,6 +4326,50 @@  expand_omp_for_static_nochunk (struct om
   gsi = gsi_last_bb (entry_bb);
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
 
+  t = fold_binary (fd->loop.cond_code, boolean_type_node,
+		   fold_convert (type, fd->loop.n1),
+		   fold_convert (type, fd->loop.n2));
+  if (TYPE_UNSIGNED (type)
+      && (t == NULL_TREE || !integer_onep (t)))
+    {
+      tree n1, n2;
+      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
+      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
+				     true, GSI_SAME_STMT);
+      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
+      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
+				     true, GSI_SAME_STMT);
+      stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
+				NULL_TREE, NULL_TREE);
+      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+      if (walk_tree (gimple_cond_lhs_ptr (stmt),
+		     expand_omp_regimplify_p, NULL, NULL)
+	  || walk_tree (gimple_cond_rhs_ptr (stmt),
+			expand_omp_regimplify_p, NULL, NULL))
+	{
+	  gsi = gsi_for_stmt (stmt);
+	  gimple_regimplify_operands (stmt, &gsi);
+	}
+      ep = split_block (entry_bb, stmt);
+      ep->flags = EDGE_TRUE_VALUE;
+      entry_bb = ep->dest;
+      ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
+      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
+      ep->probability = REG_BR_PROB_BASE / 2000 - 1;
+      if (gimple_in_ssa_p (cfun))
+	{
+	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
+	  for (gsi = gsi_start_phis (fin_bb);
+	       !gsi_end_p (gsi); gsi_next (&gsi))
+	    {
+	      gimple phi = gsi_stmt (gsi);
+	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
+			   ep, UNKNOWN_LOCATION);
+	    }
+	}
+      gsi = gsi_last_bb (entry_bb);
+    }
+
   t = build_call_expr (builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS), 0);
   t = fold_convert (itype, t);
   nthreads = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
@@ -4395,6 +4536,7 @@  expand_omp_for_static_nochunk (struct om
 
    where COND is "<" or ">", we generate pseudocode
 
+	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
 	if (cond is <)
 	  adj = STEP - 1;
 	else
@@ -4460,6 +4602,50 @@  expand_omp_for_static_chunk (struct omp_
   si = gsi_last_bb (entry_bb);
   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_FOR);
 
+  t = fold_binary (fd->loop.cond_code, boolean_type_node,
+		   fold_convert (type, fd->loop.n1),
+		   fold_convert (type, fd->loop.n2));
+  if (TYPE_UNSIGNED (type)
+      && (t == NULL_TREE || !integer_onep (t)))
+    {
+      tree n1, n2;
+      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
+      n1 = force_gimple_operand_gsi (&si, n1, true, NULL_TREE,
+				     true, GSI_SAME_STMT);
+      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
+      n2 = force_gimple_operand_gsi (&si, n2, true, NULL_TREE,
+				     true, GSI_SAME_STMT);
+      stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
+				NULL_TREE, NULL_TREE);
+      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
+      if (walk_tree (gimple_cond_lhs_ptr (stmt),
+		     expand_omp_regimplify_p, NULL, NULL)
+	  || walk_tree (gimple_cond_rhs_ptr (stmt),
+			expand_omp_regimplify_p, NULL, NULL))
+	{
+	  si = gsi_for_stmt (stmt);
+	  gimple_regimplify_operands (stmt, &si);
+	}
+      se = split_block (entry_bb, stmt);
+      se->flags = EDGE_TRUE_VALUE;
+      entry_bb = se->dest;
+      se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
+      se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
+      se->probability = REG_BR_PROB_BASE / 2000 - 1;
+      if (gimple_in_ssa_p (cfun))
+	{
+	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
+	  for (si = gsi_start_phis (fin_bb);
+	       !gsi_end_p (si); gsi_next (&si))
+	    {
+	      gimple phi = gsi_stmt (si);
+	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
+			   se, UNKNOWN_LOCATION);
+	    }
+	}
+      si = gsi_last_bb (entry_bb);
+    }
+
   t = build_call_expr (builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS), 0);
   t = fold_convert (itype, t);
   nthreads = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
--- libgomp/testsuite/libgomp.c/loop-13.c.jj	2013-05-15 23:31:59.460083546 +0200
+++ libgomp/testsuite/libgomp.c/loop-13.c	2013-05-15 23:32:11.584015252 +0200
@@ -0,0 +1,253 @@ 
+/* { dg-do run } */
+
+volatile int ji = 100, ki = 2;
+volatile unsigned int ju = 100, ku = 2;
+volatile long long int jll = 100, kll = 2;
+volatile unsigned long long int jull = 100, kull = 2;
+unsigned long long l;
+
+void
+f0 (void)
+{
+  int i, j, k;
+  unsigned int j2, k2;
+  #pragma omp for reduction(+: l)
+  for (i = ji; i < ki; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l)
+  for (i = ji; i < ki; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3)
+  for (j = 0; j < 4; j++)
+    for (i = ji; i < ki; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j = 0; j < 4; j++)
+    for (i = ji; i < ki; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = ji; i < ki; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = ji; i < ki; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j = 0; j < 4; j++)
+    for (i = ji; i < ki; i++)
+      for (k = ki + 10; k < ji - 10; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j = ki + 10; j < ji - 10; j++)
+    for (i = ji; i < ki; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+}
+
+void
+f1 (void)
+{
+  unsigned int i, j, k;
+  int j2, k2;
+  #pragma omp for reduction(+: l)
+  for (i = ju; i < ku; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l)
+  for (i = ju; i < ku; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3)
+  for (j = 0; j < 4; j++)
+    for (i = ju; i < ku; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j = 0; j < 4; j++)
+    for (i = ju; i < ku; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = ju; i < ku; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = ju; i < ku; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j = 0; j < 4; j++)
+    for (i = ju; i < ku; i++)
+      for (k = ku; k < ju; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j = ku; j < ju; j++)
+    for (i = ju; i < ku; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+}
+
+void
+f2 (void)
+{
+  long long int i, j, k;
+  unsigned long long int j2, k2;
+  #pragma omp for reduction(+: l)
+  for (i = jll; i < kll; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l)
+  for (i = jll; i < kll; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3)
+  for (j = 0; j < 4; j++)
+    for (i = jll; i < kll; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j = 0; j < 4; j++)
+    for (i = jll; i < kll; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = jll; i < kll; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = jll; i < kll; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j = 0; j < 4; j++)
+    for (i = jll; i < kll; i++)
+      for (k = kll; k < jll; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j = kll; j < jll; j++)
+    for (i = jll; i < kll; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+}
+
+void
+f3 (void)
+{
+  unsigned long long int i, j, k;
+  long long int j2, k2;
+  #pragma omp for reduction(+: l)
+  for (i = jull; i < kull; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l)
+  for (i = jull; i < kull; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3)
+  for (j = 0; j < 4; j++)
+    for (i = jull; i < kull; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j = 0; j < 4; j++)
+    for (i = jull; i < kull; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = jull; i < kull; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = jull; i < kull; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j = 0; j < 4; j++)
+    for (i = jull; i < kull; i++)
+      for (k = kull; k < jull; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3)
+  for (j = kull; j < jull; j++)
+    for (i = jull; i < kull; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  f0 ();
+  f1 ();
+  f2 ();
+  f3 ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/loop-14.c.jj	2013-05-15 23:32:23.734946660 +0200
+++ libgomp/testsuite/libgomp.c/loop-14.c	2013-05-15 23:32:47.852810682 +0200
@@ -0,0 +1,253 @@ 
+/* { dg-do run } */
+
+volatile int ji = 100, ki = 2;
+volatile unsigned int ju = 100, ku = 2;
+volatile long long int jll = 100, kll = 2;
+volatile unsigned long long int jull = 100, kull = 2;
+unsigned long long l;
+
+void
+f0 (void)
+{
+  int i, j, k;
+  unsigned int j2, k2;
+  #pragma omp for reduction(+: l) schedule(static, 2)
+  for (i = ji; i < ki; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) schedule(static, 2)
+  for (i = ji; i < ki; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = 0; j < 4; j++)
+    for (i = ji; i < ki; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = 0; j < 4; j++)
+    for (i = ji; i < ki; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = ji; i < ki; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = ji; i < ki; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = 0; j < 4; j++)
+    for (i = ji; i < ki; i++)
+      for (k = ki + 10; k < ji - 10; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = ki + 10; j < ji - 10; j++)
+    for (i = ji; i < ki; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+}
+
+void
+f1 (void)
+{
+  unsigned int i, j, k;
+  int j2, k2;
+  #pragma omp for reduction(+: l) schedule(static, 2)
+  for (i = ju; i < ku; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) schedule(static, 2)
+  for (i = ju; i < ku; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = 0; j < 4; j++)
+    for (i = ju; i < ku; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = 0; j < 4; j++)
+    for (i = ju; i < ku; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = ju; i < ku; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = ju; i < ku; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = 0; j < 4; j++)
+    for (i = ju; i < ku; i++)
+      for (k = ku; k < ju; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = ku; j < ju; j++)
+    for (i = ju; i < ku; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+}
+
+void
+f2 (void)
+{
+  long long int i, j, k;
+  unsigned long long int j2, k2;
+  #pragma omp for reduction(+: l) schedule(static, 2)
+  for (i = jll; i < kll; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) schedule(static, 2)
+  for (i = jll; i < kll; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = 0; j < 4; j++)
+    for (i = jll; i < kll; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = 0; j < 4; j++)
+    for (i = jll; i < kll; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = jll; i < kll; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = jll; i < kll; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = 0; j < 4; j++)
+    for (i = jll; i < kll; i++)
+      for (k = kll; k < jll; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = kll; j < jll; j++)
+    for (i = jll; i < kll; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+}
+
+void
+f3 (void)
+{
+  unsigned long long int i, j, k;
+  long long int j2, k2;
+  #pragma omp for reduction(+: l) schedule(static, 2)
+  for (i = jull; i < kull; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) schedule(static, 2)
+  for (i = jull; i < kull; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = 0; j < 4; j++)
+    for (i = jull; i < kull; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = 0; j < 4; j++)
+    for (i = jull; i < kull; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = jull; i < kull; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = jull; i < kull; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = 0; j < 4; j++)
+    for (i = jull; i < kull; i++)
+      for (k = kull; k < jull; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(static, 2)
+  for (j = kull; j < jull; j++)
+    for (i = jull; i < kull; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  f0 ();
+  f1 ();
+  f2 ();
+  f3 ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/loop-15.c.jj	2013-05-15 23:32:55.239769891 +0200
+++ libgomp/testsuite/libgomp.c/loop-15.c	2013-05-15 23:33:06.200710228 +0200
@@ -0,0 +1,253 @@ 
+/* { dg-do run } */
+
+volatile int ji = 100, ki = 2;
+volatile unsigned int ju = 100, ku = 2;
+volatile long long int jll = 100, kll = 2;
+volatile unsigned long long int jull = 100, kull = 2;
+unsigned long long l;
+
+void
+f0 (void)
+{
+  int i, j, k;
+  unsigned int j2, k2;
+  #pragma omp for reduction(+: l) schedule(runtime)
+  for (i = ji; i < ki; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) schedule(runtime)
+  for (i = ji; i < ki; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = 0; j < 4; j++)
+    for (i = ji; i < ki; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = 0; j < 4; j++)
+    for (i = ji; i < ki; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(runtime)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = ji; i < ki; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = ji; i < ki; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = 0; j < 4; j++)
+    for (i = ji; i < ki; i++)
+      for (k = ki + 10; k < ji - 10; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = ki + 10; j < ji - 10; j++)
+    for (i = ji; i < ki; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+}
+
+void
+f1 (void)
+{
+  unsigned int i, j, k;
+  int j2, k2;
+  #pragma omp for reduction(+: l) schedule(runtime)
+  for (i = ju; i < ku; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) schedule(runtime)
+  for (i = ju; i < ku; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = 0; j < 4; j++)
+    for (i = ju; i < ku; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = 0; j < 4; j++)
+    for (i = ju; i < ku; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(runtime)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = ju; i < ku; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = ju; i < ku; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = 0; j < 4; j++)
+    for (i = ju; i < ku; i++)
+      for (k = ku; k < ju; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = ku; j < ju; j++)
+    for (i = ju; i < ku; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+}
+
+void
+f2 (void)
+{
+  long long int i, j, k;
+  unsigned long long int j2, k2;
+  #pragma omp for reduction(+: l) schedule(runtime)
+  for (i = jll; i < kll; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) schedule(runtime)
+  for (i = jll; i < kll; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = 0; j < 4; j++)
+    for (i = jll; i < kll; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = 0; j < 4; j++)
+    for (i = jll; i < kll; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(runtime)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = jll; i < kll; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = jll; i < kll; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = 0; j < 4; j++)
+    for (i = jll; i < kll; i++)
+      for (k = kll; k < jll; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = kll; j < jll; j++)
+    for (i = jll; i < kll; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+}
+
+void
+f3 (void)
+{
+  unsigned long long int i, j, k;
+  long long int j2, k2;
+  #pragma omp for reduction(+: l) schedule(runtime)
+  for (i = jull; i < kull; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) schedule(runtime)
+  for (i = jull; i < kull; i++)
+    l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = 0; j < 4; j++)
+    for (i = jull; i < kull; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = 0; j < 4; j++)
+    for (i = jull; i < kull; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp for reduction(+: l) collapse(3) schedule(runtime)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = jull; i < kull; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j2 = 0; j2 < 4; j2++)
+    for (i = jull; i < kull; i++)
+      for (k2 = 0; k2 < 5; k2 += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = 0; j < 4; j++)
+    for (i = jull; i < kull; i++)
+      for (k = kull; k < jull; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+  #pragma omp parallel for reduction(+: l) collapse(3) schedule(runtime)
+  for (j = kull; j < jull; j++)
+    for (i = jull; i < kull; i++)
+      for (k = 0; k < 5; k += 2)
+	l++;
+  if (l != 0)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  f0 ();
+  f1 ();
+  f2 ();
+  f3 ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/loop-13.C.jj	2013-05-15 23:33:46.268483414 +0200
+++ libgomp/testsuite/libgomp.c++/loop-13.C	2013-05-15 23:33:33.280670758 +0200
@@ -0,0 +1,3 @@ 
+/* { dg-do run } */
+
+#include "../libgomp.c/loop-13.c"
--- libgomp/testsuite/libgomp.c++/loop-14.C.jj	2013-05-15 23:33:49.937459988 +0200
+++ libgomp/testsuite/libgomp.c++/loop-14.C	2013-05-15 23:34:00.664399477 +0200
@@ -0,0 +1,3 @@ 
+/* { dg-do run } */
+
+#include "../libgomp.c/loop-14.c"
--- libgomp/testsuite/libgomp.c++/loop-15.C.jj	2013-05-15 23:33:53.459438900 +0200
+++ libgomp/testsuite/libgomp.c++/loop-15.C	2013-05-15 23:34:06.043366940 +0200
@@ -0,0 +1,3 @@ 
+/* { dg-do run } */
+
+#include "../libgomp.c/loop-15.c"