diff mbox

[gomp4.1] Parsing ordered(n) loops in C/C++

Message ID 20150702075501.GC10247@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek July 2, 2015, 7:55 a.m. UTC
Hi!

I've committed following patch to parse collapse+ordered-1 loops
if ordered(n) clause is present, and adjust ompexp, so that we actually
expand it as a collapsed loop with normal ordered-1 loops inside of it.

Example testcase (for now can be parsed and expanded only with the
ordered constructs commented out, Aldy is working on that).

void bar (int, int, int);

void
foo (int n, int m, int o)
{
  int i, j, k;
  #pragma omp for collapse(2) ordered(2)
  for (i = 0; i < m; i++)
    {
      for (j = 0; j < n; j++)
        for (k = 0; k < o; k++)
          {
#pragma omp ordered depend(sink: i-1,j,k) depend(sink: i,j-1,k-1) depend(sink: i-1,j-1,k+1)
	    bar (i, j, k);
#pragma omp ordered depend(source)
	  }
    }
}

int
baz ()
{
  int i, j;
#pragma omp parallel for ordered(2)
  for (i=0; i < 100; ++i)
    for (j=0; j < 100; ++j)
      {
#pragma omp ordered depend(sink:i-1,j-3)
        bar (i, j, 0);
#pragma omp ordered depend(source)
      }
}

2015-07-02  Jakub Jelinek  <jakub@redhat.com>

	* omp-low.c (struct omp_for_data): Add ordered field.
	(extract_omp_for_data): Handle loops with ordered(n) clause.
	(expand_omp_for_ordered_loops): New function.
	(expand_omp_for_generic): Call it.
c/
	* c-parser.c (c_parser_omp_for_loop): Parse collapse + ordered - 1
	nested loops if ordered(n) clause is present.
cp/
	* parser.c (cp_parser_omp_for_loop): Parse collapse + ordered - 1
	nested loops if ordered(n) clause is present.


	Jakub
diff mbox

Patch

--- gcc/omp-low.c.jj	2015-07-01 12:50:49.000000000 +0200
+++ gcc/omp-low.c	2015-07-02 09:27:03.546405031 +0200
@@ -236,6 +236,7 @@  struct omp_for_data
   gomp_for *for_stmt;
   tree pre, iter_type;
   int collapse;
+  int ordered;
   bool have_nowait, have_ordered, simd_schedule;
   enum omp_clause_schedule_kind sched_kind;
   struct omp_for_data_loop *loops;
@@ -489,14 +490,15 @@  extract_omp_for_data (gomp_for *for_stmt
 
   fd->for_stmt = for_stmt;
   fd->pre = NULL;
-  fd->collapse = gimple_omp_for_collapse (for_stmt);
-  if (fd->collapse > 1)
+  if (gimple_omp_for_collapse (for_stmt) > 1)
     fd->loops = loops;
   else
     fd->loops = &fd->loop;
 
   fd->have_nowait = distribute || simd;
   fd->have_ordered = false;
+  fd->collapse = 1;
+  fd->ordered = 0;
   fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
   fd->chunk_size = NULL_TREE;
   fd->simd_schedule = false;
@@ -513,6 +515,8 @@  extract_omp_for_data (gomp_for *for_stmt
 	break;
       case OMP_CLAUSE_ORDERED:
 	fd->have_ordered = true;
+	if (OMP_CLAUSE_ORDERED_EXPR (t))
+	  fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t));
 	break;
       case OMP_CLAUSE_SCHEDULE:
 	gcc_assert (!distribute && !taskloop);
@@ -525,6 +529,7 @@  extract_omp_for_data (gomp_for *for_stmt
 	fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t);
 	break;
       case OMP_CLAUSE_COLLAPSE:
+	fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t));
 	if (fd->collapse > 1)
 	  {
 	    collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t);
@@ -559,9 +564,10 @@  extract_omp_for_data (gomp_for *for_stmt
 			 ? integer_zero_node : integer_one_node;
     }
 
-  for (i = 0; i < fd->collapse; i++)
+  int cnt = fd->collapse + (fd->ordered > 0 ? fd->ordered - 1 : 0);
+  for (i = 0; i < cnt; i++)
     {
-      if (fd->collapse == 1)
+      if (i == 0 && fd->collapse == 1)
 	loop = &fd->loop;
       else if (loops != NULL)
 	loop = loops + i;
@@ -589,6 +595,8 @@  extract_omp_for_data (gomp_for *for_stmt
 			  == GF_OMP_FOR_KIND_CILKFOR));
 	  break;
 	case LE_EXPR:
+	  if (i >= fd->collapse)
+	    break;
 	  if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
 	    loop->n2 = fold_build_pointer_plus_hwi_loc (loc, loop->n2, 1);
 	  else
@@ -598,6 +606,8 @@  extract_omp_for_data (gomp_for *for_stmt
 	  loop->cond_code = LT_EXPR;
 	  break;
 	case GE_EXPR:
+	  if (i >= fd->collapse)
+	    break;
 	  if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
 	    loop->n2 = fold_build_pointer_plus_hwi_loc (loc, loop->n2, -1);
 	  else
@@ -690,6 +700,9 @@  extract_omp_for_data (gomp_for *for_stmt
 	    }
 	}
 
+      if (i >= fd->collapse)
+	continue;
+
       if (collapse_count && *collapse_count == NULL)
 	{
 	  t = fold_binary (loop->cond_code, boolean_type_node,
@@ -770,6 +783,8 @@  extract_omp_for_data (gomp_for *for_stmt
       fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
       fd->loop.cond_code = LT_EXPR;
     }
+  else if (loops)
+    loops[0] = fd->loop;
 
   /* For OpenACC loops, force a chunk size of one, as this avoids the default
     scheduling where several subsequent iterations are being executed by the
@@ -6827,6 +6842,81 @@  extract_omp_for_update_vars (struct omp_
 }
 
 
+/* Wrap the body into fd->ordered - 1 loops that aren't collapsed.  */
+
+static basic_block
+expand_omp_for_ordered_loops (struct omp_for_data *fd, basic_block cont_bb,
+			      basic_block body_bb)
+{
+  if (fd->ordered <= 1)
+    return cont_bb;
+
+  if (!cont_bb)
+    {
+      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
+      for (int i = fd->collapse; i < fd->collapse + fd->ordered - 1; i++)
+	{
+	  tree type = TREE_TYPE (fd->loops[i].v);
+	  tree n1 = fold_convert (type, fd->loops[i].n1);
+	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
+	}
+      return NULL;
+    }
+
+  for (int i = fd->collapse + fd->ordered - 2; i >= fd->collapse; i--)
+    {
+      tree t, type = TREE_TYPE (fd->loops[i].v);
+      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
+      expand_omp_build_assign (&gsi, fd->loops[i].v,
+			       fold_convert (type, fd->loops[i].n1));
+      if (!gsi_end_p (gsi))
+	gsi_prev (&gsi);
+      else
+	gsi_last_bb (body_bb);
+      edge e1 = split_block (body_bb, gsi_stmt (gsi));
+      basic_block new_body = e1->dest;
+      if (body_bb == cont_bb)
+	cont_bb = new_body;
+      gsi = gsi_last_bb (cont_bb);
+      if (POINTER_TYPE_P (type))
+	t = fold_build_pointer_plus (fd->loops[i].v,
+				     fold_convert (sizetype, fd->loop.step));
+      else
+	t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
+			 fold_convert (type, fd->loop.step));
+      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
+      gsi_prev (&gsi);
+      edge e2 = split_block (cont_bb, gsi_stmt (gsi));
+      basic_block new_header = e2->dest;
+      gsi = gsi_after_labels (new_header);
+      tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
+					 true, GSI_SAME_STMT);
+      tree n2
+	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
+				    true, NULL_TREE, true, GSI_SAME_STMT);
+      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
+      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
+      edge e3 = split_block (new_header, gsi_stmt (gsi));
+      cont_bb = e3->dest;
+      remove_edge (e1);
+      make_edge (body_bb, new_header, EDGE_FALLTHRU);
+      e3->flags = EDGE_FALSE_VALUE;
+      e3->probability = REG_BR_PROB_BASE / 8;
+      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
+      e1->probability = REG_BR_PROB_BASE - e3->probability;
+
+      set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
+      set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
+
+      struct loop *loop = alloc_loop ();
+      loop->header = new_header;
+      loop->latch = e2->src;
+      add_loop (loop, body_bb->loop_father);
+    }
+  return cont_bb;
+}
+
+
 /* A subroutine of expand_omp_for.  Generate code for a parallel
    loop with any schedule.  Given parameters:
 
@@ -7226,6 +7316,8 @@  expand_omp_for_generic (struct omp_regio
   if (fd->collapse > 1)
     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
 
+  cont_bb = expand_omp_for_ordered_loops (fd, cont_bb, l1_bb);
+
   if (!broken_loop)
     {
       /* Code to control the increment and predicate for the sequential
--- gcc/c/c-parser.c.jj	2015-07-01 12:50:49.000000000 +0200
+++ gcc/c/c-parser.c	2015-07-01 13:01:23.665521635 +0200
@@ -13279,20 +13279,24 @@  c_parser_omp_for_loop (location_t loc, c
   tree decl, cond, incr, save_break, save_cont, body, init, stmt, cl;
   tree declv, condv, incrv, initv, ret = NULL;
   bool fail = false, open_brace_parsed = false;
-  int i, collapse = 1, nbraces = 0;
+  int i, collapse = 1, ordered = 0, count, nbraces = 0;
   location_t for_loc;
   vec<tree, va_gc> *for_block = make_tree_vector ();
 
   for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl))
     if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE)
       collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl));
-
-  gcc_assert (collapse >= 1);
-
-  declv = make_tree_vec (collapse);
-  initv = make_tree_vec (collapse);
-  condv = make_tree_vec (collapse);
-  incrv = make_tree_vec (collapse);
+    else if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_ORDERED
+	     && OMP_CLAUSE_ORDERED_EXPR (cl))
+      ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (cl));
+
+  gcc_assert (collapse >= 1 && ordered >= 0);
+  count = collapse + (ordered > 0 ? ordered - 1 : 0);
+
+  declv = make_tree_vec (count);
+  initv = make_tree_vec (count);
+  condv = make_tree_vec (count);
+  incrv = make_tree_vec (count);
 
   if (code != CILK_FOR
       && !c_parser_next_token_is_keyword (parser, RID_FOR))
@@ -13309,7 +13313,7 @@  c_parser_omp_for_loop (location_t loc, c
   for_loc = c_parser_peek_token (parser)->location;
   c_parser_consume_token (parser);
 
-  for (i = 0; i < collapse; i++)
+  for (i = 0; i < count; i++)
     {
       int bracecount = 0;
 
@@ -13418,7 +13422,7 @@  c_parser_omp_for_loop (location_t loc, c
 	}
 
     parse_next:
-      if (i == collapse - 1)
+      if (i == count - 1)
 	break;
 
       /* FIXME: OpenMP 3.0 draft isn't very clear on what exactly is allowed
@@ -13449,7 +13453,7 @@  c_parser_omp_for_loop (location_t loc, c
 		  bracecount--;
 		}
 	      fail = true;
-	      collapse = 0;
+	      count = 0;
 	      break;
 	    }
 	}
@@ -13530,10 +13534,10 @@  c_parser_omp_for_loop (location_t loc, c
 		  c = &OMP_CLAUSE_CHAIN (*c);
 		else
 		  {
-		    for (i = 0; i < collapse; i++)
+		    for (i = 0; i < count; i++)
 		      if (TREE_VEC_ELT (declv, i) == OMP_CLAUSE_DECL (*c))
 			break;
-		    if (i == collapse)
+		    if (i == count)
 		      c = &OMP_CLAUSE_CHAIN (*c);
 		    else if (OMP_CLAUSE_CODE (*c) == OMP_CLAUSE_FIRSTPRIVATE)
 		      {
--- gcc/cp/parser.c.jj	2015-07-01 12:50:50.000000000 +0200
+++ gcc/cp/parser.c	2015-07-02 09:29:23.766300769 +0200
@@ -30881,23 +30881,27 @@  cp_parser_omp_for_loop (cp_parser *parse
   tree this_pre_body, cl;
   location_t loc_first;
   bool collapse_err = false;
-  int i, collapse = 1, nbraces = 0;
+  int i, collapse = 1, ordered = 0, count, nbraces = 0;
   vec<tree, va_gc> *for_block = make_tree_vector ();
 
   for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl))
     if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE)
       collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl));
-
-  gcc_assert (collapse >= 1);
-
-  declv = make_tree_vec (collapse);
-  initv = make_tree_vec (collapse);
-  condv = make_tree_vec (collapse);
-  incrv = make_tree_vec (collapse);
+    else if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_ORDERED
+	     && OMP_CLAUSE_ORDERED_EXPR (cl))
+      ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (cl));
+
+  gcc_assert (collapse >= 1 && ordered >= 0);
+  count = collapse + (ordered > 0 ? ordered - 1 : 0);
+
+  declv = make_tree_vec (count);
+  initv = make_tree_vec (count);
+  condv = make_tree_vec (count);
+  incrv = make_tree_vec (count);
 
   loc_first = cp_lexer_peek_token (parser->lexer)->location;
 
-  for (i = 0; i < collapse; i++)
+  for (i = 0; i < count; i++)
     {
       int bracecount = 0;
       tree add_private_clause = NULL_TREE;
@@ -31059,7 +31063,7 @@  cp_parser_omp_for_loop (cp_parser *parse
       TREE_VEC_ELT (condv, i) = cond;
       TREE_VEC_ELT (incrv, i) = incr;
 
-      if (i == collapse - 1)
+      if (i == count - 1)
 	break;
 
       /* FIXME: OpenMP 3.0 draft isn't very clear on what exactly is allowed