diff mbox

Fix OpenMP combined simd offloading fallback if nvptx-none offloading is configured (PR middle-end/79236)

Message ID 20170126111803.GB1867@tucnak
State New
Headers show

Commit Message

Jakub Jelinek Jan. 26, 2017, 11:18 a.m. UTC
Hi!

If configured --enable-offload-targets=nvptx-none but for whatever
reason we can't offload to PTX (missing libgomp-nvptx* plugin,
missing libcuda.so.1, missing NVidia HW), lots of libgomp tests
fail.  The problem is that if there is simd combined with some
other loop (e.g. distribute simd, for simd, distribute parallel for simd)
after scan_omp creates the separate GIMPLE_OMP_FOR for simt only
(with _simt_ clause) and original GIMPLE_OMP_FOR for non-simt,
and then we lower_omp_for both of them, _looptemp_ clauses are
added to each of those with different decls.  When expanding
the outer loop construct (for, distribute) we then look up
inner_stmt and as there are now 2, we pick just one of them, apparently
the _simt_ one and the outer loop construct initializes those
_looptemp_ temporaries to the needed values, and then when
expanding each GIMPLE_OMP_FOR simd, it assumes the outer loop
initialized its _looptemp_ temporaries.  But this only works properly
if the decls in both simd constructs are the same, otherwise there
is no agreement between outer and inner construct on where the values are
passed.

Fixed by making sure we use the same decls between the sibling simd
constructs in this case.

Bootstrapped/regtested on x86_64-linux and i686-linux, additionally
tested with installed compiler testing of 3 different setups:
1) gcc without the libgomp-nvptx plugin and without the */accel support
   (I have a patch which makes that quietly non-fatal when not using
   explicit -foffload=, any interest in that upstream, or shall I keep
   that as a local hack; the purpose of that is for our rpm packaging,
   the compiler is always configured with nvptx-none offloading, but
   if gcc-offload-nvptx/libgomp-offload-nvptx packages aren't instaled,
   it works as if it wasn't configured in by default)
2) gcc with the libgomp-nvptx plugin, libcuda.so.1 moved away and
   without the */accel support
3) gcc with the libgomp-nvptx plugin, libcuda.so.1 accessible and
   */accel support as well (this is where it actually offloads and
   the few expected failures occur:
FAIL: libgomp.c/target-32.c (test for excess errors)
FAIL: libgomp.c/target-33.c execution test
FAIL: libgomp.c/target-34.c execution test
FAIL: libgomp.c/target-link-1.c execution test
FAIL: libgomp.c/thread-limit-2.c (test for excess errors)
FAIL: libgomp.fortran/target2.f90   -O0  execution test
FAIL: libgomp.fortran/target2.f90   -O1  execution test
   )
Committed to trunk.

2017-01-26  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/79236
	* omp-low.c (struct omp_context): Add simt_stmt field.
	(scan_omp_for): Return omp_context *.
	(scan_omp_simd): Set simt_stmt on the non-_simt_ SIMD
	context to the _simt_ SIMD stmt.
	(lower_omp_for): For combined SIMD with sibling _simt_
	SIMD, make sure to use the same decls in _looptemp_
	clauses as in the sibling.


	Jakub
diff mbox

Patch

--- gcc/omp-low.c.jj	2017-01-21 02:25:58.000000000 +0100
+++ gcc/omp-low.c	2017-01-25 22:28:43.059591621 +0100
@@ -108,6 +108,10 @@  struct omp_context
      barriers should jump to during omplower pass.  */
   tree cancel_label;
 
+  /* The sibling GIMPLE_OMP_FOR simd with _simt_ clause or NULL
+     otherwise.  */
+  gimple *simt_stmt;
+
   /* What to do with variables with implicitly determined sharing
      attributes.  */
   enum omp_clause_default_kind default_kind;
@@ -2127,7 +2131,7 @@  check_oacc_kernel_gwv (gomp_for *stmt, o
 
 /* Scan a GIMPLE_OMP_FOR.  */
 
-static void
+static omp_context *
 scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)
 {
   omp_context *ctx;
@@ -2200,6 +2204,7 @@  scan_omp_for (gomp_for *stmt, omp_contex
       scan_omp_op (gimple_omp_for_incr_ptr (stmt, i), ctx);
     }
   scan_omp (gimple_omp_body_ptr (stmt), ctx);
+  return ctx;
 }
 
 /* Duplicate #pragma omp simd, one for SIMT, another one for SIMD.  */
@@ -2241,7 +2246,7 @@  scan_omp_simd (gimple_stmt_iterator *gsi
   gimple_bind_set_body (bind, seq);
   update_stmt (bind);
   scan_omp_for (new_stmt, outer_ctx);
-  scan_omp_for (stmt, outer_ctx);
+  scan_omp_for (stmt, outer_ctx)->simt_stmt = new_stmt;
 }
 
 /* Scan an OpenMP sections directive.  */
@@ -6750,11 +6755,15 @@  lower_omp_for (gimple_stmt_iterator *gsi
 	= (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR
 	   || gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_TASKLOOP);
       tree outerc = NULL, *pc = gimple_omp_for_clauses_ptr (stmt);
+      tree simtc = NULL;
       tree clauses = *pc;
       if (taskreg_for)
 	outerc
 	  = omp_find_clause (gimple_omp_taskreg_clauses (ctx->outer->stmt),
 			     OMP_CLAUSE__LOOPTEMP_);
+      if (ctx->simt_stmt)
+	simtc = omp_find_clause (gimple_omp_for_clauses (ctx->simt_stmt),
+				 OMP_CLAUSE__LOOPTEMP_);
       for (i = 0; i < count; i++)
 	{
 	  tree temp;
@@ -6767,12 +6776,22 @@  lower_omp_for (gimple_stmt_iterator *gsi
 	    }
 	  else
 	    {
-	      temp = create_tmp_var (type);
+	      /* If there are 2 adjacent SIMD stmts, one with _simt_
+		 clause, another without, make sure they have the same
+		 decls in _looptemp_ clauses, because the outer stmt
+		 they are combined into will look up just one inner_stmt.  */
+	      if (ctx->simt_stmt)
+		temp = OMP_CLAUSE_DECL (simtc);
+	      else
+		temp = create_tmp_var (type);
 	      insert_decl_map (&ctx->outer->cb, temp, temp);
 	    }
 	  *pc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__LOOPTEMP_);
 	  OMP_CLAUSE_DECL (*pc) = temp;
 	  pc = &OMP_CLAUSE_CHAIN (*pc);
+	  if (ctx->simt_stmt)
+	    simtc = omp_find_clause (OMP_CLAUSE_CHAIN (simtc),
+				     OMP_CLAUSE__LOOPTEMP_);
 	}
       *pc = clauses;
     }