diff mbox

[gomp4] propagating conditionals in worker-vector partitioned loops

Message ID ddbab8ee-8fb3-0a22-20ea-5c905a170fe4@codesourcery.com
State New
Headers show

Commit Message

Cesar Philippidis Oct. 28, 2016, 4:33 p.m. UTC
I've applied the patch to gomp-4_0-branch to correct an issue involving
the propagation of variables used in conditional expressions to worker
and vector partitioned loops. More details regarding this patch can be
found here <https://gcc.gnu.org/ml/gcc-patches/2016-10/msg02187.html>

Cesar
diff mbox

Patch

2016-10-26  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/
	* config/nvptx/nvptx.c (nvptx_single): Use a single predicate
	for loops partitioned across both worker and vector axes.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/broadcast-1.c: New test.


diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 7bf5987..4e6ed60 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -3507,11 +3507,38 @@  nvptx_single (unsigned mask, basic_block from, basic_block to)
   /* Insert the vector test inside the worker test.  */
   unsigned mode;
   rtx_insn *before = tail;
+  rtx wvpred = NULL_RTX;
+  bool skip_vector = false;
+
+  /* Create a single predicate for loops containing both worker and
+     vectors.  */
+  if (cond_branch
+      && (GOMP_DIM_MASK (GOMP_DIM_WORKER) & mask)
+      && (GOMP_DIM_MASK (GOMP_DIM_VECTOR) & mask))
+    {
+      rtx regx = gen_reg_rtx (SImode);
+      rtx regy = gen_reg_rtx (SImode);
+      rtx tmp = gen_reg_rtx (SImode);
+      wvpred = gen_reg_rtx (BImode);
+
+      emit_insn_before (gen_oacc_dim_pos (regx, const1_rtx), head);
+      emit_insn_before (gen_oacc_dim_pos (regy, const2_rtx), head);
+      emit_insn_before (gen_rtx_SET (tmp, gen_rtx_IOR (SImode, regx, regy)),
+			head);
+      emit_insn_before (gen_rtx_SET (wvpred, gen_rtx_NE (BImode, tmp,
+							 const0_rtx)),
+			head);
+
+      skip_mask &= ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR));
+      skip_vector = true;
+    }
+
   for (mode = GOMP_DIM_WORKER; mode <= GOMP_DIM_VECTOR; mode++)
     if (GOMP_DIM_MASK (mode) & skip_mask)
       {
 	rtx_code_label *label = gen_label_rtx ();
-	rtx pred = cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER];
+	rtx pred = skip_vector ? wvpred
+	  : cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER];
 
 	if (!pred)
 	  {
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-1.c
new file mode 100644
index 0000000..4dcb60d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-1.c
@@ -0,0 +1,49 @@ 
+/* Ensure that worker-vector state conditional expressions are
+   properly handled by the nvptx backend.  */
+
+#include <assert.h>
+#include <math.h>
+
+
+#define N 1024
+
+int A[N][N] ;
+
+void test(int x)
+{
+#pragma acc parallel  num_gangs(16) num_workers(4) vector_length(32) copyout(A)
+  {
+#pragma acc loop gang
+    for(int j=0;j<N;j++)
+      {
+	if (x==1)
+	  {
+#pragma acc loop worker vector
+	    for(int i=0;i<N;i++)
+	      A[i][j] = 1;
+	  }
+	else
+	  {
+#pragma acc loop worker vector
+	    for(int i=0;i<N;i++)
+	      A[i][j] = -1;
+	  }
+      }
+  }
+}
+
+
+int main(void)
+{
+  test (0);
+  for (int i = 0; i < N; i++)
+    for (int j = 0; j < N; j++)
+      assert (A[i][j] == -1);
+
+  test (1);
+  for (int i = 0; i < N; i++)
+    for (int j = 0; j < N; j++)
+      assert (A[i][j] == 1);
+
+  return 0;
+}