diff mbox

[PTX] fix worker propagation ICE

Message ID 6af195bc-be9b-b71a-f92d-7116bdf3f372@acm.org
State New
Headers show

Commit Message

Nathan Sidwell Aug. 3, 2016, 5:30 p.m. UTC
The PTX backend could ICE when generating a state propagation sequence on
entry to partitioned execution.  Although the stack frame is DImode-aligned,
nothing actually rounds its size up to be consistent with that alignment.
That meant we could encounter frames whose size was not a multiple of the
DImode size, which tripped the assert that checks for exactly that.

Rather than faff around trying to copy just the extra bit on the end of such a
frame, I changed the frame emission to round the size up, and adjusted the
propagation machinery likewise.  (Mostly one gets frames when not optimizing
anyway.)

Applied to trunk & gomp4.
diff mbox

Patch

2016-08-03  Nathan Sidwell  <nathan@codesourcery.com>

	gcc/
	* config/nvptx/nvptx.c (nvptx_declare_function_name): Round frame
	size to DImode boundary.
	(nvptx_propagate): Likewise.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/crash-1.c: New.

Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 239084)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -999,11 +999,14 @@  nvptx_declare_function_name (FILE *file,
     init_frame (file, STACK_POINTER_REGNUM,
 		UNITS_PER_WORD, crtl->outgoing_args_size);
 
-  /* Declare a local variable for the frame.  */
+  /* Declare a local variable for the frame.  Force its size to be
+     DImode-compatible.  */
   HOST_WIDE_INT sz = get_frame_size ();
   if (sz || cfun->machine->has_chain)
     init_frame (file, FRAME_POINTER_REGNUM,
-		crtl->stack_alignment_needed / BITS_PER_UNIT, sz);
+		crtl->stack_alignment_needed / BITS_PER_UNIT,
+		(sz + GET_MODE_SIZE (DImode) - 1)
+		& ~(HOST_WIDE_INT)(GET_MODE_SIZE (DImode) - 1));
 
   /* Declare the pseudos we have as ptx registers.  */
   int maxregs = max_reg_num ();
@@ -3222,8 +3225,9 @@  nvptx_propagate (basic_block block, rtx_
       rtx pred = NULL_RTX;
       rtx_code_label *label = NULL;
 
-      gcc_assert (!(fs & (GET_MODE_SIZE (DImode) - 1)));
-      fs /= GET_MODE_SIZE (DImode);
+      /* The frame size might not be DImode compatible, but the frame
+	 array's declaration will be.  So it's ok to round up here.  */
+      fs = (fs + GET_MODE_SIZE (DImode) - 1) / GET_MODE_SIZE (DImode);
       /* Detect single iteration loop. */
       if (fs == 1)
 	fs = 0;
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c	(nonexistent)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c	(working copy)
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+/* ICEd in nvptx backend due to unexpected frame size.  */
+#pragma acc routine worker
+void
+worker_matmul (int *c, int i)
+{
+  int j;
+
+#pragma acc loop
+  for (j = 0; j < 4; j++)
+    c[j] = j;
+}
+
+
+int
+main ()
+{
+  int c[4];
+
+#pragma acc parallel 
+  {
+    worker_matmul (c, 0);
+  }
+  
+  return 0;
+}