diff mbox

[gomp4] lto dump of callee counts

Message ID 25c79eca-3233-df6b-b257-18176b9fa15c@acm.org
State New
Headers show

Commit Message

Nathan Sidwell Aug. 26, 2016, 8:29 p.m. UTC
I've committed this to the gomp4 branch.  The LTO device compiler was ICEing
when reading in the offload inline statistics.  The root cause was that it did
not have the bodies of some functions and therefore did not try to read their
callee statistics.  It thus started reading the next function's data too early
and, in this particular case, ICEd on a failed assert.

The statistics write-out should only include callee data when the function body
is also being dumped.  For regular LTO we always dump the body when it is
available, but for offload LTO we only dump bodies of functions marked for offload.

Such a case isn't necessarily an error, as we may want to link with a
function implementation provided by a library.  In this example, it's user
error and we'll eventually produce a final link error.

The testcase is set up so that another TU provides the function implementations 
and we get an executable program.

nathan
diff mbox

Patch

2016-08-26  Nathan Sidwell  <nathan@codesourcery.com>

	gcc/
	* ipa-inline-analysis.c (inline_write_summary): Only dump callee
	counts when dumping the function's body.

	libgomp/
	* testsuite/libgomp.oacc-c++/pr71959.C: New.
	* testsuite/libgomp.oacc-c++/pr71959-a.C: New.

Index: gcc/ipa-inline-analysis.c
===================================================================
--- gcc/ipa-inline-analysis.c	(revision 239787)
+++ gcc/ipa-inline-analysis.c	(working copy)
@@ -4383,8 +4383,10 @@  inline_write_summary (void)
 	  int i;
 	  size_time_entry *e;
 	  struct condition *c;
+	  int index = lto_symtab_encoder_encode (encoder, cnode);
+	  bool body = encoder->nodes[index].body;
 
-	  streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
+	  streamer_write_uhwi (ob, index);
 	  streamer_write_hwi (ob, info->estimated_self_stack_size);
 	  streamer_write_hwi (ob, info->self_size);
 	  streamer_write_hwi (ob, info->self_time);
@@ -4415,10 +4417,17 @@  inline_write_summary (void)
 	  write_predicate (ob, info->loop_iterations);
 	  write_predicate (ob, info->loop_stride);
 	  write_predicate (ob, info->array_index);
-	  for (edge = cnode->callees; edge; edge = edge->next_callee)
-	    write_inline_edge_summary (ob, edge);
-	  for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
-	    write_inline_edge_summary (ob, edge);
+	  if (body)
+	    {
+	      /* Only write callee counts when we're emitting the
+		 body, as the reader only knows about the callees when
+		 the body's emitted.  */
+	      for (edge = cnode->callees; edge; edge = edge->next_callee)
+		write_inline_edge_summary (ob, edge);
+	      for (edge = cnode->indirect_calls; edge;
+		   edge = edge->next_callee)
+		write_inline_edge_summary (ob, edge);
+	    }
 	}
     }
   streamer_write_char_stream (ob->main_stream, 0);
Index: libgomp/testsuite/libgomp.oacc-c++/pr71959-a.C
===================================================================
--- libgomp/testsuite/libgomp.oacc-c++/pr71959-a.C	(nonexistent)
+++ libgomp/testsuite/libgomp.oacc-c++/pr71959-a.C	(working copy)
@@ -0,0 +1,31 @@ 
+// { dg-do compile }
+
+struct Iter 
+{
+  int *cursor;
+
+  void ctor (int *cursor_) asm("_ZN4IterC1EPi");
+  int *point () const asm("_ZNK4Iter5pointEv");
+};
+
+#pragma acc routine
+void  Iter::ctor (int *cursor_)
+{
+  cursor = cursor_;
+}
+
+#pragma acc routine
+int *Iter::point () const
+{
+  return cursor;
+}
+
+void apply (int (*fn)(), Iter out) asm ("_ZN5Apply5applyEPFivE4Iter");
+
+#pragma acc routine
+void apply (int (*fn)(), struct Iter out)
+{ *out.point() = fn (); }
+
+extern "C" void __gxx_personality_v0 ()
+{
+}
Index: libgomp/testsuite/libgomp.oacc-c++/pr71959.C
===================================================================
--- libgomp/testsuite/libgomp.oacc-c++/pr71959.C	(nonexistent)
+++ libgomp/testsuite/libgomp.oacc-c++/pr71959.C	(working copy)
@@ -0,0 +1,31 @@ 
+// { dg-additional-sources "pr71959-a.C" }
+
+// pr lto/71959 ICEd LTO due to mismatch between writing & reading behaviour
+
+struct Iter
+{
+  int *cursor;
+  
+  Iter(int *cursor_) : cursor(cursor_) {}
+
+  int *point() const { return cursor; }
+};
+
+#pragma acc routine seq
+int one () { return 1; }
+
+struct Apply
+{
+  static void apply (int (*fn)(), Iter out)
+  { *out.point() = fn (); }
+};
+
+int main ()
+{
+  int x;
+  
+#pragma acc parallel copyout(x)
+  Apply::apply (one, Iter (&x));
+
+  return x != 1;
+}