diff mbox series

[v2,4/4] diagnostics: Support generated data locations in SARIF output

Message ID e364f3a6881ed90c41090e890121332adf62c0ee.1672867272.git.lhyatt@gmail.com
State New
Headers show
Series diagnostics: libcpp: Overhaul locations for _Pragma tokens | expand

Commit Message

Lewis Hyatt Jan. 5, 2023, 10:36 p.m. UTC
The SARIF output routines need to read the source code back in so that they
can generate "snippet" and "contents" records; they must therefore be able to
cope with generated data locations.  Add support for that in
diagnostic-format-sarif.cc.

gcc/ChangeLog:

	* diagnostic-format-sarif.cc (sarif_builder::xloc_to_fb): New function.
	(sarif_builder::maybe_make_physical_location_object): Support
	generated data locations.
	(sarif_builder::make_artifact_location_object): Likewise.
	(sarif_builder::maybe_make_region_object_for_context): Likewise.
	(sarif_builder::make_artifact_object): Likewise.
	(sarif_builder::maybe_make_artifact_content_object): Likewise.
	(get_source_lines): Likewise.

gcc/testsuite/ChangeLog:

	* c-c++-common/diagnostic-format-sarif-file-5.c: New test.
---
 gcc/diagnostic-format-sarif.cc                | 102 +++++++++++-------
 .../diagnostic-format-sarif-file-5.c          |  31 ++++++
 2 files changed, 93 insertions(+), 40 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/diagnostic-format-sarif-file-5.c
diff mbox series

Patch

diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc
index f8fdd586ff0..99aba1414ea 100644
--- a/gcc/diagnostic-format-sarif.cc
+++ b/gcc/diagnostic-format-sarif.cc
@@ -125,7 +125,10 @@  private:
   json::array *maybe_make_kinds_array (diagnostic_event::meaning m) const;
   json::object *maybe_make_physical_location_object (location_t loc);
   json::object *make_artifact_location_object (location_t loc);
-  json::object *make_artifact_location_object (const char *filename);
+
+  typedef std::pair<const char *, unsigned int> filename_or_buffer;
+  json::object *make_artifact_location_object (filename_or_buffer fb);
+
   json::object *make_artifact_location_object_for_pwd () const;
   json::object *maybe_make_region_object (location_t loc) const;
   json::object *maybe_make_region_object_for_context (location_t loc) const;
@@ -146,16 +149,17 @@  private:
   json::object *make_reporting_descriptor_object_for_cwe_id (int cwe_id) const;
   json::object *
   make_reporting_descriptor_reference_object_for_cwe_id (int cwe_id);
-  json::object *make_artifact_object (const char *filename);
-  json::object *maybe_make_artifact_content_object (const char *filename) const;
-  json::object *maybe_make_artifact_content_object (const char *filename,
-						    int start_line,
+  json::object *make_artifact_object (filename_or_buffer fb);
+  json::object *
+  maybe_make_artifact_content_object (filename_or_buffer fb) const;
+  json::object *maybe_make_artifact_content_object (expanded_location xloc,
 						    int end_line) const;
   json::object *make_fix_object (const rich_location &rich_loc);
   json::object *make_artifact_change_object (const rich_location &richloc);
   json::object *make_replacement_object (const fixit_hint &hint) const;
   json::object *make_artifact_content_object (const char *text) const;
   int get_sarif_column (expanded_location exploc) const;
+  static filename_or_buffer xloc_to_fb (expanded_location xloc);
 
   diagnostic_context *m_context;
 
@@ -166,7 +170,11 @@  private:
      diagnostic group.  */
   sarif_result *m_cur_group_result;
 
-  hash_set <const char *> m_filenames;
+  /* If the second member is >0, then the first member is a buffer of
+     generated content with that length, not a filename.  */
+  hash_set <pair_hash <nofree_ptr_hash <const char>,
+		       int_hash <unsigned int, -1U> >
+	    > m_filenames;
   bool m_seen_any_relative_paths;
   hash_set <free_string_hash> m_rule_id_set;
   json::array *m_rules_arr;
@@ -588,6 +596,15 @@  sarif_builder::make_location_object (const diagnostic_event &event)
   return location_obj;
 }
 
+/* Populate a filename_or_buffer pair from an expanded location.  */
+sarif_builder::filename_or_buffer
+sarif_builder::xloc_to_fb (expanded_location xloc)
+{
+  if (xloc.generated_data_len)
+    return filename_or_buffer (xloc.generated_data, xloc.generated_data_len);
+  return filename_or_buffer (xloc.file, 0);
+}
+
 /* Make a physicalLocation object (SARIF v2.1.0 section 3.29) for LOC,
    or return NULL;
    Add any filename to the m_artifacts.  */
@@ -603,7 +620,7 @@  sarif_builder::maybe_make_physical_location_object (location_t loc)
   /* "artifactLocation" property (SARIF v2.1.0 section 3.29.3).  */
   json::object *artifact_loc_obj = make_artifact_location_object (loc);
   phys_loc_obj->set ("artifactLocation", artifact_loc_obj);
-  m_filenames.add (LOCATION_FILE (loc));
+  m_filenames.add (xloc_to_fb (expand_location (loc)));
 
   /* "region" property (SARIF v2.1.0 section 3.29.4).  */
   if (json::object *region_obj = maybe_make_region_object (loc))
@@ -627,7 +644,7 @@  sarif_builder::maybe_make_physical_location_object (location_t loc)
 json::object *
 sarif_builder::make_artifact_location_object (location_t loc)
 {
-  return make_artifact_location_object (LOCATION_FILE (loc));
+  return make_artifact_location_object (xloc_to_fb (expand_location (loc)));
 }
 
 /* The ID value for use in "uriBaseId" properties (SARIF v2.1.0 section 3.4.4)
@@ -639,10 +656,12 @@  sarif_builder::make_artifact_location_object (location_t loc)
    or return NULL.  */
 
 json::object *
-sarif_builder::make_artifact_location_object (const char *filename)
+sarif_builder::make_artifact_location_object (filename_or_buffer fb)
 {
   json::object *artifact_loc_obj = new json::object ();
 
+  const auto filename = (fb.second ? special_fname_generated () : fb.first);
+
   /* "uri" property (SARIF v2.1.0 section 3.4.3).  */
   artifact_loc_obj->set ("uri", new json::string (filename));
 
@@ -795,9 +814,7 @@  sarif_builder::maybe_make_region_object_for_context (location_t loc) const
 
   /* "snippet" property (SARIF v2.1.0 section 3.30.13).  */
   if (json::object *artifact_content_obj
-	 = maybe_make_artifact_content_object (exploc_start.file,
-					       exploc_start.line,
-					       exploc_finish.line))
+	= maybe_make_artifact_content_object (exploc_start, exploc_finish.line))
     region_obj->set ("snippet", artifact_content_obj);
 
   return region_obj;
@@ -1248,24 +1265,24 @@  sarif_builder::maybe_make_cwe_taxonomy_object () const
 /* Make an artifact object (SARIF v2.1.0 section 3.24).  */
 
 json::object *
-sarif_builder::make_artifact_object (const char *filename)
+sarif_builder::make_artifact_object (filename_or_buffer fb)
 {
   json::object *artifact_obj = new json::object ();
 
   /* "location" property (SARIF v2.1.0 section 3.24.2).  */
-  json::object *artifact_loc_obj = make_artifact_location_object (filename);
+  json::object *artifact_loc_obj = make_artifact_location_object (fb);
   artifact_obj->set ("location", artifact_loc_obj);
 
   /* "contents" property (SARIF v2.1.0 section 3.24.8).  */
   if (json::object *artifact_content_obj
-	= maybe_make_artifact_content_object (filename))
+	= maybe_make_artifact_content_object (fb))
     artifact_obj->set ("contents", artifact_content_obj);
 
   /* "sourceLanguage" property (SARIF v2.1.0 section 3.24.10).  */
   if (m_context->m_client_data_hooks)
     if (const char *source_lang
 	= m_context->m_client_data_hooks->maybe_get_sarif_source_language
-	    (filename))
+	    (fb.first))
       artifact_obj->set ("sourceLanguage", new json::string (source_lang));
 
   return artifact_obj;
@@ -1331,34 +1348,40 @@  maybe_read_file (const char *filename)
    full contents of FILENAME.  */
 
 json::object *
-sarif_builder::maybe_make_artifact_content_object (const char *filename) const
+sarif_builder::maybe_make_artifact_content_object (filename_or_buffer fb) const
 {
-  char *text_utf8 = maybe_read_file (filename);
-  if (!text_utf8)
-    return NULL;
-
-  json::object *artifact_content_obj = new json::object ();
-  artifact_content_obj->set ("text", new json::string (text_utf8));
-  free (text_utf8);
-
+  json::object *artifact_content_obj = nullptr;
+  if (fb.second)
+    {
+      artifact_content_obj = new json::object ();
+      artifact_content_obj->set ("text", new json::string (fb.first,
+							   fb.second));
+    }
+  else if (char *text_utf8 = maybe_read_file (fb.first))
+    {
+      artifact_content_obj = new json::object ();
+      artifact_content_obj->set ("text", new json::string (text_utf8));
+      free (text_utf8);
+    }
   return artifact_content_obj;
 }
 
 /* Attempt to read the given range of lines from FILENAME; return
-   a freshly-allocated 0-terminated buffer containing them, or NULL.  */
+   a freshly-allocated buffer containing them, or NULL.
+   The buffer is null-terminated, but could also contain embedded null
+   bytes, so the char_span's length() accessor should be used.  */
 
-static char *
-get_source_lines (const char *filename,
-		  int start_line,
+static char_span
+get_source_lines (expanded_location xloc,
 		  int end_line)
 {
   auto_vec<char> result;
 
-  for (int line = start_line; line <= end_line; line++)
+  for (int line = xloc.line; line <= end_line; line++)
     {
-      char_span line_content = location_get_source_line (filename, line);
+      char_span line_content = location_get_source_line (xloc, line);
       if (!line_content.get_buffer ())
-	return NULL;
+	return char_span (nullptr, 0);
       result.reserve (line_content.length () + 1);
       for (size_t i = 0; i < line_content.length (); i++)
 	result.quick_push (line_content[i]);
@@ -1366,26 +1389,25 @@  get_source_lines (const char *filename,
     }
   result.safe_push ('\0');
 
-  return xstrdup (result.address ());
+  return char_span (xstrdup (result.address ()), result.length () - 1);
 }
 
 /* Make an artifactContent object (SARIF v2.1.0 section 3.3) for the given
-   run of lines within FILENAME (including the endpoints).  */
+   run of lines starting at XLOC (including the endpoints).  */
 
 json::object *
-sarif_builder::maybe_make_artifact_content_object (const char *filename,
-						   int start_line,
+sarif_builder::maybe_make_artifact_content_object (expanded_location xloc,
 						   int end_line) const
 {
-  char *text_utf8 = get_source_lines (filename, start_line, end_line);
+  const char_span text_utf8 = get_source_lines (xloc, end_line);
 
   if (!text_utf8)
     return NULL;
 
   json::object *artifact_content_obj = new json::object ();
-  artifact_content_obj->set ("text", new json::string (text_utf8));
-  free (text_utf8);
-
+  artifact_content_obj->set ("text", new json::string (text_utf8.get_buffer (),
+						       text_utf8.length ()));
+  free (const_cast<char *> (text_utf8.get_buffer ()));
   return artifact_content_obj;
 }
 
diff --git a/gcc/testsuite/c-c++-common/diagnostic-format-sarif-file-5.c b/gcc/testsuite/c-c++-common/diagnostic-format-sarif-file-5.c
new file mode 100644
index 00000000000..2ca6a069d3f
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/diagnostic-format-sarif-file-5.c
@@ -0,0 +1,31 @@ 
+/* The goal is to test SARIF output of generated data, such as a _Pragma string.
+   But SARIF output as of yet does not output macro definitions, so such
+   generated data buffers never end up in the typical SARIF output.  One way we
+   can achieve it is to use -fdump-internal-locations, which outputs top-level
+   diagnostic notes inside macro definitions, that SARIF will end up processing.
+   It also outputs a lot of other stuff to stderr (not to the SARIF file) that
+   is not relevant to this test, so we use a blanket dg-regexp to filter all of
+   that away.  */
+
+/* { dg-do compile } */
+/* { dg-options "-fdiagnostics-format=sarif-file -fdump-internal-locations" } */
+/* { dg-allow-blank-lines-in-output "" } */
+
+_Pragma("GCC diagnostic push")
+
+/* { dg-regexp {(.|[\n\r])*} } */
+
+/* Because of the way -fdump-internal-locations works, these regexes themselves
+   will end up in the sarif output also.  But due to the escaping, they don't
+   match themselves, so they still test what we need.  */
+
+/* This pair is output four times, once for each token inside the
+   _Pragma string (3 plus a PRAGMA_EOL).  */
+
+/* { dg-final { scan-sarif-file "\"artifactLocation\": \{\"uri\": \"<generated>\"," } } */
+/* { dg-final { scan-sarif-file "\"snippet\": \{\"text\": \"GCC diagnostic push\\\\n\"" } } */
+
+/* This pair is output once, for the overall internal location.  */
+
+/* { dg-final { scan-sarif-file "\{\"location\": \{\"uri\": \"<generated>\"," } } */
+/* { dg-final { scan-sarif-file "\"contents\": \{\"text\": \"GCC diagnostic push\\\\n\\\\0" } } */