diff mbox series

[RFC] Add function filtering to gcov

Message ID 20240329190201.1601481-1-j@lambda.is
State New
Headers show
Series [RFC] Add function filtering to gcov | expand

Commit Message

Jørgen Kvalsvik March 29, 2024, 7:02 p.m. UTC
This is a prototype for --include/--exclude flags, and I would like a
review of both the approach and architecture, and the implementation,
plus feedback on the feature itself. I did not update the manuals or
carefully extend --help, in case the interface itself needs some
revision before it can be merged.

---

Add the --include and --exclude flags to gcov to control what functions
to report on. This is meant to make gcov more practical as an when
writing test suites or performing other coverage experiments, which
tends to focus on a few functions at the time. This really shines in
combination with the -t/--stdout flag. With support for more expansive
metrics in gcov like modified condition/decision coverage (MC/DC) and
path coverage, output quickly gets overwhelming without filtering.

The approach is quite simple: filters are egrep regexes and are
evaluated left-to-right, and the last filter "wins", that is, if a
function matches an --include and a subsequent --exclude, it should not
be included in the output. The output machinery is already interacting
with the function table, which makes the json output work as expected,
and only minor changes are needed to suppress the filtered-out
functions.

Demo: math.c

    int mul (int a, int b) {
        return a * b;
    }

    int sub (int a, int b) {
        return a - b;
    }

    int sum (int a, int b) {
        return a + b;
    }

Plain matches:

$ gcov -t math --include=sum
        -:    0:Source:filter.c
        -:    0:Graph:filter.gcno
        -:    0:Data:-
        -:    0:Runs:0
    #####:    9:int sum (int a, int b) {
    #####:   10:    return a + b;

$ gcov -t math --include=mul
        -:    0:Source:filter.c
        -:    0:Graph:filter.gcno
        -:    0:Data:-
        -:    0:Runs:0
    #####:    1:int mul (int a, int b) {
    #####:    2:    return a * b;

Regex match:

$ gcov -t math --include=su
        -:    0:Source:filter.c
        -:    0:Graph:filter.gcno
        -:    0:Data:-
        -:    0:Runs:0
    #####:    5:int sub (int a, int b) {
    #####:    6:    return a - b;
        -:    7:}
    #####:    9:int sum (int a, int b) {
    #####:   10:    return a + b;

And similar for exclude:

$ gcov -t math --exclude=sum
        -:    0:Source:filter.c
        -:    0:Graph:filter.gcno
        -:    0:Data:-
        -:    0:Runs:0
    #####:    1:int mul (int a, int b) {
    #####:    2:    return a * b;
        -:    3:}
    #####:    5:int sub (int a, int b) {
    #####:    6:    return a - b;

And json, for good measure:

$ gcov -t math --include=sum --json | jq ".files[].lines[]"
{
  "line_number": 9,
  "function_name": "sum",
  "count": 0,
  "unexecuted_block": true,
  "block_ids": [],
  "branches": [],
  "calls": []
}
{
  "line_number": 10,
  "function_name": "sum",
  "count": 0,
  "unexecuted_block": true,
  "block_ids": [
    2
  ],
  "branches": [],
  "calls": []
}

Note that the last function gets "clipped" when lines are associated to
functions, which means the closing brace is dropped from the report. I
hope this can be fixed, but considering it is not really a part of the
function body, the gcov report is "complete".

Matching generally work well for mangled names, as the mangled names
also have the base symbol name in it. A possible extension to the
filtering commands would be to mix it with demangling to more nicely
being able to filter specific overloads, without manually having to
mangle the interesting symbols. The g++.dg/gcov/gcov-20.C test tests the
matching of a mangled name.

The dejagnu testing function verify-calls is somewhat minimal, but does
the job well enough.

Why not just use grep? grep is not really sufficient, as grep is very
line oriented, and the reports that benefit the most from filtering
often span multiple lines, unpredictably.
---
 gcc/gcov.cc                            | 101 +++++++++++++++++++++++--
 gcc/testsuite/g++.dg/gcov/gcov-19.C    |  35 +++++++++
 gcc/testsuite/g++.dg/gcov/gcov-20.C    |  38 ++++++++++
 gcc/testsuite/gcc.misc-tests/gcov-24.c |  20 +++++
 gcc/testsuite/gcc.misc-tests/gcov-25.c |  23 ++++++
 gcc/testsuite/gcc.misc-tests/gcov-26.c |  23 ++++++
 gcc/testsuite/gcc.misc-tests/gcov-27.c |  22 ++++++
 gcc/testsuite/lib/gcov.exp             |  53 ++++++++++++-
 8 files changed, 306 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-19.C
 create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-20.C
 create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-24.c
 create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-25.c
 create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-26.c
 create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-27.c
diff mbox series

Patch

diff --git a/gcc/gcov.cc b/gcc/gcov.cc
index fad704eb7c9..e53765de00a 100644
--- a/gcc/gcov.cc
+++ b/gcc/gcov.cc
@@ -46,6 +46,7 @@  along with Gcov; see the file COPYING3.  If not see
 #include "color-macros.h"
 #include "pretty-print.h"
 #include "json.h"
+#include "xregex.h"
 
 #include <zlib.h>
 #include <getopt.h>
@@ -643,6 +644,43 @@  static int flag_counts = 0;
 /* Return code of the tool invocation.  */
 static int return_code = 0;
 
+/* "Keep policy" when adding functions to the global function table.  This will
+   be set to false when --include is used, otherwise every function should be
+   added to the table.  Used for --include/exclude.  */
+static bool default_keep = true;
+
+/* A 'function filter', a filter and action for determining if a function
+   should be included in the output or not.  Used for --include/--exclude
+   filtering.  */
+struct fnfilter
+{
+  /* The (extended) compiled regex for this filter.  */
+  regex_t regex;
+
+  /* The action when this filter (regex) matches - if true, the function should
+     be kept, otherwise discarded.  */
+  bool keep;
+
+  /* Compile the regex EXPR, or exit if pattern is malformed.  */
+  void compile (const char *expr)
+  {
+    int err = regcomp (&regex, expr, REG_NOSUB | REG_EXTENDED);
+    if (err)
+      {
+	size_t len = regerror (err, &regex, nullptr, 0);
+	char *msg = XNEWVEC (char, len);
+	regerror (err, &regex, msg, len);
+	fprintf (stderr, "Bad regular expression: %s\n", msg);
+	free (msg);
+	exit (EXIT_FAILURE);
+    }
+  }
+};
+
+/* A collection of filter functions for including/exclude functions in the
+   output.  This is empty unless --include/--exclude is used.  */
+static vector<fnfilter> filters;
+
 /* Forward declarations.  */
 static int process_args (int, char **);
 static void print_usage (int) ATTRIBUTE_NORETURN;
@@ -933,6 +971,8 @@  print_usage (int error_p)
   fnotice (file, "  -d, --display-progress          Display progress information\n");
   fnotice (file, "  -D, --debug			    Display debugging dumps\n");
   fnotice (file, "  -f, --function-summaries        Output summaries for each function\n");
+  fnotice (file, "      --include                   Include functions matching this regex\n");
+  fnotice (file, "      --exclude                   Exclude functions matching this regex\n");
   fnotice (file, "  -h, --help                      Print this help, then exit\n");
   fnotice (file, "  -j, --json-format               Output JSON intermediate format\n\
                                     into .gcov.json.gz file\n");
@@ -984,6 +1024,8 @@  static const struct option options[] =
   { "branch-probabilities", no_argument,       NULL, 'b' },
   { "branch-counts",        no_argument,       NULL, 'c' },
   { "json-format",	    no_argument,       NULL, 'j' },
+  { "include",              required_argument, NULL, 'I' },
+  { "exclude",              required_argument, NULL, 'E' },
   { "human-readable",	    no_argument,       NULL, 'H' },
   { "no-output",            no_argument,       NULL, 'n' },
   { "long-file-names",      no_argument,       NULL, 'l' },
@@ -1034,6 +1076,17 @@  process_args (int argc, char **argv)
 	case 'l':
 	  flag_long_names = 1;
 	  break;
+	case 'I':
+	  default_keep = false;
+	  filters.push_back (fnfilter {});
+	  filters.back ().keep = true;
+	  filters.back ().compile (optarg);
+	  break;
+	case 'E':
+	  filters.push_back (fnfilter {});
+	  filters.back ().keep = false;
+	  filters.back ().compile (optarg);
+	  break;
 	case 'H':
 	  flag_human_readable_numbers = 1;
 	  break;
@@ -1641,6 +1694,9 @@  release_structures (void)
        it != functions.end (); it++)
     delete (*it);
 
+  for (fnfilter& filter : filters)
+    regfree (&filter.regex);
+
   sources.resize (0);
   names.resize (0);
   functions.resize (0);
@@ -1868,8 +1924,6 @@  read_graph_file (void)
 	  unsigned end_column = gcov_read_unsigned ();
 
 	  fn = new function_info ();
-	  functions.push_back (fn);
-	  ident_to_fn[ident] = fn;
 
 	  fn->m_name = function_name;
 	  fn->ident = ident;
@@ -1883,6 +1937,17 @@  read_graph_file (void)
 	  fn->artificial = artificial;
 
 	  current_tag = tag;
+
+	  bool keep = default_keep;
+	  for (fnfilter& fn : filters)
+	    if (regexec (&fn.regex, function_name, 0, nullptr, 0) == 0)
+	      keep = fn.keep;
+
+	  if (keep)
+	    {
+	      functions.push_back (fn);
+	      ident_to_fn[ident] = fn;
+	    }
 	}
       else if (fn && tag == GCOV_TAG_BLOCKS)
 	{
@@ -3182,11 +3247,14 @@  output_lines (FILE *gcov_file, const source_info *src)
 
   unsigned line_start_group = 0;
   vector<function_info *> *fns;
+  unsigned filtered_line_end = !filters.empty () ? 0 : source_lines.size ();
 
   for (unsigned line_num = 1; line_num <= source_lines.size (); line_num++)
     {
       if (line_num >= src->lines.size ())
 	{
+	  if (!filters.empty ())
+	    break;
 	  fprintf (gcov_file, "%9s:%5u", "-", line_num);
 	  print_source_line (gcov_file, source_lines, line_num);
 	  continue;
@@ -3205,11 +3273,26 @@  output_lines (FILE *gcov_file, const source_info *src)
 	      for (unsigned i = 0; i < fns->size (); i++)
 		if ((*fns)[i]->end_line > line_start_group)
 		  line_start_group = (*fns)[i]->end_line;
+
+	      /* When filtering, src->lines will be cut short for the last
+		 selected function.  To make sure the "overlapping function"
+		 section is printed too, adjust the end so that it is within
+		 src->lines.  */
+	      if (line_start_group >= src->lines.size ())
+		line_start_group = src->lines.size () - 1;
+
+	      if (!filters.empty ())
+		filtered_line_end = line_start_group;
 	    }
 	  else if (fns != NULL && fns->size () == 1)
 	    {
 	      function_info *fn = (*fns)[0];
 	      output_function_details (gcov_file, fn);
+
+	      /* If functions are filtered, only the matching functions will be in
+		 fns and there is no need for extra checking.  */
+	      if (!filters.empty ())
+		filtered_line_end = fn->end_line;
 	    }
 	}
 
@@ -3219,12 +3302,16 @@  output_lines (FILE *gcov_file, const source_info *src)
 	 Otherwise, print the execution count before the source line.
 	 There are 16 spaces of indentation added before the source
 	 line so that tabs won't be messed up.  */
-      output_line_beginning (gcov_file, line->exists, line->unexceptional,
-			     line->has_unexecuted_block, line->count,
-			     line_num, "=====", "#####", src->maximum_count);
+      if (line_num <= filtered_line_end)
+	{
+	  output_line_beginning (gcov_file, line->exists, line->unexceptional,
+				 line->has_unexecuted_block, line->count,
+				 line_num, "=====", "#####",
+				 src->maximum_count);
 
-      print_source_line (gcov_file, source_lines, line_num);
-      output_line_details (gcov_file, line, line_num);
+	  print_source_line (gcov_file, source_lines, line_num);
+	  output_line_details (gcov_file, line, line_num);
+	}
 
       if (line_start_group == line_num)
 	{
diff --git a/gcc/testsuite/g++.dg/gcov/gcov-19.C b/gcc/testsuite/g++.dg/gcov/gcov-19.C
new file mode 100644
index 00000000000..b10226592b4
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gcov/gcov-19.C
@@ -0,0 +1,35 @@ 
+/* { dg-options "-fprofile-arcs -ftest-coverage" } */
+/* { dg-do run { target native } } */
+
+/* Filtering on the function base name generally works well, because it becomes
+   an unadultered part of the symbol.  */
+
+template <typename T>
+T
+fn1 (T x)
+{
+    /* fst */
+    return x;
+}
+
+template <typename T>
+T
+fn2 (T x)
+{
+    /* snd */
+    return 2 * x;
+}
+
+int
+main ()
+{
+    fn1 (2);
+    fn1 (2.0);
+    fn1 (2.0f);
+
+    fn2 (2);
+    fn2 (2.0);
+    fn2 (2.0f);
+}
+
+/* { dg-final { run-gcov { filters { fn1 } { fn2 } } { --include=fn1 gcov-19.C } } } */
diff --git a/gcc/testsuite/g++.dg/gcov/gcov-20.C b/gcc/testsuite/g++.dg/gcov/gcov-20.C
new file mode 100644
index 00000000000..efd8aa72d25
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gcov/gcov-20.C
@@ -0,0 +1,38 @@ 
+/* { dg-options "-fprofile-arcs -ftest-coverage" } */
+/* { dg-do run { target native } } */
+
+/* Filtering also works by targeting the mangled symbol directly, but the
+   subtlety is not really caught by the test framework.  Matching on fn1I[df]
+   prints the "overlapping blocks" of both the float and double instantiation,
+   but *not* the int instantiation.  The extra {} around the --include argument
+   is quoting for tcl/dejagnu.  */
+
+template <typename T>
+T
+fn1 (T x)
+{
+    /* fst */
+    return x;
+}
+
+template <typename T>
+T
+fn2 (T x)
+{
+    /* snd */
+    return 2 * x;
+}
+
+int
+main ()
+{
+    fn1 (2);
+    fn1 (2.0);
+    fn1 (2.0f);
+
+    fn2 (2);
+    fn2 (2.0);
+    fn2 (2.0f);
+}
+
+/* { dg-final { run-gcov { filters { fst } { snd } } { {--include=fn1I[fd]} gcov-20.C } } } */
diff --git a/gcc/testsuite/gcc.misc-tests/gcov-24.c b/gcc/testsuite/gcc.misc-tests/gcov-24.c
new file mode 100644
index 00000000000..ce66b15c279
--- /dev/null
+++ b/gcc/testsuite/gcc.misc-tests/gcov-24.c
@@ -0,0 +1,20 @@ 
+/* { dg-options "-fprofile-arcs -ftest-coverage" } */
+/* { dg-do run { target native } } */
+
+int
+once (int x)
+{
+    /* fst */
+    return x;
+}
+
+int
+twice (int x)
+{
+    /* snd */
+    return x * 2;
+}
+
+int main () {}
+
+/* { dg-final { run-gcov { filters { fst } { snd main } } { --include=once gcov-24.c } } } */
diff --git a/gcc/testsuite/gcc.misc-tests/gcov-25.c b/gcc/testsuite/gcc.misc-tests/gcov-25.c
new file mode 100644
index 00000000000..05cfc0f1d82
--- /dev/null
+++ b/gcc/testsuite/gcc.misc-tests/gcov-25.c
@@ -0,0 +1,23 @@ 
+/* { dg-options "-fprofile-arcs -ftest-coverage" } */
+/* { dg-do run { target native } } */
+
+/* Filters are considered in order with latest-wins, so if a function is
+   included and later excluded it should not show up.  */
+
+int
+fn1 (int x)
+{
+    /* fst */
+    return x;
+}
+
+int
+fn2 (int x)
+{
+    /* snd */
+    return x * 2;
+}
+
+int main () {}
+
+/* { dg-final { run-gcov { filters { snd } { fst main } } { --include=fn --exclude=1 gcov-25.c } } } */
diff --git a/gcc/testsuite/gcc.misc-tests/gcov-26.c b/gcc/testsuite/gcc.misc-tests/gcov-26.c
new file mode 100644
index 00000000000..246e7b3ef57
--- /dev/null
+++ b/gcc/testsuite/gcc.misc-tests/gcov-26.c
@@ -0,0 +1,23 @@ 
+/* { dg-options "-fprofile-arcs -ftest-coverage" } */
+/* { dg-do run { target native } } */
+
+/* Filters are considered in order with latest-wins, so if a function is
+   excluded and later included it should show up.  */
+
+int
+fn1 (int x)
+{
+    /* fst */
+    return x;
+}
+
+int
+fn2 (int x)
+{
+    /* snd */
+    return x * 2;
+}
+
+int main () {}
+
+/* { dg-final { run-gcov { filters { fst snd } { main } } { --exclude=1 --include=fn gcov-26.c } } } */
diff --git a/gcc/testsuite/gcc.misc-tests/gcov-27.c b/gcc/testsuite/gcc.misc-tests/gcov-27.c
new file mode 100644
index 00000000000..53619472c01
--- /dev/null
+++ b/gcc/testsuite/gcc.misc-tests/gcov-27.c
@@ -0,0 +1,22 @@ 
+/* { dg-options "-fprofile-arcs -ftest-coverage" } */
+/* { dg-do run { target native } } */
+
+/* If only --exclude is used, other functions should be used by default.  */
+
+int
+fn1 (int x)
+{
+    /* fst */
+    return x;
+}
+
+int
+fn2 (int x)
+{
+    /* snd */
+    return x * 2;
+}
+
+int main () {}
+
+/* { dg-final { run-gcov { filters { fst snd } { main } } { --exclude=main gcov-27.c } } } */
diff --git a/gcc/testsuite/lib/gcov.exp b/gcc/testsuite/lib/gcov.exp
index fe6cd9635db..586c2cd1b40 100644
--- a/gcc/testsuite/lib/gcov.exp
+++ b/gcc/testsuite/lib/gcov.exp
@@ -249,6 +249,44 @@  proc verify-calls { testname testcase file } {
     return $failed
 }
 
+# Verify that report filtering includes and excludes the right functions.
+proc verify-filters { testname testcase file expected unexpected } {
+    set fd [open $file r]
+
+    set seen {}
+    set ex [concat $expected $unexpected]
+
+    while { [gets $fd line] >= 0 } {
+	foreach sym $ex {
+	    if [regexp "$sym" "$line"] {
+		lappend seen $sym
+	    }
+	}
+    }
+
+    set seen [lsort -unique $seen]
+
+    set expected [lmap key $expected {
+	if { $key in $seen } continue
+	set key
+    }]
+    set unexpected [lmap key $unexpected {
+	if { $key ni $seen } continue
+	set key
+    }]
+
+    foreach sym $expected {
+	fail "Did not see expected symbol '$sym'"
+    }
+
+    foreach sym $unexpected {
+	fail "Found unexpected symbol '$sym'"
+    }
+
+    close $fd
+    return [expr [llength $expected] + [llength $unexpected]]
+}
+
 proc gcov-pytest-format-line { args } {
     global subdir
 
@@ -323,6 +361,7 @@  proc run-gcov { args } {
     set gcov_verify_branches 0
     set gcov_verify_lines 1
     set gcov_verify_intermediate 0
+    set gcov_verify_filters 0
     set gcov_remove_gcda 0
     set xfailed 0
 
@@ -331,11 +370,16 @@  proc run-gcov { args } {
 	  set gcov_verify_calls 1
 	} elseif { $a == "branches" } {
 	  set gcov_verify_branches 1
+	} elseif { [lindex $a 0] == "filters" } {
+	  set gcov_verify_filters 1
+	  set verify_filters_expected [lindex $a 1]
+	  set verify_filters_unexpected [lindex $a 2]
 	} elseif { $a == "intermediate" } {
 	  set gcov_verify_intermediate 1
 	  set gcov_verify_calls 0
 	  set gcov_verify_branches 0
 	  set gcov_verify_lines 0
+	  set gcov_verify_filters 0
 	} elseif { $a == "remove-gcda" } {
 	  set gcov_remove_gcda 1
 	} elseif { $gcov_args == "" } {
@@ -415,15 +459,20 @@  proc run-gcov { args } {
     } else {
 	set ifailed 0
     }
+    if { $gcov_verify_filters } {
+	set ffailed [verify-filters $testname $testcase $testcase.gcov $verify_filters_expected $verify_filters_unexpected]
+    } else {
+	set ffailed 0
+    }
 
     # Report whether the gcov test passed or failed.  If there were
     # multiple failures then the message is a summary.
-    set tfailed [expr $lfailed + $bfailed + $cfailed + $ifailed]
+    set tfailed [expr $lfailed + $bfailed + $cfailed + $ifailed + $ffailed]
     if { $xfailed } {
 	setup_xfail "*-*-*"
     }
     if { $tfailed > 0 } {
-	fail "$testname gcov: $lfailed failures in line counts, $bfailed in branch percentages, $cfailed in return percentages, $ifailed in intermediate format"
+	fail "$testname gcov: $lfailed failures in line counts, $bfailed in branch percentages, $cfailed in return percentages, $ifailed in intermediate format, $ffailed failed in filters"
 	if { $xfailed } {
 	    clean-gcov $testcase
 	}