From faf7fb72d439974de68eb672edc6d76424f6022d Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Wed, 3 Aug 2016 09:56:45 +0200
Subject: [PATCH] gcov tool: Implement Hawick's algorithm for cycle detection
(PR gcov-profile/67992)
gcc/ChangeLog:
2016-08-03 Martin Liska <mliska@suse.cz>
Joshua Cranmer <Pidgeot18@gmail.com>
PR gcov-profile/67992
* gcov.c (line_t::has_block): New function.
(handle_cycle): Likewise.
(unblock): Likewise.
(circuit): Likewise.
(find_cycles): Likewise.
(get_cycles_count): Likewise.
(main): Fix GNU coding style.
(output_intermediate_file): Likewise.
(process_file): Likewise.
(generate_results): Likewise.
(release_structures): Likewise.
(create_file_names): Likewise.
(find_source): Likewise.
(read_graph_file): Likewise.
(find_exception_blocks): Likewise.
(canonicalize_name): Likewise.
(make_gcov_file_name): Likewise.
(mangle_name): Likewise.
(accumulate_line_counts): Use the new Hawick's algorithm.
(output_branch_count): Fix GNU coding style.
(read_line): Likewise.
---
gcc/gcov.c | 378 +++++++++++++++++++++++++++++++++++++------------------------
1 file changed, 229 insertions(+), 149 deletions(-)
@@ -41,6 +41,11 @@ along with Gcov; see the file COPYING3. If not see
#include <getopt.h>
+#include <vector>
+#include <algorithm>
+
+using namespace std;
+
#define IN_GCOV 1
#include "gcov-io.h"
#include "gcov-io.c"
@@ -222,6 +227,9 @@ typedef struct coverage_info
typedef struct line_info
{
+ /* Return true when NEEDLE is one of basic blocks the line belongs to. */
+ bool has_block (block_t *needle);
+
gcov_type count; /* execution count */
union
{
@@ -235,6 +243,16 @@ typedef struct line_info
unsigned unexceptional : 1;
} line_t;
+bool
+line_t::has_block (block_t *needle)
+{
+ for (block_t *n = u.blocks; n; n = n->chain)
+ if (n == needle)
+ return true;
+
+ return false;
+}
+
/* Describes a file mentioned in the block graph. Contains an array
of line info. */
@@ -407,6 +425,164 @@ static void release_structures (void);
static void release_function (function_t *);
extern int main (int, char **);
+/* Cycle detection!
+ There are a bajillion algorithms that do this. Boost's function is named
+ hawick_cycles, so I used the algorithm by K. A. Hawick and H. A. James in
+ "Enumerating Circuits and Loops in Graphs with Self-Arcs and Multiple-Arcs"
+ (url at <http://complexity.massey.ac.nz/cstn/013/cstn-013.pdf>).
+
+ The basic algorithm is simple: effectively, we're finding all simple paths
+ in a subgraph (that shrinks every iteration). Duplicates are filtered by
+ "blocking" a path when a node is added to the path (this also prevents non-
+ simple paths)--the node is unblocked only when it participates in a cycle.
+ */
+
+/* Flag that drives cycle detection after a negative cycle is seen. */
+static bool did_negate = false;
+
+/* Handle cycle identified by EDGES, where the function finds minimum cs_count
+ and subtract the value from all counts. The subtracted value is added
+ to COUNT. */
+
+static void
+handle_cycle (const vector<arc_t *> &edges, int64_t &count)
+{
+ /* Find the minimum edge of the cycle, and reduce all nodes in the cycle by
+ that amount. */
+ int64_t cycle_count = INT64_MAX;
+ for (unsigned i = 0; i < edges.size (); i++)
+ {
+ int64_t ecount = edges[i]->cs_count;
+ if (cycle_count > ecount)
+ cycle_count = ecount;
+ }
+ count += cycle_count;
+ for (unsigned i = 0; i < edges.size (); i++)
+ edges[i]->cs_count -= cycle_count;
+
+ if (cycle_count < 0)
+ did_negate = true;
+}
+
+/* Unblock a block U from BLOCKED. Apart from that, iterate all blocks
+ blocked by U in BLOCK_LISTS. */
+
+static void
+unblock (block_t *u, vector<block_t *> &blocked,
+ vector<vector<block_t *> > &block_lists)
+{
+ vector<block_t *>::iterator it = find (blocked.begin (), blocked.end (), u);
+ if (it == blocked.end ())
+ return;
+
+ unsigned index = it - blocked.begin ();
+ blocked.erase (it);
+
+ for (vector<block_t *>::iterator it2 = block_lists[index].begin ();
+ it2 != block_lists[index].end (); it2++)
+ unblock (*it2, blocked, block_lists);
+ for (unsigned j = 0; j < block_lists[index].size (); j++)
+ unblock (u, blocked, block_lists);
+
+ block_lists.erase (block_lists.begin () + index);
+}
+
+/* Find circuit going to block V, PATH is provisional seen cycle.
+ BLOCKED is vector of blocked vertices, BLOCK_LISTS contains vertices
+ blocked by a block. COUNT is accumulated count of the current LINE. */
+
+static bool
+circuit (block_t *v, vector<arc_t *> &path, block_t *start,
+ vector<block_t *> &blocked, vector<vector<block_t *>> &block_lists,
+ line_t &linfo, int64_t &count)
+{
+ bool found = false;
+
+ /* Add v to the block list. */
+ gcc_assert (find (blocked.begin (), blocked.end (), v) == blocked.end ());
+ blocked.push_back (v);
+ block_lists.push_back (vector<block_t *> ());
+
+ for (arc_t *arc = v->succ; arc; arc = arc->succ_next)
+ {
+ block_t *w = arc->dst;
+ if (w < start || !linfo.has_block (w))
+ continue;
+
+ path.push_back (arc);
+ if (w == start)
+ {
+ /* Cycle has been found. */
+ handle_cycle (path, count);
+ found = true;
+ }
+ else if (find (blocked.begin (), blocked.end (), w) == blocked.end ())
+ found |= circuit (w, path, start, blocked, block_lists, linfo, count);
+
+ path.pop_back ();
+ }
+
+ if (found)
+ unblock (v, blocked, block_lists);
+ else
+ for (arc_t *arc = v->succ; arc; arc = arc->succ_next)
+ {
+ block_t *w = arc->dst;
+ if (w < start || !linfo.has_block (w))
+ continue;
+
+ size_t index
+ = find (blocked.begin (), blocked.end (), w) - blocked.begin ();
+ gcc_assert (index < blocked.size ());
+ vector<block_t *> &list = block_lists[index];
+ if (find (list.begin (), list.end (), v) == list.end ())
+ list.push_back (v);
+ }
+
+ return found;
+}
+
+/* Find cycles for a LINFO. */
+
+static gcov_type
+find_cycles (line_t &linfo)
+{
+ /* Note that this algorithm works even if blocks aren't in sorted order.
+ Each iteration of the circuit detection is completely independent
+ (except for reducing counts, but that shouldn't matter anyways).
+ Therefore, operating on a permuted order (i.e., non-sorted) only
+ has the effect of permuting the output cycles. */
+
+ gcov_type count = 0;
+ for (block_t *block = linfo.u.blocks; block; block = block->chain)
+ {
+ vector<arc_t *> path;
+ vector<block_t *> blocked;
+ vector<vector<block_t *> > block_lists;
+ circuit (block, path, block, blocked, block_lists, linfo, count);
+ }
+
+ return count;
+}
+
+/* Get cycle count for a LINFO. */
+
+static gcov_type
+get_cycles_count (line_t &linfo)
+{
+ gcov_type count = 0;
+
+ /* Bug compatibility with gcc's broken cycle-finder: if a negative cycle
+ exists, run the algorithm again. */
+
+ did_negate = false;
+ count = find_cycles (linfo);
+ if (did_negate)
+ count += find_cycles (linfo);
+
+ return count;
+}
+
int
main (int argc, char **argv)
{
@@ -435,7 +611,7 @@ main (int argc, char **argv)
names = XNEWVEC (name_map_t, a_names);
a_sources = 10;
sources = XNEWVEC (source_t, a_sources);
-
+
argno = process_args (argc, argv);
if (optind == argc)
print_usage (true);
@@ -444,12 +620,12 @@ main (int argc, char **argv)
multiple_files = 1;
first_arg = argno;
-
+
for (; argno != argc; argno++)
{
if (flag_display_progress)
- printf ("Processing file %d out of %d\n",
- argno - first_arg + 1, argc - first_arg);
+ printf ("Processing file %d out of %d\n", argno - first_arg + 1,
+ argc - first_arg);
process_file (argv[argno]);
}
@@ -459,7 +635,7 @@ main (int argc, char **argv)
return 0;
}
-
+
/* Print a usage message and exit. If ERROR_P is nonzero, this is an error,
otherwise the output of --help. */
@@ -671,8 +847,8 @@ output_intermediate_file (FILE *gcov_file, source_t *src)
{
/* function:<name>,<line_number>,<execution_count> */
fprintf (gcov_file, "function:%d,%s,%s\n", fn->line,
- format_gcov (fn->blocks[0].count, 0, -1),
- flag_demangled_names ? fn->demangled_name : fn->name);
+ format_gcov (fn->blocks[0].count, 0, -1),
+ flag_demangled_names ? fn->demangled_name : fn->name);
}
for (line_num = 1, line = &src->lines[line_num];
@@ -681,8 +857,8 @@ output_intermediate_file (FILE *gcov_file, source_t *src)
{
arc_t *arc;
if (line->exists)
- fprintf (gcov_file, "lcount:%u,%s\n", line_num,
- format_gcov (line->count, 0, -1));
+ fprintf (gcov_file, "lcount:%u,%s\n", line_num,
+ format_gcov (line->count, 0, -1));
if (flag_branches)
for (arc = line->u.branches; arc; arc = arc->line_next)
{
@@ -705,7 +881,6 @@ output_intermediate_file (FILE *gcov_file, source_t *src)
}
}
-
/* Process a single input file. */
static void
@@ -717,7 +892,7 @@ process_file (const char *file_name)
fns = read_graph_file ();
if (!fns)
return;
-
+
read_count_file (fns);
while (fns)
{
@@ -767,7 +942,7 @@ process_file (const char *file_name)
}
if (line >= sources[src].num_lines)
sources[src].num_lines = line + 1;
-
+
solve_flow_graph (fn);
if (fn->has_catch)
find_exception_blocks (fn);
@@ -848,15 +1023,14 @@ generate_results (const char *file_name)
if (flag_gcov_file && flag_intermediate_format)
{
/* Open the intermediate file. */
- gcov_intermediate_filename =
- get_gcov_intermediate_filename (file_name);
+ gcov_intermediate_filename = get_gcov_intermediate_filename (file_name);
gcov_intermediate_file = fopen (gcov_intermediate_filename, "w");
if (!gcov_intermediate_file)
- {
- fnotice (stderr, "Cannot open intermediate output file %s\n",
- gcov_intermediate_filename);
- return;
- }
+ {
+ fnotice (stderr, "Cannot open intermediate output file %s\n",
+ gcov_intermediate_filename);
+ return;
+ }
}
for (ix = n_sources, src = sources; ix--; src++)
@@ -866,7 +1040,7 @@ generate_results (const char *file_name)
/* Ignore this source, if it is an absolute path (after
source prefix removal). */
char first = src->coverage.name[0];
-
+
#if HAVE_DOS_BASED_FILE_SYSTEM
if (first && src->coverage.name[1] == ':')
first = src->coverage.name[2];
@@ -874,7 +1048,7 @@ generate_results (const char *file_name)
if (IS_DIR_SEPARATOR (first))
continue;
}
-
+
accumulate_line_counts (src);
function_summary (&src->coverage, "File");
total_lines += src->coverage.lines;
@@ -938,7 +1112,7 @@ release_structures (void)
for (ix = n_sources; ix--;)
free (sources[ix].lines);
free (sources);
-
+
for (ix = n_names; ix--;)
free (names[ix].name);
free (names);
@@ -982,7 +1156,7 @@ create_file_names (const char *file_name)
base = !stat (object_directory, &status) && S_ISDIR (status.st_mode);
strcat (name, object_directory);
- if (base && (! IS_DIR_SEPARATOR (name[strlen (name) - 1])))
+ if (base && (!IS_DIR_SEPARATOR (name[strlen (name) - 1])))
strcat (name, "/");
}
else
@@ -1075,16 +1249,16 @@ find_source (const char *file_name)
free (names);
names = name_map;
}
-
+
/* Not found, try the canonical name. */
canon = canonicalize_name (file_name);
- name_map = (name_map_t *)bsearch
- (canon, names, n_names, sizeof (*names), name_search);
+ name_map = (name_map_t *) bsearch (canon, names, n_names, sizeof (*names),
+ name_search);
if (!name_map)
{
/* Not found with canonical name, create a new source. */
source_t *src;
-
+
if (n_sources == a_sources)
{
a_sources *= 2;
@@ -1210,12 +1384,12 @@ read_graph_file (void)
fn = XCNEW (function_t);
fn->name = function_name;
- if (flag_demangled_names)
- {
- fn->demangled_name = cplus_demangle (fn->name, DMGL_PARAMS);
- if (!fn->demangled_name)
- fn->demangled_name = fn->name;
- }
+ if (flag_demangled_names)
+ {
+ fn->demangled_name = cplus_demangle (fn->name, DMGL_PARAMS);
+ if (!fn->demangled_name)
+ fn->demangled_name = fn->name;
+ }
fn->ident = ident;
fn->lineno_checksum = lineno_checksum;
fn->cfg_checksum = cfg_checksum;
@@ -1293,7 +1467,7 @@ read_graph_file (void)
else
{
/* Non-local return from a callee of this
- function. The destination block is a setjmp. */
+ function. The destination block is a setjmp. */
arc->is_nonlocal_return = 1;
fn->blocks[dest].is_nonlocal_return = 1;
}
@@ -1302,7 +1476,7 @@ read_graph_file (void)
if (!arc->on_tree)
fn->num_counts++;
}
-
+
if (mark_catches)
{
/* We have a fake exit from this block. The other
@@ -1774,7 +1948,7 @@ find_exception_blocks (function_t *fn)
{
block_t *block = queue[--ix];
const arc_t *arc;
-
+
for (arc = block->succ; arc; arc = arc->succ_next)
if (!arc->fake && !arc->is_throw && arc->dst->exceptional)
{
@@ -1783,7 +1957,6 @@ find_exception_blocks (function_t *fn)
}
}
}
-
/* Increment totals in COVERAGE according to arc ARC. */
@@ -1862,7 +2035,7 @@ executed_summary (unsigned lines, unsigned executed)
else
fnotice (stdout, "No executable lines\n");
}
-
+
/* Output summary info for a function or file. */
static void
@@ -1921,7 +2094,7 @@ canonicalize_name (const char *name)
for (dd_base = ptr; *base; base = probe)
{
size_t len;
-
+
for (probe = base; *probe; probe++)
if (IS_DIR_SEPARATOR (*probe))
break;
@@ -1942,7 +2115,7 @@ canonicalize_name (const char *name)
/* S_ISLNK is not POSIX.1-1996. */
|| stat (result, &buf) || S_ISLNK (buf.st_mode)
#endif
- )
+ )
{
/* Cannot elide, or unreadable or a symlink. */
dd_base = ptr + 2 + slash;
@@ -1993,7 +2166,7 @@ make_gcov_file_name (const char *input_name, const char *src_name)
{
/* Generate the input filename part. */
result = XNEWVEC (char, strlen (input_name) + strlen (src_name) + 10);
-
+
ptr = result;
ptr = mangle_name (input_name, ptr);
ptr[0] = ptr[1] = '#';
@@ -2007,7 +2180,7 @@ make_gcov_file_name (const char *input_name, const char *src_name)
ptr = mangle_name (src_name, ptr);
strcpy (ptr, ".gcov");
-
+
return result;
}
@@ -2015,7 +2188,7 @@ static char *
mangle_name (char const *base, char *ptr)
{
size_t len;
-
+
/* Generate the source filename part. */
if (!flag_preserve_paths)
{
@@ -2061,7 +2234,7 @@ mangle_name (char const *base, char *ptr)
}
}
}
-
+
return ptr;
}
@@ -2152,8 +2325,7 @@ accumulate_line_counts (source_t *src)
unsigned ix;
/* Reverse the function order. */
- for (fn = src->functions, fn_p = NULL; fn;
- fn_p = fn, fn = fn_n)
+ for (fn = src->functions, fn_p = NULL; fn; fn_p = fn, fn = fn_n)
{
fn_n = fn->line_next;
fn->line_next = fn_p;
@@ -2204,113 +2376,22 @@ accumulate_line_counts (source_t *src)
arc_t *arc;
for (arc = block->pred; arc; arc = arc->pred_next)
- {
- if (arc->src->u.cycle.ident != ix)
- count += arc->count;
- if (flag_branches)
- add_branch_counts (&src->coverage, arc);
- }
-
- /* Initialize the cs_count. */
- for (arc = block->succ; arc; arc = arc->succ_next)
- arc->cs_count = arc->count;
+ if (flag_branches)
+ add_branch_counts (&src->coverage, arc);
}
- /* Find the loops. This uses the algorithm described in
- Tiernan 'An Efficient Search Algorithm to Find the
- Elementary Circuits of a Graph', CACM Dec 1970. We hold
- the P array by having each block point to the arc that
- connects to the previous block. The H array is implicitly
- held because of the arc ordering, and the block's
- previous arc pointer.
-
- Although the algorithm is O(N^3) for highly connected
- graphs, at worst we'll have O(N^2), as most blocks have
- only one or two exits. Most graphs will be small.
-
- For each loop we find, locate the arc with the smallest
- transition count, and add that to the cumulative
- count. Decrease flow over the cycle and remove the arc
- from consideration. */
+ /* Cycle detection. */
for (block = line->u.blocks; block; block = block->chain)
{
- block_t *head = block;
- arc_t *arc;
-
- next_vertex:;
- arc = head->succ;
- current_vertex:;
- while (arc)
- {
- block_t *dst = arc->dst;
- if (/* Already used that arc. */
- arc->cycle
- /* Not to same graph, or before first vertex. */
- || dst->u.cycle.ident != ix
- /* Already in path. */
- || dst->u.cycle.arc)
- {
- arc = arc->succ_next;
- continue;
- }
-
- if (dst == block)
- {
- /* Found a closing arc. */
- gcov_type cycle_count = arc->cs_count;
- arc_t *cycle_arc = arc;
- arc_t *probe_arc;
-
- /* Locate the smallest arc count of the loop. */
- for (dst = head; (probe_arc = dst->u.cycle.arc);
- dst = probe_arc->src)
- if (cycle_count > probe_arc->cs_count)
- {
- cycle_count = probe_arc->cs_count;
- cycle_arc = probe_arc;
- }
-
- count += cycle_count;
- cycle_arc->cycle = 1;
-
- /* Remove the flow from the cycle. */
- arc->cs_count -= cycle_count;
- for (dst = head; (probe_arc = dst->u.cycle.arc);
- dst = probe_arc->src)
- probe_arc->cs_count -= cycle_count;
-
- /* Unwind to the cyclic arc. */
- while (head != cycle_arc->src)
- {
- arc = head->u.cycle.arc;
- head->u.cycle.arc = NULL;
- head = arc->src;
- }
- /* Move on. */
- arc = arc->succ_next;
- continue;
- }
-
- /* Add new block to chain. */
- dst->u.cycle.arc = arc;
- head = dst;
- goto next_vertex;
- }
- /* We could not add another vertex to the path. Remove
- the last vertex from the list. */
- arc = head->u.cycle.arc;
- if (arc)
- {
- /* It was not the first vertex. Move onto next arc. */
- head->u.cycle.arc = NULL;
- head = arc->src;
- arc = arc->succ_next;
- goto current_vertex;
- }
- /* Mark this block as unusable. */
- block->u.cycle.ident = ~0U;
+ for (arc_t *arc = block->pred; arc; arc = arc->pred_next)
+ if (!line->has_block (arc->src))
+ count += arc->count;
+ for (arc_t *arc = block->succ; arc; arc = arc->succ_next)
+ arc->cs_count = arc->count;
}
+ //* Now, add the count of loops entirely on this line. */
+ count += get_cycles_count (*line);
line->count = count;
}
@@ -2361,7 +2442,6 @@ output_branch_count (FILE *gcov_file, int ix, const arc_t *arc)
else
return 0;
return 1;
-
}
static const char *
@@ -2391,7 +2471,7 @@ read_line (FILE *file)
string = XRESIZEVEC (char, string, string_len * 2);
string_len *= 2;
}
-
+
return pos ? string : NULL;
}
--
2.9.2