gcov tool: Implement Hawick's algorithm for cycle detection, (PR gcov-profile/67992)

Message ID	d46347bc-bf30-6c37-0420-38297aca3df8@suse.cz
State	New
Headers	show Return-Path: <gcc-patches-return-433054-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:to:cc :from:subject:message-id:date:mime-version:content-type; q=dns; s=default; b=yTr0nFnFhRT8e1DVBojcDmHxFKcm4yZ8BAFOO78gYX8TK7WsMa SRM0P0bgk369FuIRmb75FuQfqr5HaByJ3bJtOQHE2QhLqWRHwLFj2EOT2t24WuJT Mn5VyB59lZJER8HkAvqg37R6C8eh59+PhFEUaimKzdh5BfZ/pcAdGUTao= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org To: GCC Patches <gcc-patches@gcc.gnu.org> Cc: Jan Hubicka <hubicka@ucw.cz>, Pidgeot18@gmail.com, Nathan Sidwell <nathan@acm.org> From: =?UTF-8?Q?Martin_Li=c5=a1ka?= <mliska@suse.cz> Subject: [PATCH] gcov tool: Implement Hawick's algorithm for cycle detection, (PR gcov-profile/67992) Message-ID: <d46347bc-bf30-6c37-0420-38297aca3df8@suse.cz> Date: Wed, 3 Aug 2016 15:31:59 +0200 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.2 MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------DFC8F5F5B19C4470C69403D4"

From faf7fb72d439974de68eb672edc6d76424f6022d Mon Sep 17 00:00:00 2001 From: marxin <mliska@suse.cz> Date: Wed, 3 Aug 2016 09:56:45 +0200 Subject: [PATCH] gcov tool: Implement Hawick's algorithm for cycle detection (PR gcov-profile/67992) gcc/ChangeLog: 2016-08-03 Martin Liska <mliska@suse.cz> Joshua Cranmer <Pidgeot18@gmail.com> PR gcov-profile/67992 * gcov.c (line_t::has_block): New function. (handle_cycle): Likewise. (unblock): Likewise. (circuit): Likewise. (find_cycles): Likewise. (get_cycles_count): Likewise. (main): Fix GNU coding style. (output_intermediate_file): Likewise. (process_file): Likewise. (generate_results): Likewise. (release_structures): Likewise. (create_file_names): Likewise. (find_source): Likewise. (read_graph_file): Likewise. (find_exception_blocks): Likewise. (canonicalize_name): Likewise. (make_gcov_file_name): Likewise. (mangle_name): Likewise. (accumulate_line_counts): Use the new Hawick's algorithm. (output_branch_count): Fix GNU coding style. (read_line): Likewise. --- gcc/gcov.c | 378 +++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 229 insertions(+), 149 deletions(-) diff --git a/gcc/gcov.c b/gcc/gcov.c index 417b4f4..8855980 100644 --- a/gcc/gcov.c +++ b/gcc/gcov.c @@ -41,6 +41,11 @@ along with Gcov; see the file COPYING3. If not see #include <getopt.h> +#include <vector> +#include <algorithm> + +using namespace std; + #define IN_GCOV 1 #include "gcov-io.h" #include "gcov-io.c" @@ -222,6 +227,9 @@ typedef struct coverage_info typedef struct line_info { + /* Return true when NEEDLE is one of basic blocks the line belongs to. */ + bool has_block (block_t *needle); + gcov_type count; /* execution count */ union { @@ -235,6 +243,16 @@ typedef struct line_info unsigned unexceptional : 1; } line_t; +bool +line_t::has_block (block_t *needle) +{ + for (block_t *n = u.blocks; n; n = n->chain) + if (n == needle) + return true; + + return false; +} + /* Describes a file mentioned in the block graph. Contains an array of line info. */ @@ -407,6 +425,164 @@ static void release_structures (void); static void release_function (function_t *); extern int main (int, char **); +/* Cycle detection! + There are a bajillion algorithms that do this. Boost's function is named + hawick_cycles, so I used the algorithm by K. A. Hawick and H. A. James in + "Enumerating Circuits and Loops in Graphs with Self-Arcs and Multiple-Arcs" + (url at <http://complexity.massey.ac.nz/cstn/013/cstn-013.pdf>). + + The basic algorithm is simple: effectively, we're finding all simple paths + in a subgraph (that shrinks every iteration). Duplicates are filtered by + "blocking" a path when a node is added to the path (this also prevents non- + simple paths)--the node is unblocked only when it participates in a cycle. + */ + +/* Flag that drives cycle detection after a negative cycle is seen. */ +static bool did_negate = false; + +/* Handle cycle identified by EDGES, where the function finds minimum cs_count + and subtract the value from all counts. The subtracted value is added + to COUNT. */ + +static void +handle_cycle (const vector<arc_t *> &edges, int64_t &count) +{ + /* Find the minimum edge of the cycle, and reduce all nodes in the cycle by + that amount. */ + int64_t cycle_count = INT64_MAX; + for (unsigned i = 0; i < edges.size (); i++) + { + int64_t ecount = edges[i]->cs_count; + if (cycle_count > ecount) + cycle_count = ecount; + } + count += cycle_count; + for (unsigned i = 0; i < edges.size (); i++) + edges[i]->cs_count -= cycle_count; + + if (cycle_count < 0) + did_negate = true; +} + +/* Unblock a block U from BLOCKED. Apart from that, iterate all blocks + blocked by U in BLOCK_LISTS. */ + +static void +unblock (block_t *u, vector<block_t *> &blocked, + vector<vector<block_t *> > &block_lists) +{ + vector<block_t *>::iterator it = find (blocked.begin (), blocked.end (), u); + if (it == blocked.end ()) + return; + + unsigned index = it - blocked.begin (); + blocked.erase (it); + + for (vector<block_t *>::iterator it2 = block_lists[index].begin (); + it2 != block_lists[index].end (); it2++) + unblock (*it2, blocked, block_lists); + for (unsigned j = 0; j < block_lists[index].size (); j++) + unblock (u, blocked, block_lists); + + block_lists.erase (block_lists.begin () + index); +} + +/* Find circuit going to block V, PATH is provisional seen cycle. + BLOCKED is vector of blocked vertices, BLOCK_LISTS contains vertices + blocked by a block. COUNT is accumulated count of the current LINE. */ + +static bool +circuit (block_t *v, vector<arc_t *> &path, block_t *start, + vector<block_t *> &blocked, vector<vector<block_t *>> &block_lists, + line_t &linfo, int64_t &count) +{ + bool found = false; + + /* Add v to the block list. */ + gcc_assert (find (blocked.begin (), blocked.end (), v) == blocked.end ()); + blocked.push_back (v); + block_lists.push_back (vector<block_t *> ()); + + for (arc_t *arc = v->succ; arc; arc = arc->succ_next) + { + block_t *w = arc->dst; + if (w < start || !linfo.has_block (w)) + continue; + + path.push_back (arc); + if (w == start) + { + /* Cycle has been found. */ + handle_cycle (path, count); + found = true; + } + else if (find (blocked.begin (), blocked.end (), w) == blocked.end ()) + found |= circuit (w, path, start, blocked, block_lists, linfo, count); + + path.pop_back (); + } + + if (found) + unblock (v, blocked, block_lists); + else + for (arc_t *arc = v->succ; arc; arc = arc->succ_next) + { + block_t *w = arc->dst; + if (w < start || !linfo.has_block (w)) + continue; + + size_t index + = find (blocked.begin (), blocked.end (), w) - blocked.begin (); + gcc_assert (index < blocked.size ()); + vector<block_t *> &list = block_lists[index]; + if (find (list.begin (), list.end (), v) == list.end ()) + list.push_back (v); + } + + return found; +} + +/* Find cycles for a LINFO. */ + +static gcov_type +find_cycles (line_t &linfo) +{ + /* Note that this algorithm works even if blocks aren't in sorted order. + Each iteration of the circuit detection is completely independent + (except for reducing counts, but that shouldn't matter anyways). + Therefore, operating on a permuted order (i.e., non-sorted) only + has the effect of permuting the output cycles. */ + + gcov_type count = 0; + for (block_t *block = linfo.u.blocks; block; block = block->chain) + { + vector<arc_t *> path; + vector<block_t *> blocked; + vector<vector<block_t *> > block_lists; + circuit (block, path, block, blocked, block_lists, linfo, count); + } + + return count; +} + +/* Get cycle count for a LINFO. */ + +static gcov_type +get_cycles_count (line_t &linfo) +{ + gcov_type count = 0; + + /* Bug compatibility with gcc's broken cycle-finder: if a negative cycle + exists, run the algorithm again. */ + + did_negate = false; + count = find_cycles (linfo); + if (did_negate) + count += find_cycles (linfo); + + return count; +} + int main (int argc, char **argv) { @@ -435,7 +611,7 @@ main (int argc, char **argv) names = XNEWVEC (name_map_t, a_names); a_sources = 10; sources = XNEWVEC (source_t, a_sources); - + argno = process_args (argc, argv); if (optind == argc) print_usage (true); @@ -444,12 +620,12 @@ main (int argc, char **argv) multiple_files = 1; first_arg = argno; - + for (; argno != argc; argno++) { if (flag_display_progress) - printf ("Processing file %d out of %d\n", - argno - first_arg + 1, argc - first_arg); + printf ("Processing file %d out of %d\n", argno - first_arg + 1, + argc - first_arg); process_file (argv[argno]); } @@ -459,7 +635,7 @@ main (int argc, char **argv) return 0; } - + /* Print a usage message and exit. If ERROR_P is nonzero, this is an error, otherwise the output of --help. */ @@ -671,8 +847,8 @@ output_intermediate_file (FILE *gcov_file, source_t *src) { /* function:<name>,<line_number>,<execution_count> */ fprintf (gcov_file, "function:%d,%s,%s\n", fn->line, - format_gcov (fn->blocks[0].count, 0, -1), - flag_demangled_names ? fn->demangled_name : fn->name); + format_gcov (fn->blocks[0].count, 0, -1), + flag_demangled_names ? fn->demangled_name : fn->name); } for (line_num = 1, line = &src->lines[line_num]; @@ -681,8 +857,8 @@ output_intermediate_file (FILE *gcov_file, source_t *src) { arc_t *arc; if (line->exists) - fprintf (gcov_file, "lcount:%u,%s\n", line_num, - format_gcov (line->count, 0, -1)); + fprintf (gcov_file, "lcount:%u,%s\n", line_num, + format_gcov (line->count, 0, -1)); if (flag_branches) for (arc = line->u.branches; arc; arc = arc->line_next) { @@ -705,7 +881,6 @@ output_intermediate_file (FILE *gcov_file, source_t *src) } } - /* Process a single input file. */ static void @@ -717,7 +892,7 @@ process_file (const char *file_name) fns = read_graph_file (); if (!fns) return; - + read_count_file (fns); while (fns) { @@ -767,7 +942,7 @@ process_file (const char *file_name) } if (line >= sources[src].num_lines) sources[src].num_lines = line + 1; - + solve_flow_graph (fn); if (fn->has_catch) find_exception_blocks (fn); @@ -848,15 +1023,14 @@ generate_results (const char *file_name) if (flag_gcov_file && flag_intermediate_format) { /* Open the intermediate file. */ - gcov_intermediate_filename = - get_gcov_intermediate_filename (file_name); + gcov_intermediate_filename = get_gcov_intermediate_filename (file_name); gcov_intermediate_file = fopen (gcov_intermediate_filename, "w"); if (!gcov_intermediate_file) - { - fnotice (stderr, "Cannot open intermediate output file %s\n", - gcov_intermediate_filename); - return; - } + { + fnotice (stderr, "Cannot open intermediate output file %s\n", + gcov_intermediate_filename); + return; + } } for (ix = n_sources, src = sources; ix--; src++) @@ -866,7 +1040,7 @@ generate_results (const char *file_name) /* Ignore this source, if it is an absolute path (after source prefix removal). */ char first = src->coverage.name[0]; - + #if HAVE_DOS_BASED_FILE_SYSTEM if (first && src->coverage.name[1] == ':') first = src->coverage.name[2]; @@ -874,7 +1048,7 @@ generate_results (const char *file_name) if (IS_DIR_SEPARATOR (first)) continue; } - + accumulate_line_counts (src); function_summary (&src->coverage, "File"); total_lines += src->coverage.lines; @@ -938,7 +1112,7 @@ release_structures (void) for (ix = n_sources; ix--;) free (sources[ix].lines); free (sources); - + for (ix = n_names; ix--;) free (names[ix].name); free (names); @@ -982,7 +1156,7 @@ create_file_names (const char *file_name) base = !stat (object_directory, &status) && S_ISDIR (status.st_mode); strcat (name, object_directory); - if (base && (! IS_DIR_SEPARATOR (name[strlen (name) - 1]))) + if (base && (!IS_DIR_SEPARATOR (name[strlen (name) - 1]))) strcat (name, "/"); } else @@ -1075,16 +1249,16 @@ find_source (const char *file_name) free (names); names = name_map; } - + /* Not found, try the canonical name. */ canon = canonicalize_name (file_name); - name_map = (name_map_t *)bsearch - (canon, names, n_names, sizeof (*names), name_search); + name_map = (name_map_t *) bsearch (canon, names, n_names, sizeof (*names), + name_search); if (!name_map) { /* Not found with canonical name, create a new source. */ source_t *src; - + if (n_sources == a_sources) { a_sources *= 2; @@ -1210,12 +1384,12 @@ read_graph_file (void) fn = XCNEW (function_t); fn->name = function_name; - if (flag_demangled_names) - { - fn->demangled_name = cplus_demangle (fn->name, DMGL_PARAMS); - if (!fn->demangled_name) - fn->demangled_name = fn->name; - } + if (flag_demangled_names) + { + fn->demangled_name = cplus_demangle (fn->name, DMGL_PARAMS); + if (!fn->demangled_name) + fn->demangled_name = fn->name; + } fn->ident = ident; fn->lineno_checksum = lineno_checksum; fn->cfg_checksum = cfg_checksum; @@ -1293,7 +1467,7 @@ read_graph_file (void) else { /* Non-local return from a callee of this - function. The destination block is a setjmp. */ + function. The destination block is a setjmp. */ arc->is_nonlocal_return = 1; fn->blocks[dest].is_nonlocal_return = 1; } @@ -1302,7 +1476,7 @@ read_graph_file (void) if (!arc->on_tree) fn->num_counts++; } - + if (mark_catches) { /* We have a fake exit from this block. The other @@ -1774,7 +1948,7 @@ find_exception_blocks (function_t *fn) { block_t *block = queue[--ix]; const arc_t *arc; - + for (arc = block->succ; arc; arc = arc->succ_next) if (!arc->fake && !arc->is_throw && arc->dst->exceptional) { @@ -1783,7 +1957,6 @@ find_exception_blocks (function_t *fn) } } } - /* Increment totals in COVERAGE according to arc ARC. */ @@ -1862,7 +2035,7 @@ executed_summary (unsigned lines, unsigned executed) else fnotice (stdout, "No executable lines\n"); } - + /* Output summary info for a function or file. */ static void @@ -1921,7 +2094,7 @@ canonicalize_name (const char *name) for (dd_base = ptr; *base; base = probe) { size_t len; - + for (probe = base; *probe; probe++) if (IS_DIR_SEPARATOR (*probe)) break; @@ -1942,7 +2115,7 @@ canonicalize_name (const char *name) /* S_ISLNK is not POSIX.1-1996. */ || stat (result, &buf) || S_ISLNK (buf.st_mode) #endif - ) + ) { /* Cannot elide, or unreadable or a symlink. */ dd_base = ptr + 2 + slash; @@ -1993,7 +2166,7 @@ make_gcov_file_name (const char *input_name, const char *src_name) { /* Generate the input filename part. */ result = XNEWVEC (char, strlen (input_name) + strlen (src_name) + 10); - + ptr = result; ptr = mangle_name (input_name, ptr); ptr[0] = ptr[1] = '#'; @@ -2007,7 +2180,7 @@ make_gcov_file_name (const char *input_name, const char *src_name) ptr = mangle_name (src_name, ptr); strcpy (ptr, ".gcov"); - + return result; } @@ -2015,7 +2188,7 @@ static char * mangle_name (char const *base, char *ptr) { size_t len; - + /* Generate the source filename part. */ if (!flag_preserve_paths) { @@ -2061,7 +2234,7 @@ mangle_name (char const *base, char *ptr) } } } - + return ptr; } @@ -2152,8 +2325,7 @@ accumulate_line_counts (source_t *src) unsigned ix; /* Reverse the function order. */ - for (fn = src->functions, fn_p = NULL; fn; - fn_p = fn, fn = fn_n) + for (fn = src->functions, fn_p = NULL; fn; fn_p = fn, fn = fn_n) { fn_n = fn->line_next; fn->line_next = fn_p; @@ -2204,113 +2376,22 @@ accumulate_line_counts (source_t *src) arc_t *arc; for (arc = block->pred; arc; arc = arc->pred_next) - { - if (arc->src->u.cycle.ident != ix) - count += arc->count; - if (flag_branches) - add_branch_counts (&src->coverage, arc); - } - - /* Initialize the cs_count. */ - for (arc = block->succ; arc; arc = arc->succ_next) - arc->cs_count = arc->count; + if (flag_branches) + add_branch_counts (&src->coverage, arc); } - /* Find the loops. This uses the algorithm described in - Tiernan 'An Efficient Search Algorithm to Find the - Elementary Circuits of a Graph', CACM Dec 1970. We hold - the P array by having each block point to the arc that - connects to the previous block. The H array is implicitly - held because of the arc ordering, and the block's - previous arc pointer. - - Although the algorithm is O(N^3) for highly connected - graphs, at worst we'll have O(N^2), as most blocks have - only one or two exits. Most graphs will be small. - - For each loop we find, locate the arc with the smallest - transition count, and add that to the cumulative - count. Decrease flow over the cycle and remove the arc - from consideration. */ + /* Cycle detection. */ for (block = line->u.blocks; block; block = block->chain) { - block_t *head = block; - arc_t *arc; - - next_vertex:; - arc = head->succ; - current_vertex:; - while (arc) - { - block_t *dst = arc->dst; - if (/* Already used that arc. */ - arc->cycle - /* Not to same graph, or before first vertex. */ - || dst->u.cycle.ident != ix - /* Already in path. */ - || dst->u.cycle.arc) - { - arc = arc->succ_next; - continue; - } - - if (dst == block) - { - /* Found a closing arc. */ - gcov_type cycle_count = arc->cs_count; - arc_t *cycle_arc = arc; - arc_t *probe_arc; - - /* Locate the smallest arc count of the loop. */ - for (dst = head; (probe_arc = dst->u.cycle.arc); - dst = probe_arc->src) - if (cycle_count > probe_arc->cs_count) - { - cycle_count = probe_arc->cs_count; - cycle_arc = probe_arc; - } - - count += cycle_count; - cycle_arc->cycle = 1; - - /* Remove the flow from the cycle. */ - arc->cs_count -= cycle_count; - for (dst = head; (probe_arc = dst->u.cycle.arc); - dst = probe_arc->src) - probe_arc->cs_count -= cycle_count; - - /* Unwind to the cyclic arc. */ - while (head != cycle_arc->src) - { - arc = head->u.cycle.arc; - head->u.cycle.arc = NULL; - head = arc->src; - } - /* Move on. */ - arc = arc->succ_next; - continue; - } - - /* Add new block to chain. */ - dst->u.cycle.arc = arc; - head = dst; - goto next_vertex; - } - /* We could not add another vertex to the path. Remove - the last vertex from the list. */ - arc = head->u.cycle.arc; - if (arc) - { - /* It was not the first vertex. Move onto next arc. */ - head->u.cycle.arc = NULL; - head = arc->src; - arc = arc->succ_next; - goto current_vertex; - } - /* Mark this block as unusable. */ - block->u.cycle.ident = ~0U; + for (arc_t *arc = block->pred; arc; arc = arc->pred_next) + if (!line->has_block (arc->src)) + count += arc->count; + for (arc_t *arc = block->succ; arc; arc = arc->succ_next) + arc->cs_count = arc->count; } + //* Now, add the count of loops entirely on this line. */ + count += get_cycles_count (*line); line->count = count; } @@ -2361,7 +2442,6 @@ output_branch_count (FILE *gcov_file, int ix, const arc_t *arc) else return 0; return 1; - } static const char * @@ -2391,7 +2471,7 @@ read_line (FILE *file) string = XRESIZEVEC (char, string, string_len * 2); string_len *= 2; } - + return pos ? string : NULL; } -- 2.9.2

gcov tool: Implement Hawick's algorithm for cycle detection, (PR gcov-profile/67992)

Commit Message

Comments

Patch