[26/49] analyzer: new files: digraph.{cc|h} and shortest-paths.h
diff mbox series

Message ID 1573867416-55618-27-git-send-email-dmalcolm@redhat.com
State New
Headers show
Series
  • RFC: Add a static analysis framework to GCC
Related show

Commit Message

David Malcolm Nov. 16, 2019, 1:23 a.m. UTC
This patch adds template classes for directed graphs, their nodes
and edges, and for finding the shortest path through such a graph.

gcc/ChangeLog:
	* analyzer/digraph.cc: New file.
	* analyzer/digraph.h: New file.
	* analyzer/shortest-paths.h: New file.
---
 gcc/analyzer/digraph.cc       | 189 ++++++++++++++++++++++++++++++++
 gcc/analyzer/digraph.h        | 248 ++++++++++++++++++++++++++++++++++++++++++
 gcc/analyzer/shortest-paths.h | 147 +++++++++++++++++++++++++
 3 files changed, 584 insertions(+)
 create mode 100644 gcc/analyzer/digraph.cc
 create mode 100644 gcc/analyzer/digraph.h
 create mode 100644 gcc/analyzer/shortest-paths.h

Patch
diff mbox series

diff --git a/gcc/analyzer/digraph.cc b/gcc/analyzer/digraph.cc
new file mode 100644
index 0000000..c1fa46e
--- /dev/null
+++ b/gcc/analyzer/digraph.cc
@@ -0,0 +1,189 @@ 
+/* Template classes for directed graphs.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   Contributed by David Malcolm <dmalcolm@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "gcc-plugin.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic.h"
+#include "analyzer/graphviz.h"
+#include "analyzer/digraph.h"
+#include "analyzer/shortest-paths.h"
+#include "selftest.h"
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* A family of digraph classes for writing selftests.  */
+
+struct test_node;
+struct test_edge;
+struct test_graph;
+struct test_dump_args_t {};
+struct test_cluster;
+
+struct test_graph_traits
+{
+  typedef test_node node_t;
+  typedef test_edge edge_t;
+  typedef test_graph graph_t;
+  typedef test_dump_args_t dump_args_t;
+  typedef test_cluster cluster_t;
+};
+
+struct test_node : public dnode<test_graph_traits>
+{
+  test_node (const char *name, int index) : m_name (name), m_index (index) {}
+  void dump_dot (graphviz_out *, const dump_args_t &) const OVERRIDE
+  {
+  }
+
+  const char *m_name;
+  int m_index;
+};
+
+struct test_edge : public dedge<test_graph_traits>
+{
+  test_edge (node_t *src, node_t *dest)
+  : dedge (src, dest)
+  {}
+
+  void dump_dot (graphviz_out *gv, const dump_args_t &) const OVERRIDE
+  {
+    gv->println ("%s -> %s;", m_src->m_name, m_dest->m_name);
+  }
+};
+
+struct test_graph : public digraph<test_graph_traits>
+{
+  test_node *add_test_node (const char *name)
+  {
+    test_node *result = new test_node (name, m_nodes.length ());
+    add_node (result);
+    return result;
+  }
+
+  test_edge *add_test_edge (test_node *src, test_node *dst)
+  {
+    test_edge *result = new test_edge (src, dst);
+    add_edge (result);
+    return result;
+  }
+};
+
+struct test_cluster : public cluster<test_graph_traits>
+{
+};
+
+struct test_path
+{
+  auto_vec<const test_edge *> m_edges;
+};
+
+/* Smoketest of digraph dumping.  */
+
+static void
+test_dump_to_dot ()
+{
+  test_graph g;
+  test_node *a = g.add_test_node ("a");
+  test_node *b = g.add_test_node ("b");
+  g.add_test_edge (a, b);
+
+  pretty_printer pp;
+  pp.buffer->stream = NULL;
+  test_dump_args_t dump_args;
+  g.dump_dot_to_pp (&pp, NULL, dump_args);
+
+  ASSERT_STR_CONTAINS (pp_formatted_text (&pp),
+		       "a -> b;\n");
+}
+
+/* Test shortest paths from A in this digraph,
+   where edges run top-to-bottom if not otherwise labeled:
+
+      A
+     / \
+    B   C-->D
+    |   |
+    E   |
+     \ /
+      F.  */
+
+static void
+test_shortest_paths ()
+{
+  test_graph g;
+  test_node *a = g.add_test_node ("a");
+  test_node *b = g.add_test_node ("b");
+  test_node *c = g.add_test_node ("d");
+  test_node *d = g.add_test_node ("d");
+  test_node *e = g.add_test_node ("e");
+  test_node *f = g.add_test_node ("f");
+
+  test_edge *ab = g.add_test_edge (a, b);
+  test_edge *ac = g.add_test_edge (a, c);
+  test_edge *cd = g.add_test_edge (c, d);
+  test_edge *be = g.add_test_edge (b, e);
+  g.add_test_edge (e, f);
+  test_edge *cf = g.add_test_edge (c, f);
+
+  shortest_paths<test_graph_traits, test_path> sp (g, a);
+
+  test_path path_to_a = sp.get_shortest_path (a);
+  ASSERT_EQ (path_to_a.m_edges.length (), 0);
+
+  test_path path_to_b = sp.get_shortest_path (b);
+  ASSERT_EQ (path_to_b.m_edges.length (), 1);
+  ASSERT_EQ (path_to_b.m_edges[0], ab);
+
+  test_path path_to_c = sp.get_shortest_path (c);
+  ASSERT_EQ (path_to_c.m_edges.length (), 1);
+  ASSERT_EQ (path_to_c.m_edges[0], ac);
+
+  test_path path_to_d = sp.get_shortest_path (d);
+  ASSERT_EQ (path_to_d.m_edges.length (), 2);
+  ASSERT_EQ (path_to_d.m_edges[0], ac);
+  ASSERT_EQ (path_to_d.m_edges[1], cd);
+
+  test_path path_to_e = sp.get_shortest_path (e);
+  ASSERT_EQ (path_to_e.m_edges.length (), 2);
+  ASSERT_EQ (path_to_e.m_edges[0], ab);
+  ASSERT_EQ (path_to_e.m_edges[1], be);
+
+  test_path path_to_f = sp.get_shortest_path (f);
+  ASSERT_EQ (path_to_f.m_edges.length (), 2);
+  ASSERT_EQ (path_to_f.m_edges[0], ac);
+  ASSERT_EQ (path_to_f.m_edges[1], cf);
+}
+
+/* Run all of the selftests within this file.  */
+
+void
+analyzer_digraph_cc_tests ()
+{
+  test_dump_to_dot ();
+  test_shortest_paths ();
+}
+
+} // namespace selftest
+
+#endif /* #if CHECKING_P */
diff --git a/gcc/analyzer/digraph.h b/gcc/analyzer/digraph.h
new file mode 100644
index 0000000..5c6a496
--- /dev/null
+++ b/gcc/analyzer/digraph.h
@@ -0,0 +1,248 @@ 
+/* Template classes for directed graphs.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   Contributed by David Malcolm <dmalcolm@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_ANALYZER_DIGRAPH_H
+#define GCC_ANALYZER_DIGRAPH_H
+
+#include "diagnostic.h"
+#include "tree-diagnostic.h" /* for default_tree_printer.  */
+#include "analyzer/graphviz.h"
+
+/* Templates for a family of classes: digraph, node, edge, and cluster.
+   This assumes a traits type with the following typedefs:
+   node_t: the node class
+   edge_t: the edge class
+   dump_args_t: additional args for dot-dumps
+   cluster_t: the cluster class (for use when generating .dot files).
+
+   Using a template allows for typesafe nodes and edges: a node's
+   predecessor and successor edges can be of a node-specific edge
+   subclass, without needing casting.  */
+
+/* Abstract base class for a node in a directed graph.  */
+
+template <typename GraphTraits>
+class dnode
+{
+ public:
+  typedef typename GraphTraits::edge_t edge_t;
+  typedef typename GraphTraits::dump_args_t dump_args_t;
+
+  virtual ~dnode () {}
+  virtual void dump_dot (graphviz_out *gv, const dump_args_t &args) const = 0;
+
+  auto_vec<edge_t *> m_preds;
+  auto_vec<edge_t *> m_succs;
+};
+
+/* Abstract base class for an edge in a directed graph.  */
+
+template <typename GraphTraits>
+class dedge
+{
+ public:
+  typedef typename GraphTraits::node_t node_t;
+  typedef typename GraphTraits::dump_args_t dump_args_t;
+
+  dedge (node_t *src, node_t *dest)
+  : m_src (src), m_dest (dest) {}
+
+  virtual ~dedge () {}
+
+  virtual void dump_dot (graphviz_out *gv, const dump_args_t &args) const = 0;
+
+  node_t *const m_src;
+  node_t *const m_dest;
+};
+
+/* Abstract base class for a directed graph.
+   This class maintains the vectors of nodes and edges,
+   and owns the nodes and edges.  */
+
+template <typename GraphTraits>
+class digraph
+{
+ public:
+  typedef typename GraphTraits::node_t node_t;
+  typedef typename GraphTraits::edge_t edge_t;
+  typedef typename GraphTraits::dump_args_t dump_args_t;
+  typedef typename GraphTraits::cluster_t cluster_t;
+
+  digraph () {}
+  virtual ~digraph () {}
+
+  void dump_dot_to_pp (pretty_printer *pp,
+		       cluster_t *root_cluster,
+		       const dump_args_t &args) const;
+  void dump_dot_to_file (FILE *fp,
+			 cluster_t *root_cluster,
+			 const dump_args_t &args) const;
+  void dump_dot (const char *path,
+		 cluster_t *root_cluster,
+		 const dump_args_t &args) const;
+
+  void add_node (node_t *node);
+  void add_edge (edge_t *edge);
+
+  auto_delete_vec<node_t> m_nodes;
+  auto_delete_vec<edge_t> m_edges;
+};
+
+/* Abstract base class for splitting dnodes into hierarchical clusters
+   in the generated .dot file.
+
+   See "Subgraphs and Clusters" within
+     https://www.graphviz.org/doc/info/lang.html
+   and e.g.
+     https://graphviz.gitlab.io/_pages/Gallery/directed/cluster.html
+
+   If a root_cluster is passed to dump_dot*, then all nodes will be
+   added to it at the start of dumping, via calls to add_node.
+
+   The root cluster can organize the nodes into a hierarchy of
+   child clusters.
+
+   After all nodes are added to the root cluster, dump_dot will then
+   be called on it (and not on the nodes themselves).  */
+
+template <typename GraphTraits>
+class cluster
+{
+ public:
+  typedef typename GraphTraits::node_t node_t;
+  typedef typename GraphTraits::dump_args_t dump_args_t;
+
+  virtual ~cluster () {}
+
+  virtual void add_node (node_t *node) = 0;
+
+  /* Recursively dump the cluster, all nodes, and child clusters.  */
+  virtual void dump_dot (graphviz_out *gv, const dump_args_t &) const = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////
+
+/* Write .dot information for this graph to PP, passing ARGS to the nodes
+   and edges.
+   If ROOT_CLUSTER is non-NULL, use it to organize the nodes into clusters.  */
+
+template <typename GraphTraits>
+inline void
+digraph<GraphTraits>::dump_dot_to_pp (pretty_printer *pp,
+				      cluster_t *root_cluster,
+				      const dump_args_t &args) const
+{
+  graphviz_out gv (pp);
+
+  pp_string (pp, "digraph \"");
+  pp_string (pp, "base");
+  pp_string (pp, "\" {\n");
+
+  gv.indent ();
+
+  pp_string (pp, "overlap=false;\n");
+  pp_string (pp, "compound=true;\n");
+
+  /* If using clustering, emit all nodes via clusters.  */
+  if (root_cluster)
+    {
+      int i;
+      node_t *n;
+      FOR_EACH_VEC_ELT (m_nodes, i, n)
+	root_cluster->add_node (n);
+      root_cluster->dump_dot (&gv, args);
+    }
+  else
+    {
+      /* Otherwise, display all nodes at top level.  */
+      int i;
+      node_t *n;
+      FOR_EACH_VEC_ELT (m_nodes, i, n)
+	n->dump_dot (&gv, args);
+    }
+
+  /* Edges.  */
+  int i;
+  edge_t *e;
+  FOR_EACH_VEC_ELT (m_edges, i, e)
+    e->dump_dot (&gv, args);
+
+  /* Terminate "digraph" */
+  gv.outdent ();
+  pp_string (pp, "}");
+  pp_newline (pp);
+}
+
+/* Write .dot information for this graph to FP, passing ARGS to the nodes
+   and edges.
+   If ROOT_CLUSTER is non-NULL, use it to organize the nodes into clusters.  */
+
+template <typename GraphTraits>
+inline void
+digraph<GraphTraits>::dump_dot_to_file (FILE *fp,
+					cluster_t *root_cluster,
+					const dump_args_t &args) const
+{
+  pretty_printer pp;
+  // TODO:
+  pp_format_decoder (&pp) = default_tree_printer;
+  pp.buffer->stream = fp;
+  dump_dot_to_pp (&pp, root_cluster, args);
+  pp_flush (&pp);
+}
+
+/* Write .dot information for this graph to a file at PATH, passing ARGS
+   to the nodes and edges.
+   If ROOT_CLUSTER is non-NULL, use it to organize the nodes into clusters.  */
+
+template <typename GraphTraits>
+inline void
+digraph<GraphTraits>::dump_dot (const char *path,
+				cluster_t *root_cluster,
+				const dump_args_t &args) const
+{
+  FILE *fp = fopen (path, "w");
+  dump_dot_to_file (fp, root_cluster, args);
+  fclose (fp);
+}
+
+/* Add NODE to this DIGRAPH, taking ownership.  */
+
+template <typename GraphTraits>
+inline void
+digraph<GraphTraits>::add_node (node_t *node)
+{
+  m_nodes.safe_push (node);
+}
+
+/* Add EDGE to this digraph, and to the preds/succs of its endpoints.
+   Take ownership of EDGE.  */
+
+template <typename GraphTraits>
+inline void
+digraph<GraphTraits>::add_edge (edge_t *edge)
+{
+  m_edges.safe_push (edge);
+  edge->m_dest->m_preds.safe_push (edge);
+  edge->m_src->m_succs.safe_push (edge);
+
+}
+
+#endif /* GCC_ANALYZER_DIGRAPH_H */
diff --git a/gcc/analyzer/shortest-paths.h b/gcc/analyzer/shortest-paths.h
new file mode 100644
index 0000000..4738f18
--- /dev/null
+++ b/gcc/analyzer/shortest-paths.h
@@ -0,0 +1,147 @@ 
+/* Template class for Dijkstra's algorithm on directed graphs.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   Contributed by David Malcolm <dmalcolm@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_ANALYZER_SHORTEST_PATHS_H
+#define GCC_ANALYZER_SHORTEST_PATHS_H
+
+#include "timevar.h"
+
+/* A record of the shortest path to each node in an graph
+   from the origin node.
+   The constructor runs Dijkstra's algorithm, and the results are
+   stored in this class.  */
+
+template <typename GraphTraits, typename Path_t>
+class shortest_paths
+{
+public:
+  typedef typename GraphTraits::graph_t graph_t;
+  typedef typename GraphTraits::node_t node_t;
+  typedef typename GraphTraits::edge_t edge_t;
+  typedef Path_t path_t;
+
+  shortest_paths (const graph_t &graph, const node_t *origin);
+
+  path_t get_shortest_path (const node_t *to) const;
+
+private:
+  const graph_t &m_graph;
+
+  /* For each node (by index), the minimal distance to that node from the
+     origin.  */
+  auto_vec<int> m_dist;
+
+  /* For each exploded_node (by index), the previous edge in the shortest
+     path from the origin.  */
+  auto_vec<const edge_t *> m_prev;
+};
+
+////////////////////////////////////////////////////////////////////////////
+
+/* shortest_paths's constructor.
+
+   Use Dijkstra's algorithm relative to ORIGIN to populate m_dist and
+   m_prev with enough information to be able to generate Path_t instances
+   to give the shortest path to any node in GRAPH from ORIGIN.  */
+
+template <typename GraphTraits, typename Path_t>
+inline
+shortest_paths<GraphTraits, Path_t>::shortest_paths (const graph_t &graph,
+						     const node_t *origin)
+: m_graph (graph),
+  m_dist (graph.m_nodes.length ()),
+  m_prev (graph.m_nodes.length ())
+{
+  auto_client_timevar tv ("shortest_paths");
+
+  auto_vec<int> queue (graph.m_nodes.length ());
+
+  for (unsigned i = 0; i < graph.m_nodes.length (); i++)
+    {
+      m_dist.quick_push (INT_MAX);
+      m_prev.quick_push (NULL);
+      queue.quick_push (i);
+    }
+  m_dist[origin->m_index] = 0;
+
+  while (queue.length () > 0)
+    {
+      /* Get minimal distance in queue.
+	 FIXME: this is O(N^2); replace with a priority queue.  */
+      int idx_with_min_dist = -1;
+      int idx_in_queue_with_min_dist = -1;
+      int min_dist = INT_MAX;
+      for (unsigned i = 0; i < queue.length (); i++)
+	{
+	  int idx = queue[i];
+	  if (m_dist[queue[i]] < min_dist)
+	    {
+	      min_dist = m_dist[idx];
+	      idx_with_min_dist = idx;
+	      idx_in_queue_with_min_dist = i;
+	    }
+	}
+      gcc_assert (idx_with_min_dist != -1);
+      gcc_assert (idx_in_queue_with_min_dist != -1);
+
+      // FIXME: this is confusing: there are two indices here
+
+      queue.unordered_remove (idx_in_queue_with_min_dist);
+
+      node_t *n
+	= static_cast <node_t *> (m_graph.m_nodes[idx_with_min_dist]);
+
+      int i;
+      edge_t *succ;
+      FOR_EACH_VEC_ELT (n->m_succs, i, succ)
+	{
+	  // TODO: only for dest still in queue
+	  node_t *dest = succ->m_dest;
+	  int alt = m_dist[n->m_index] + 1;
+	  if (alt < m_dist[dest->m_index])
+	    {
+	      m_dist[dest->m_index] = alt;
+	      m_prev[dest->m_index] = succ;
+	    }
+	}
+   }
+}
+
+/* Generate an Path_t instance giving the shortest path to the node
+   TO from the origin node.  */
+
+template <typename GraphTraits, typename Path_t>
+inline Path_t
+shortest_paths<GraphTraits, Path_t>::get_shortest_path (const node_t *to) const
+{
+  Path_t result;
+
+  while (m_prev[to->m_index])
+    {
+      result.m_edges.safe_push (m_prev[to->m_index]);
+      to = m_prev[to->m_index]->m_src;
+    }
+
+  result.m_edges.reverse ();
+
+  return result;
+}
+
+#endif /* GCC_ANALYZER_SHORTEST_PATHS_H */