[33/49] analyzer: new files: sm.{cc|h}
diff mbox series

Message ID 1573867416-55618-34-git-send-email-dmalcolm@redhat.com
State New
Headers show
Series
  • RFC: Add a static analysis framework to GCC
Related show

Commit Message

David Malcolm Nov. 16, 2019, 1:23 a.m. UTC
This patch adds a "state_machine" base class for describing
API checkers in terms of state machine transitions.  Followup
patches use this to add specific API checkers.

gcc/ChangeLog:
	* analyzer/sm.cc: New file.
	* analyzer/sm.h: New file.
---
 gcc/analyzer/sm.cc | 135 ++++++++++++++++++++++++++++++++++++++++++++
 gcc/analyzer/sm.h  | 160 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 295 insertions(+)
 create mode 100644 gcc/analyzer/sm.cc
 create mode 100644 gcc/analyzer/sm.h

Comments

Jeff Law Dec. 11, 2019, 7:23 p.m. UTC | #1
On Fri, 2019-11-15 at 20:23 -0500, David Malcolm wrote:
> This patch adds a "state_machine" base class for describing
> API checkers in terms of state machine transitions.  Followup
> patches use this to add specific API checkers.
> 
> gcc/ChangeLog:
> 	* analyzer/sm.cc: New file.
> 	* analyzer/sm.h: New file.
> ---
>  gcc/analyzer/sm.cc | 135
> ++++++++++++++++++++++++++++++++++++++++++++
>  gcc/analyzer/sm.h  | 160
> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 295 insertions(+)
>  create mode 100644 gcc/analyzer/sm.cc
>  create mode 100644 gcc/analyzer/sm.h
> 
> diff --git a/gcc/analyzer/sm.cc b/gcc/analyzer/sm.cc
> new file mode 100644
> index 0000000..eda9350
> --- /dev/null
> +++ b/gcc/analyzer/sm.cc
> @@ -0,0 +1,135 @@
> +/* Modeling API uses and misuses via state machines.
> +   Copyright (C) 2019 Free Software Foundation, Inc.
> +   Contributed by David Malcolm <dmalcolm@redhat.com>.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it
> +under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful, but
> +WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>;.  */
> +
> +#include "config.h"
> +#include "gcc-plugin.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "tree.h"
> +#include "gimple.h"
> +#include "analyzer/analyzer.h"
> +#include "analyzer/sm.h"
> +
> +////////////////////////////////////////////////////////////////////
> ////////
> +
> +/* If STMT is an assignment to zero, return the LHS.  */
> +
> +tree
> +is_zero_assignment (const gimple *stmt)
> +{
> +  const gassign *assign_stmt = dyn_cast <const gassign *> (stmt);
> +  if (!assign_stmt)
> +    return NULL_TREE;
> +
> +  enum tree_code op = gimple_assign_rhs_code (assign_stmt);
> +  if (op != INTEGER_CST)
> +    return NULL_TREE;
> +
> +  if (!zerop (gimple_assign_rhs1 (assign_stmt)))
> +    return NULL_TREE;
> +
> +  return gimple_assign_lhs (assign_stmt);
> +}
"assignment from zero" rather than "assignment to zero" I think.

I think you'd want to use "integer_zerop" rather than an open-coded
check.  That'll also allow you to pick up other forms such as
COMPLEX_CST and VECTOR_CST.




> +
> +/* If COND_STMT is a comparison against zero of the form (LHS OP 0),
> +   return true and write what's being compared to *OUT_LHS and the
> kind of
> +   the comparison to *OUT_OP.  */
> +
> +bool
> +is_comparison_against_zero (const gcond *cond_stmt,
> +			    tree *out_lhs, enum tree_code *out_op)
> +{
> +  enum tree_code op = gimple_cond_code (cond_stmt);
> +  tree lhs = gimple_cond_lhs (cond_stmt);
> +  tree rhs = gimple_cond_rhs (cond_stmt);
> +  if (!zerop (rhs))
> +    return false;
> +  // TODO: make it symmetric?
> +
> +  switch (op)
> +    {
> +    case NE_EXPR:
> +    case EQ_EXPR:
> +      *out_lhs = lhs;
> +      *out_op = op;
> +      return true;
> +
> +    default:
> +      return false;
> +    }
> +}
Seems like this might be useful to make generically available.




> +
> +bool
> +any_pointer_p (tree var)
> +{
> +  if (TREE_CODE (TREE_TYPE (var)) != POINTER_TYPE)
> +    return false;
> +
> +  return true;
> +}
I believe what you really want is POINTER_TYPE_P which will also happen
to help C++ since it'll include things that are REFERENCE_TYPEs.


It looks like some of the methods don't have comments on their
definitions.  Please check those and add them as necessary.

Otherwise it looks reasonable.

jeff

Patch
diff mbox series

diff --git a/gcc/analyzer/sm.cc b/gcc/analyzer/sm.cc
new file mode 100644
index 0000000..eda9350
--- /dev/null
+++ b/gcc/analyzer/sm.cc
@@ -0,0 +1,135 @@ 
+/* Modeling API uses and misuses via state machines.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   Contributed by David Malcolm <dmalcolm@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "gcc-plugin.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "gimple.h"
+#include "analyzer/analyzer.h"
+#include "analyzer/sm.h"
+
+////////////////////////////////////////////////////////////////////////////
+
+/* If STMT is an assignment to zero, return the LHS.  */
+
+tree
+is_zero_assignment (const gimple *stmt)
+{
+  const gassign *assign_stmt = dyn_cast <const gassign *> (stmt);
+  if (!assign_stmt)
+    return NULL_TREE;
+
+  enum tree_code op = gimple_assign_rhs_code (assign_stmt);
+  if (op != INTEGER_CST)
+    return NULL_TREE;
+
+  if (!zerop (gimple_assign_rhs1 (assign_stmt)))
+    return NULL_TREE;
+
+  return gimple_assign_lhs (assign_stmt);
+}
+
+/* If COND_STMT is a comparison against zero of the form (LHS OP 0),
+   return true and write what's being compared to *OUT_LHS and the kind of
+   the comparison to *OUT_OP.  */
+
+bool
+is_comparison_against_zero (const gcond *cond_stmt,
+			    tree *out_lhs, enum tree_code *out_op)
+{
+  enum tree_code op = gimple_cond_code (cond_stmt);
+  tree lhs = gimple_cond_lhs (cond_stmt);
+  tree rhs = gimple_cond_rhs (cond_stmt);
+  if (!zerop (rhs))
+    return false;
+  // TODO: make it symmetric?
+
+  switch (op)
+    {
+    case NE_EXPR:
+    case EQ_EXPR:
+      *out_lhs = lhs;
+      *out_op = op;
+      return true;
+
+    default:
+      return false;
+    }
+}
+
+bool
+any_pointer_p (tree var)
+{
+  if (TREE_CODE (TREE_TYPE (var)) != POINTER_TYPE)
+    return false;
+
+  return true;
+}
+
+////////////////////////////////////////////////////////////////////////////
+
+state_machine::state_t
+state_machine::add_state (const char *name)
+{
+  m_state_names.safe_push (name);
+  return m_state_names.length () - 1;
+}
+
+const char *
+state_machine::get_state_name (state_t s) const
+{
+  return m_state_names[s];
+}
+
+void
+state_machine::validate (state_t s) const
+{
+  gcc_assert (s < m_state_names.length ());
+}
+
+////////////////////////////////////////////////////////////////////////////
+
+void
+make_checkers (auto_delete_vec <state_machine> &out, logger *logger)
+{
+  out.safe_push (make_malloc_state_machine (logger));
+  out.safe_push (make_fileptr_state_machine (logger));
+  out.safe_push (make_taint_state_machine (logger));
+  out.safe_push (make_sensitive_state_machine (logger));
+
+  /* We only attempt to run the pattern tests if it might have been manually
+     enabled (for DejaGnu purposes).  */
+  if (flag_analyzer_checker)
+    out.safe_push (make_pattern_test_state_machine (logger));
+
+  if (flag_analyzer_checker)
+    {
+      unsigned read_index, write_index;
+      state_machine **sm;
+
+      /* TODO: this leaks the machines
+	 Would be nice to log the things that were removed.  */
+      VEC_ORDERED_REMOVE_IF (out, read_index, write_index, sm,
+			     0 != strcmp (flag_analyzer_checker,
+					  (*sm)->get_name ()));
+    }
+}
diff --git a/gcc/analyzer/sm.h b/gcc/analyzer/sm.h
new file mode 100644
index 0000000..0a89f8a
--- /dev/null
+++ b/gcc/analyzer/sm.h
@@ -0,0 +1,160 @@ 
+/* Modeling API uses and misuses via state machines.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   Contributed by David Malcolm <dmalcolm@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_ANALYZER_SM_H
+#define GCC_ANALYZER_SM_H
+
+#include "analyzer/analyzer-logging.h"
+
+////////////////////////////////////////////////////////////////////////////
+
+/* Utility functions for use by state machines.  */
+
+extern tree is_zero_assignment (const gimple *stmt);
+extern bool is_comparison_against_zero (const gcond *cond_stmt,
+					tree *out_lhs, enum tree_code *out_op);
+extern bool any_pointer_p (tree var);
+
+////////////////////////////////////////////////////////////////////////////
+
+class state_machine;
+class sm_context;
+class pending_diagnostic;
+
+/* An abstract base class for a state machine describing an API.
+   A mapping from state IDs to names, and various virtual functions
+   for pattern-matching on statements.  */
+
+class state_machine : public log_user
+{
+public:
+  typedef unsigned state_t;
+
+  state_machine (const char *name, logger *logger)
+  : log_user (logger), m_name (name) {}
+
+  virtual ~state_machine () {}
+
+  /* Should states be inherited from a parent region to a child region,
+     when first accessing a child region?
+     For example we should inherit the taintedness of a subregion,
+     but we should not inherit the "malloc:non-null" state of a field
+     within a heap-allocated struct.  */
+  virtual bool inherited_state_p () const = 0;
+
+  const char *get_name () const { return m_name; }
+
+  const char *get_state_name (state_t s) const;
+
+  /* Return true if STMT is a function call recognized by this sm.  */
+  virtual bool on_stmt (sm_context *sm_ctxt,
+			const supernode *node,
+			const gimple *stmt) const = 0;
+
+  virtual void on_condition (sm_context *sm_ctxt,
+			     const supernode *node,
+			     const gimple *stmt,
+			     tree lhs, enum tree_code op, tree rhs) const = 0;
+
+  virtual void on_leak (sm_context *sm_ctxt,
+			const supernode *node,
+			const gimple *stmt,
+			tree var,
+			state_machine::state_t state) const = 0;
+
+  /* Return true if it safe to discard the given state (to help
+     when simplifying state objects).
+     States that need leak detection should return false.  */
+  virtual bool can_purge_p (state_t s) const = 0;
+
+  void validate (state_t s) const;
+
+protected:
+  state_t add_state (const char *name);
+
+private:
+  const char *m_name;
+  auto_vec<const char *> m_state_names;
+};
+
+/* Is STATE the start state?  (zero is hardcoded as the start state).  */
+
+static inline bool
+start_start_p (state_machine::state_t state)
+{
+  return state == 0;
+}
+
+////////////////////////////////////////////////////////////////////////////
+
+/* Abstract base class giving an interface for the state machine to call
+   the checker engine, at a particular stmt.  */
+
+class sm_context
+{
+public:
+  virtual ~sm_context () {}
+
+  /* Called by state_machine in response to pattern matches:
+     if VAR is in state FROM, transition it to state TO, potentially
+     recording the "origin" of the state as ORIGIN.
+     Use NODE and STMT for location information.  */
+   virtual void on_transition (const supernode *node, const gimple *stmt,
+			      tree var,
+			      state_machine::state_t from,
+			      state_machine::state_t to,
+			      tree origin = NULL_TREE) = 0;
+
+  /* Called by state_machine in response to pattern matches:
+     issue a diagnostic D if VAR is in state STATE, using NODE and STMT
+     for location information.  */
+  virtual void warn_for_state (const supernode *node, const gimple *stmt,
+			       tree var, state_machine::state_t state,
+			       pending_diagnostic *d) = 0;
+
+  virtual tree get_readable_tree (tree expr)
+  {
+    return expr;
+  }
+
+protected:
+  sm_context (int sm_idx, const state_machine &sm)
+  : m_sm_idx (sm_idx), m_sm (sm) {}
+
+  int m_sm_idx;
+  const state_machine &m_sm;
+};
+
+
+////////////////////////////////////////////////////////////////////////////
+
+/* The various state_machine subclasses are hidden in their respective
+   implementation files.  */
+
+extern void make_checkers (auto_delete_vec <state_machine> &out,
+			   logger *logger);
+
+extern state_machine *make_malloc_state_machine (logger *logger);
+extern state_machine *make_fileptr_state_machine (logger *logger);
+extern state_machine *make_taint_state_machine (logger *logger);
+extern state_machine *make_sensitive_state_machine (logger *logger);
+extern state_machine *make_pattern_test_state_machine (logger *logger);
+
+#endif /* GCC_ANALYZER_SM_H */