[34/45] analyzer: new file: sm-taint.cc
diff mbox series

Message ID 20191213181134.1830-35-dmalcolm@redhat.com
State New
Headers show
Series
  • v4 of analyzer patch kit
Related show

Commit Message

David Malcolm Dec. 13, 2019, 6:11 p.m. UTC
Changed in v4:
- Remove include of gcc-plugin.h, reworking includes accordingly.
- Wrap everything in #if ENABLE_ANALYZER
- Remove /// comment lines
- Rework on_leak vfunc:
    https://gcc.gnu.org/ml/gcc-patches/2019-11/msg02028.html
- Rework for changes to is_named_call_p, resolving function pointers:
   https://gcc.gnu.org/ml/gcc-patches/2019-12/msg00178.html

This patch adds a state machine checker for tracking "taint",
where data potentially under an attacker's control is used for
things like array indices without sanitization (CWE-129).

This checker isn't ready for production, and is presented as a
proof-of-concept of the sm-based approach.

gcc/ChangeLog:
	* analyzer/sm-taint.cc: New file.
---
 gcc/analyzer/sm-taint.cc | 325 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 325 insertions(+)
 create mode 100644 gcc/analyzer/sm-taint.cc

Patch
diff mbox series

diff --git a/gcc/analyzer/sm-taint.cc b/gcc/analyzer/sm-taint.cc
new file mode 100644
index 000000000000..8b20f78ffe60
--- /dev/null
+++ b/gcc/analyzer/sm-taint.cc
@@ -0,0 +1,325 @@ 
+/* An experimental state machine, for tracking "taint": unsanitized uses
+   of data potentially under an attacker's control.
+
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   Contributed by David Malcolm <dmalcolm@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "function.h"
+#include "basic-block.h"
+#include "gimple.h"
+#include "options.h"
+#include "diagnostic-path.h"
+#include "diagnostic-metadata.h"
+#include "analyzer/analyzer.h"
+#include "analyzer/pending-diagnostic.h"
+#include "analyzer/sm.h"
+
+#if ENABLE_ANALYZER
+
+namespace {
+
+/* An experimental state machine, for tracking "taint": unsanitized uses
+   of data potentially under an attacker's control.  */
+
+class taint_state_machine : public state_machine
+{
+public:
+  taint_state_machine (logger *logger);
+
+  bool inherited_state_p () const FINAL OVERRIDE { return true; }
+
+  bool on_stmt (sm_context *sm_ctxt,
+		const supernode *node,
+		const gimple *stmt) const FINAL OVERRIDE;
+
+  void on_condition (sm_context *sm_ctxt,
+		     const supernode *node,
+		     const gimple *stmt,
+		     tree lhs,
+		     enum tree_code op,
+		     tree rhs) const FINAL OVERRIDE;
+
+  bool can_purge_p (state_t s) const FINAL OVERRIDE;
+
+  /* Start state.  */
+  state_t m_start;
+
+  /* State for a "tainted" value: unsanitized data potentially under an
+     attacker's control.  */
+  state_t m_tainted;
+
+  /* State for a "tainted" value that has a lower bound.  */
+  state_t m_has_lb;
+
+  /* State for a "tainted" value that has an upper bound.  */
+  state_t m_has_ub;
+
+  /* Stop state, for a value we don't want to track any more.  */
+  state_t m_stop;
+};
+
+enum bounds
+{
+  BOUNDS_NONE,
+  BOUNDS_UPPER,
+  BOUNDS_LOWER
+};
+
+class tainted_array_index
+  : public pending_diagnostic_subclass<tainted_array_index>
+{
+public:
+  tainted_array_index (const taint_state_machine &sm, tree arg,
+		       enum bounds has_bounds)
+  : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds) {}
+
+  const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; }
+
+  bool operator== (const tainted_array_index &other) const
+  {
+    return m_arg == other.m_arg;
+  }
+
+  bool emit (rich_location *rich_loc) FINAL OVERRIDE
+  {
+    diagnostic_metadata m;
+    m.add_cwe (129);
+    switch (m_has_bounds)
+      {
+      default:
+	gcc_unreachable ();
+      case BOUNDS_NONE:
+	return warning_at (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
+			   "use of tainted value %qE in array lookup"
+			   " without bounds checking",
+			   m_arg);
+	break;
+      case BOUNDS_UPPER:
+	return warning_at (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
+			   "use of tainted value %qE in array lookup"
+			   " without lower-bounds checking",
+			   m_arg);
+	break;
+      case BOUNDS_LOWER:
+	return warning_at (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
+			   "use of tainted value %qE in array lookup"
+			   " without upper-bounds checking",
+			   m_arg);
+	break;
+      }
+  }
+
+  label_text describe_state_change (const evdesc::state_change &change)
+    FINAL OVERRIDE
+  {
+    if (change.m_new_state == m_sm.m_tainted)
+      {
+	if (change.m_origin)
+	  return change.formatted_print ("%qE has an unchecked value here"
+					 " (from %qE)",
+					 change.m_expr, change.m_origin);
+	else
+	  return change.formatted_print ("%qE gets an unchecked value here",
+					 change.m_expr);
+      }
+    else if (change.m_new_state == m_sm.m_has_lb)
+      return change.formatted_print ("%qE has its lower bound checked here",
+				     change.m_expr);
+    else if (change.m_new_state == m_sm.m_has_ub)
+      return change.formatted_print ("%qE has its upper bound checked here",
+				     change.m_expr);
+    return label_text ();
+  }
+
+  label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
+  {
+    switch (m_has_bounds)
+      {
+      default:
+	gcc_unreachable ();
+      case BOUNDS_NONE:
+	return ev.formatted_print ("use of tainted value %qE in array lookup"
+				   " without bounds checking",
+				   m_arg);
+      case BOUNDS_UPPER:
+	return ev.formatted_print ("use of tainted value %qE in array lookup"
+				   " without lower-bounds checking",
+				   m_arg);
+      case BOUNDS_LOWER:
+	return ev.formatted_print ("use of tainted value %qE in array lookup"
+				   " without upper-bounds checking",
+				   m_arg);
+      }
+  }
+
+private:
+  const taint_state_machine &m_sm;
+  tree m_arg;
+  enum bounds m_has_bounds;
+};
+
+/* taint_state_machine's ctor.  */
+
+taint_state_machine::taint_state_machine (logger *logger)
+: state_machine ("taint", logger)
+{
+  m_start = add_state ("start");
+  m_tainted = add_state ("tainted");
+  m_has_lb = add_state ("has_lb");
+  m_has_ub = add_state ("has_ub");
+  m_stop = add_state ("stop");
+}
+
+/* Implementation of state_machine::on_stmt vfunc for taint_state_machine.  */
+
+bool
+taint_state_machine::on_stmt (sm_context *sm_ctxt,
+			       const supernode *node,
+			       const gimple *stmt) const
+{
+  if (const gcall *call = dyn_cast <const gcall *> (stmt))
+    if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
+      {
+	if (is_named_call_p (callee_fndecl, "fread", call, 4))
+	  {
+	    tree arg = gimple_call_arg (call, 0);
+	    arg = sm_ctxt->get_readable_tree (arg);
+
+	    sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted);
+
+	    /* Dereference an ADDR_EXPR.  */
+	    // TODO: should the engine do this?
+	    if (TREE_CODE (arg) == ADDR_EXPR)
+	      sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
+				      m_start, m_tainted);
+	    return true;
+	  }
+      }
+  // TODO: ...etc; many other sources of untrusted data
+
+  if (const gassign *assign = dyn_cast <const gassign *> (stmt))
+    {
+      tree rhs1 = gimple_assign_rhs1 (assign);
+      enum tree_code op = gimple_assign_rhs_code (assign);
+
+      /* Check array accesses.  */
+      if (op == ARRAY_REF)
+	{
+	  tree arg = TREE_OPERAND (rhs1, 1);
+	  arg = sm_ctxt->get_readable_tree (arg);
+
+	  /* Unsigned types have an implicit lower bound.  */
+	  bool is_unsigned = false;
+	  if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
+	    is_unsigned = TYPE_UNSIGNED (TREE_TYPE (arg));
+
+	  /* Complain about missing bounds.  */
+	  sm_ctxt->warn_for_state
+	    (node, stmt, arg, m_tainted,
+	     new tainted_array_index (*this, arg,
+				      is_unsigned
+				      ? BOUNDS_LOWER : BOUNDS_NONE));
+	  sm_ctxt->on_transition (node, stmt, arg, m_tainted, m_stop);
+
+	  /* Complain about missing upper bound.  */
+	  sm_ctxt->warn_for_state  (node, stmt, arg, m_has_lb,
+				    new tainted_array_index (*this, arg,
+							     BOUNDS_LOWER));
+	  sm_ctxt->on_transition (node, stmt, arg, m_has_lb, m_stop);
+
+	  /* Complain about missing lower bound.  */
+	  if (!is_unsigned)
+	    {
+	      sm_ctxt->warn_for_state  (node, stmt, arg, m_has_ub,
+					new tainted_array_index (*this, arg,
+								 BOUNDS_UPPER));
+	      sm_ctxt->on_transition (node, stmt, arg, m_has_ub, m_stop);
+	    }
+	}
+    }
+
+  return false;
+}
+
+/* Implementation of state_machine::on_condition vfunc for taint_state_machine.
+   Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
+   and states 'has_ub' and 'has_lb' to 'stop'.  */
+
+void
+taint_state_machine::on_condition (sm_context *sm_ctxt,
+				   const supernode *node,
+				   const gimple *stmt,
+				   tree lhs,
+				   enum tree_code op,
+				   tree rhs ATTRIBUTE_UNUSED) const
+{
+  if (stmt == NULL)
+    return;
+
+  // TODO: this doesn't use the RHS; should we make it symmetric?
+
+  // TODO
+  switch (op)
+    {
+      //case NE_EXPR:
+      //case EQ_EXPR:
+    case GE_EXPR:
+    case GT_EXPR:
+      {
+	sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
+				m_has_lb);
+	sm_ctxt->on_transition (node, stmt, lhs, m_has_ub,
+				m_stop);
+      }
+      break;
+    case LE_EXPR:
+    case LT_EXPR:
+      {
+	sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
+				m_has_ub);
+	sm_ctxt->on_transition (node, stmt, lhs, m_has_lb,
+				m_stop);
+      }
+      break;
+    default:
+      break;
+    }
+}
+
+bool
+taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
+{
+  return true;
+}
+
+} // anonymous namespace
+
+/* Internal interface to this file. */
+
+state_machine *
+make_taint_state_machine (logger *logger)
+{
+  return new taint_state_machine (logger);
+}
+
+#endif /* #if ENABLE_ANALYZER */