diff mbox series

[37/49] analyzer: new file: sm-sensitive.cc

Message ID 1573867416-55618-38-git-send-email-dmalcolm@redhat.com
State New
Headers show
Series RFC: Add a static analysis framework to GCC | expand

Commit Message

David Malcolm Nov. 16, 2019, 1:23 a.m. UTC
This patch adds a state machine checker for tracking exposure of
sensitive data (e.g. writing passwords to log files).

This checker isn't ready for production, and is presented as a
proof-of-concept of the sm-based approach.

gcc/ChangeLog:
	* analyzer/sm-sensitive.cc: New file.
---
 gcc/analyzer/sm-sensitive.cc | 209 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 209 insertions(+)
 create mode 100644 gcc/analyzer/sm-sensitive.cc

Comments

Jeff Law Dec. 7, 2019, 3:18 p.m. UTC | #1
On Fri, 2019-11-15 at 20:23 -0500, David Malcolm wrote:
> This patch adds a state machine checker for tracking exposure of
> sensitive data (e.g. writing passwords to log files).
> 
> This checker isn't ready for production, and is presented as a
> proof-of-concept of the sm-based approach.
> 
> gcc/ChangeLog:
> 	* analyzer/sm-sensitive.cc: New file.
Given it's not ready for production, fine.  Presumably one of the areas
for improvement is a better answer to the "what constitutes exposure"
question ;-)


jeff
>
diff mbox series

Patch

diff --git a/gcc/analyzer/sm-sensitive.cc b/gcc/analyzer/sm-sensitive.cc
new file mode 100644
index 0000000..f634b8f
--- /dev/null
+++ b/gcc/analyzer/sm-sensitive.cc
@@ -0,0 +1,209 @@ 
+/* An experimental state machine, for tracking exposure of sensitive
+   data (e.g. through logging).
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   Contributed by David Malcolm <dmalcolm@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "gcc-plugin.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "gimple.h"
+#include "diagnostic-path.h"
+#include "diagnostic-metadata.h"
+#include "analyzer/analyzer.h"
+#include "analyzer/pending-diagnostic.h"
+#include "analyzer/sm.h"
+
+namespace {
+
+/* State machine (experimental) that follows "sensitive" values, such as
+   passwords, so that writing them to an output file can be reported.  */
+
+class sensitive_state_machine : public state_machine
+{
+public:
+  sensitive_state_machine (logger *logger);
+
+  /* Always answers true.  */
+  bool inherited_state_p () const FINAL OVERRIDE
+  {
+    return true;
+  }
+
+  /* Per-statement hook; recognizes getpass, fprintf, printf and fwrite.  */
+  bool on_stmt (sm_context *sm_ctxt, const supernode *node,
+		const gimple *stmt) const FINAL OVERRIDE;
+
+  /* Condition hook; a no-op for this checker.  */
+  void on_condition (sm_context *sm_ctxt, const supernode *node,
+		     const gimple *stmt, tree lhs, enum tree_code op,
+		     tree rhs) const FINAL OVERRIDE;
+
+  /* Leak hook; a no-op for this checker.  */
+  void on_leak (sm_context *sm_ctxt, const supernode *node,
+		const gimple *stmt, tree var,
+		state_machine::state_t state) const FINAL OVERRIDE;
+
+  /* Returns true for every state.  */
+  bool can_purge_p (state_t s) const FINAL OVERRIDE;
+
+private:
+  /* Warn about an exposure if ARG is in the "sensitive" state.  */
+  void warn_for_any_exposure (sm_context *sm_ctxt, const supernode *node,
+			      const gimple *stmt, tree arg) const;
+
+  /* Start state.  */
+  state_t m_start;
+  /* Value holds sensitive data, e.g. a password.  */
+  state_t m_sensitive;
+  /* Stop state: a value the checker no longer wants to track.  */
+  state_t m_stop;
+};
+
+////////////////////////////////////////////////////////////////////////////
+
+/* Diagnostic: a value in the "sensitive" state reached an output call.  */
+class exposure_through_output_file
+  : public pending_diagnostic_subclass<exposure_through_output_file>
+{
+public:
+  exposure_through_output_file (tree arg) : m_arg (arg) {}
+
+  const char *get_kind () const FINAL OVERRIDE
+  { return "exposure_through_output_file"; }
+
+  bool operator== (const exposure_through_output_file &other) const
+  { return other.m_arg == m_arg; }
+
+  /* Emit the warning about m_arg at RICH_LOC; yields warning_at's result.  */
+  bool emit (rich_location *rich_loc) FINAL OVERRIDE
+  {
+    diagnostic_metadata cwe_info;
+    cwe_info.add_cwe (532);  /* Information Exposure Through Log Files.  */
+    return warning_at (rich_loc, cwe_info,
+		       OPT_Wanalyzer_exposure_through_output_file,
+		       "sensitive value %qE written to output file", m_arg);
+  }
+
+private:
+  tree m_arg;  /* The tree named in the warning text.  */
+};
+
+////////////////////////////////////////////////////////////////////////////
+
+/* Constructor: name the machine "sensitive" and register its three
+   states, in the order start, sensitive, stop.  */
+
+sensitive_state_machine::sensitive_state_machine (logger *logger)
+: state_machine ("sensitive", logger),
+  m_start (add_state ("start")),
+  m_sensitive (add_state ("sensitive")),
+  m_stop (add_state ("stop"))
+{}
+
+/* Report an exposure of ARG at NODE and STMT if ARG is "sensitive".  */
+
+void
+sensitive_state_machine::warn_for_any_exposure (sm_context *sm_ctxt,
+						const supernode *node,
+						const gimple *stmt,
+						tree arg) const
+{
+  exposure_through_output_file *diag
+    = new exposure_through_output_file (arg);
+  sm_ctxt->warn_for_state (node, stmt, arg, m_sensitive, diag);
+}
+
+/* Implementation of state_machine::on_stmt vfunc for
+   sensitive_state_machine: getpass's result becomes "sensitive"; arguments
+   of fprintf/printf/fwrite are checked.  True iff STMT was such a call.  */
+
+bool
+sensitive_state_machine::on_stmt (sm_context *sm_ctxt,
+				  const supernode *node,
+				  const gimple *stmt) const
+{
+  const gcall *call = dyn_cast <const gcall *> (stmt);
+  if (!call)
+    return false;
+
+  if (is_named_call_p (call, "getpass", 1))
+    {
+      if (tree lhs = gimple_call_lhs (call))
+	sm_ctxt->on_transition (node, stmt, lhs, m_start, m_sensitive);
+      return true;
+    }
+
+  const bool fprintf_p = is_named_call_p (call, "fprintf");
+  if (fprintf_p || is_named_call_p (call, "printf"))
+    {
+      /* Check the format string and every vararg: the format is argument
+	 1 of fprintf (argument 0 is the stream) but argument 0 of printf.  */
+      for (unsigned i = fprintf_p ? 1 : 0; i < gimple_call_num_args (call); i++)
+	warn_for_any_exposure (sm_ctxt, node, stmt, gimple_call_arg (call, i));
+      return true;
+    }
+
+  if (is_named_call_p (call, "fwrite", 4))
+    {
+      warn_for_any_exposure (sm_ctxt, node, stmt, gimple_call_arg (call, 0));
+      return true;
+    }
+  // TODO: ...etc.  This is just a proof-of-concept at this point.
+  return false;
+}
+
+/* Implementation of state_machine::on_condition vfunc for
+   sensitive_state_machine.  No argument is used: this is a no-op.  */
+
+void
+sensitive_state_machine::on_condition (sm_context *, const supernode *,
+				       const gimple *, tree, enum tree_code,
+				       tree) const
+{
+  /* Nothing to do.  */
+}
+
+/* Implementation of state_machine::on_leak vfunc for
+   sensitive_state_machine.  No argument is used: this is a no-op.  */
+
+void
+sensitive_state_machine::on_leak (sm_context *, const supernode *,
+				  const gimple *, tree,
+				  state_machine::state_t) const
+{
+  /* Nothing to do.  */
+}
+
+bool
+sensitive_state_machine::can_purge_p (state_t) const
+{
+  return true;  /* The state argument is ignored: always true.  */
+}
+
+} // anonymous namespace
+
+/* Internal interface to this file: allocate the checker with LOGGER.  */
+state_machine *
+make_sensitive_state_machine (logger *logger)
+{
+  state_machine *result = new sensitive_state_machine (logger);
+  return result;
+}