diff mbox series

[pushed] analyzer: reimplement kf_strlen [PR105899]

Message ID 20230822223958.2936206-1-dmalcolm@redhat.com
State New
Headers show
Series [pushed] analyzer: reimplement kf_strlen [PR105899] | expand

Commit Message

David Malcolm Aug. 22, 2023, 10:39 p.m. UTC
Reimplement kf_strlen in terms of the new string scanning
implementation, sharing strlen's implementation with
__analyzer_get_strlen.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3391-g3242fb533d48ab.

gcc/analyzer/ChangeLog:
	PR analyzer/105899
	* kf-analyzer.cc (class kf_analyzer_get_strlen): Move to kf.cc.
	(register_known_analyzer_functions): Use make_kf_strlen.
	* kf.cc (class kf_strlen::impl_call_pre): Replace with
	implementation of kf_analyzer_get_strlen from kf-analyzer.cc.
	Handle "UNKNOWN" return from check_for_null_terminated_string_arg
	by falling back to a conjured svalue.
	(make_kf_strlen): New.
	(register_known_functions): Use make_kf_strlen.
	* known-function-manager.h (make_kf_strlen): New decl.

gcc/testsuite/ChangeLog:
	PR analyzer/105899
	* gcc.dg/analyzer/null-terminated-strings-1.c: Update expected
	results on symbolic values.
	* gcc.dg/analyzer/strlen-1.c: New test.
---
 gcc/analyzer/kf-analyzer.cc                   | 30 +---------
 gcc/analyzer/kf.cc                            | 56 +++++++++----------
 gcc/analyzer/known-function-manager.h         |  2 +
 .../analyzer/null-terminated-strings-1.c      |  4 +-
 gcc/testsuite/gcc.dg/analyzer/strlen-1.c      | 54 ++++++++++++++++++
 5 files changed, 85 insertions(+), 61 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/strlen-1.c
diff mbox series

Patch

diff --git a/gcc/analyzer/kf-analyzer.cc b/gcc/analyzer/kf-analyzer.cc
index c767ebcb6615..7ae598a89123 100644
--- a/gcc/analyzer/kf-analyzer.cc
+++ b/gcc/analyzer/kf-analyzer.cc
@@ -358,33 +358,6 @@  public:
   }
 };
 
-/* Handler for "__analyzer_get_strlen".  */
-
-class kf_analyzer_get_strlen : public known_function
-{
-public:
-  bool matches_call_types_p (const call_details &cd) const final override
-  {
-    return cd.num_args () == 1 && cd.arg_is_pointer_p (0);
-  }
-  void impl_call_pre (const call_details &cd) const final override
-  {
-    if (const svalue *bytes_read = cd.check_for_null_terminated_string_arg (0))
-      {
-	region_model_manager *mgr = cd.get_manager ();
-	/* strlen is (bytes_read - 1).  */
-	const svalue *strlen_sval
-	  = mgr->get_or_create_binop (size_type_node,
-				      MINUS_EXPR,
-				      bytes_read,
-				      mgr->get_or_create_int_cst (size_type_node, 1));
-	cd.maybe_set_lhs (strlen_sval);
-      }
-    else
-      cd.set_any_lhs_with_defaults ();
-  }
-};
-
 /* Populate KFM with instances of known functions used for debugging the
    analyzer and for writing DejaGnu tests, all with a "__analyzer_" prefix.  */
 
@@ -406,8 +379,7 @@  register_known_analyzer_functions (known_function_manager &kfm)
   kfm.add ("__analyzer_eval", make_unique<kf_analyzer_eval> ());
   kfm.add ("__analyzer_get_unknown_ptr",
 	   make_unique<kf_analyzer_get_unknown_ptr> ());
-  kfm.add ("__analyzer_get_strlen",
-	   make_unique<kf_analyzer_get_strlen> ());
+  kfm.add ("__analyzer_get_strlen", make_kf_strlen ());
 }
 
 } // namespace ana
diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 1601cf15c685..59f46bab581c 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -1187,7 +1187,7 @@  public:
   }
 };
 
-/* Handle the on_call_pre part of "strlen".  */
+/* Handler for "strlen" and for "__analyzer_get_strlen".  */
 
 class kf_strlen : public known_function
 {
@@ -1196,37 +1196,33 @@  public:
   {
     return (cd.num_args () == 1 && cd.arg_is_pointer_p (0));
   }
-  void impl_call_pre (const call_details &cd) const final override;
-};
-
-void
-kf_strlen::impl_call_pre (const call_details &cd) const
-{
-  region_model_context *ctxt = cd.get_ctxt ();
-  region_model *model = cd.get_model ();
-  region_model_manager *mgr = cd.get_manager ();
-
-  const svalue *arg_sval = cd.get_arg_svalue (0);
-  const region *buf_reg
-    = model->deref_rvalue (arg_sval, cd.get_arg_tree (0), ctxt);
-  if (const string_region *str_reg
-      = buf_reg->dyn_cast_string_region ())
-    {
-      tree str_cst = str_reg->get_string_cst ();
-      /* TREE_STRING_LENGTH is sizeof, not strlen.  */
-      int sizeof_cst = TREE_STRING_LENGTH (str_cst);
-      int strlen_cst = sizeof_cst - 1;
-      if (cd.get_lhs_type ())
+  void impl_call_pre (const call_details &cd) const final override
+  {
+    if (const svalue *bytes_read = cd.check_for_null_terminated_string_arg (0))
+      if (bytes_read->get_kind () != SK_UNKNOWN)
 	{
-	  tree t_cst = build_int_cst (cd.get_lhs_type (), strlen_cst);
-	  const svalue *result_sval
-	    = mgr->get_or_create_constant_svalue (t_cst);
-	  cd.maybe_set_lhs (result_sval);
+	  region_model_manager *mgr = cd.get_manager ();
+	  /* strlen is (bytes_read - 1).  */
+	  const svalue *one = mgr->get_or_create_int_cst (size_type_node, 1);
+	  const svalue *strlen_sval = mgr->get_or_create_binop (size_type_node,
+								MINUS_EXPR,
+								bytes_read,
+								one);
+	  cd.maybe_set_lhs (strlen_sval);
 	  return;
 	}
-    }
-  /* Otherwise a conjured value.  */
-  cd.set_any_lhs_with_defaults ();
+
+    /* Use a conjured svalue.  */
+    cd.set_any_lhs_with_defaults ();
+  }
+};
+
+/* Factory function, so that kf-analyzer.cc can use this class.  */
+
+std::unique_ptr<known_function>
+make_kf_strlen ()
+{
+  return make_unique<kf_strlen> ();
 }
 
 /* Handler for "strndup" and "__builtin_strndup".  */
@@ -1434,7 +1430,7 @@  register_known_functions (known_function_manager &kfm)
     kfm.add (BUILT_IN_STRCPY_CHK, make_unique<kf_strcpy> (3));
     kfm.add (BUILT_IN_STRDUP, make_unique<kf_strdup> ());
     kfm.add (BUILT_IN_STRNDUP, make_unique<kf_strndup> ());
-    kfm.add (BUILT_IN_STRLEN, make_unique<kf_strlen> ());
+    kfm.add (BUILT_IN_STRLEN, make_kf_strlen ());
 
     register_atomic_builtins (kfm);
     register_varargs_builtins (kfm);
diff --git a/gcc/analyzer/known-function-manager.h b/gcc/analyzer/known-function-manager.h
index 4a76136f2594..1432e548acb9 100644
--- a/gcc/analyzer/known-function-manager.h
+++ b/gcc/analyzer/known-function-manager.h
@@ -64,6 +64,8 @@  private:
   known_function *m_combined_fns_arr[CFN_LAST];
 };
 
+extern std::unique_ptr<known_function> make_kf_strlen ();
+
 } // namespace ana
 
 #endif /* GCC_ANALYZER_KNOWN_FUNCTION_MANAGER_H */
diff --git a/gcc/testsuite/gcc.dg/analyzer/null-terminated-strings-1.c b/gcc/testsuite/gcc.dg/analyzer/null-terminated-strings-1.c
index 1db82a76d3b3..ecd794a2337a 100644
--- a/gcc/testsuite/gcc.dg/analyzer/null-terminated-strings-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/null-terminated-strings-1.c
@@ -129,12 +129,12 @@  char *test_dynamic_4 (const char *src)
 
 void test_symbolic_ptr (const char *ptr)
 {
-  __analyzer_describe (0, __analyzer_get_strlen (ptr)); /* { dg-warning "UNKNOWN" } */
+  __analyzer_describe (0, __analyzer_get_strlen (ptr)); /* { dg-warning "CONJURED" } */
 }
 
 void test_symbolic_offset (size_t idx)
 {
-  __analyzer_describe (0, __analyzer_get_strlen ("abc" + idx)); /* { dg-warning "UNKNOWN" } */
+  __analyzer_describe (0, __analyzer_get_strlen ("abc" + idx)); /* { dg-warning "CONJURED" } */
 }
 
 void test_casts (void)
diff --git a/gcc/testsuite/gcc.dg/analyzer/strlen-1.c b/gcc/testsuite/gcc.dg/analyzer/strlen-1.c
new file mode 100644
index 000000000000..99ce3aa297b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/strlen-1.c
@@ -0,0 +1,54 @@ 
+/* See e.g. https://en.cppreference.com/w/c/string/byte/strlen */
+
+#include "analyzer-decls.h"
+
+typedef __SIZE_TYPE__ size_t;
+
+static size_t __attribute__((noinline))
+call_strlen_1 (const char *p)
+{
+  return __builtin_strlen (p);
+}
+
+void test_string (void)
+{
+  __analyzer_eval (call_strlen_1 ("abc") == 3); /* { dg-warning "TRUE" } */
+}
+
+static size_t __attribute__((noinline))
+call_strlen_2 (const char *p)
+{
+  return __builtin_strlen (p); /* { dg-warning "stack-based buffer over-read" } */
+}
+
+void test_unterminated (void)
+{
+  const char buf[3] = "abc";
+  __analyzer_eval (call_strlen_2 (buf) == 3); /* { dg-warning "UNKNOWN" } */
+}
+
+void test_uninitialized (void)
+{
+  char buf[16];
+  __builtin_strlen (buf); /* { dg-warning "use of uninitialized value 'buf\\\[0\\\]'" } */
+}
+
+void test_partially_initialized (void)
+{
+  char buf[16];
+  buf[0] = 'a';
+  __builtin_strlen (buf); /* { dg-warning "use of uninitialized value 'buf\\\[1\\\]'" } */
+}
+
+extern size_t strlen (const char *str);
+
+size_t
+test_passthrough (const char *str)
+{
+  return strlen (str);
+}
+
+/* TODO
+   - complain if NULL str
+   - should it be tainted if str's bytes are tainted?
+*/