diff mbox series

[pushed] analyzer: handle array-initialization from a string_cst [PR113999]

Message ID 20240221004833.3131939-1-dmalcolm@redhat.com
State New
Headers show
Series [pushed] analyzer: handle array-initialization from a string_cst [PR113999] | expand

Commit Message

David Malcolm Feb. 21, 2024, 12:48 a.m. UTC
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Successful run of analyzer integration tests on x86_64-pc-linux-gnu.
Pushed to trunk as r14-9091-g0a6a5f8656ccf9.

gcc/analyzer/ChangeLog:
	PR analyzer/113999
	* analyzer.h (get_string_cst_size): New decl.
	* region-model-manager.cc (get_string_cst_size): New.
	(region_model_manager::maybe_get_char_from_string_cst): Treat
	single-byte accesses within string_cst but beyond
	TREE_STRING_LENGTH as being 0.
	* region-model.cc (string_cst_has_null_terminator): Likewise.

gcc/testsuite/ChangeLog:
	PR analyzer/113999
	* c-c++-common/analyzer/strlen-pr113999.c: New test.
	* gcc.dg/analyzer/strlen-1.c: More test coverage.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
---
 gcc/analyzer/analyzer.h                       |  3 ++
 gcc/analyzer/region-model-manager.cc          | 35 +++++++++++--
 gcc/analyzer/region-model.cc                  | 17 +++++-
 .../c-c++-common/analyzer/strlen-pr113999.c   |  8 +++
 gcc/testsuite/gcc.dg/analyzer/strlen-1.c      | 52 +++++++++++++++++++
 5 files changed, 109 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/analyzer/strlen-pr113999.c
diff mbox series

Patch

diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h
index 23e3f71df0af..20a8e3f9a1d0 100644
--- a/gcc/analyzer/analyzer.h
+++ b/gcc/analyzer/analyzer.h
@@ -430,6 +430,9 @@  byte_offset_to_json (const byte_offset_t &offset);
 extern tristate
 compare_constants (tree lhs_const, enum tree_code op, tree rhs_const);
 
+extern tree
+get_string_cst_size (const_tree string_cst);
+
 } // namespace ana
 
 extern bool is_special_named_call_p (const gcall *call, const char *funcname,
diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc
index 21e13b480257..93e72ec45a85 100644
--- a/gcc/analyzer/region-model-manager.cc
+++ b/gcc/analyzer/region-model-manager.cc
@@ -1407,6 +1407,20 @@  get_or_create_const_fn_result_svalue (tree type,
   return const_fn_result_sval;
 }
 
+/* Get a tree for the size of STRING_CST, or NULL_TREE.
+   Note that this may be larger than TREE_STRING_LENGTH (implying
+   a run of trailing zero bytes from TREE_STRING_LENGTH up to this
+   higher limit).  */
+
+tree
+get_string_cst_size (const_tree string_cst)
+{
+  gcc_assert (TREE_CODE (string_cst) == STRING_CST);
+  gcc_assert (TREE_CODE (TREE_TYPE (string_cst)) == ARRAY_TYPE);
+
+  return TYPE_SIZE_UNIT (TREE_TYPE (string_cst));
+}
+
 /* Given STRING_CST, a STRING_CST and BYTE_OFFSET_CST a constant,
    attempt to get the character at that offset, returning either
    the svalue for the character constant, or NULL if unsuccessful.  */
@@ -1420,16 +1434,27 @@  region_model_manager::maybe_get_char_from_string_cst (tree string_cst,
   /* Adapted from fold_read_from_constant_string.  */
   scalar_int_mode char_mode;
   if (TREE_CODE (byte_offset_cst) == INTEGER_CST
-      && compare_tree_int (byte_offset_cst,
-			   TREE_STRING_LENGTH (string_cst)) < 0
       && is_int_mode (TYPE_MODE (TREE_TYPE (TREE_TYPE (string_cst))),
 		      &char_mode)
       && GET_MODE_SIZE (char_mode) == 1)
     {
+      /* If we're beyond the string_cst, the read is unsuccessful.  */
+      if (compare_constants (byte_offset_cst,
+			     GE_EXPR,
+			     get_string_cst_size (string_cst)).is_true ())
+	return NULL;
+
+      int char_val;
+      if (compare_tree_int (byte_offset_cst,
+			    TREE_STRING_LENGTH (string_cst)) < 0)
+	/* We're within the area defined by TREE_STRING_POINTER.  */
+	char_val = (TREE_STRING_POINTER (string_cst)
+		    [TREE_INT_CST_LOW (byte_offset_cst)]);
+      else
+	/* We're in the padding area of trailing zeroes.  */
+	char_val = 0;
       tree char_cst
-	= build_int_cst_type (TREE_TYPE (TREE_TYPE (string_cst)),
-			      (TREE_STRING_POINTER (string_cst)
-			       [TREE_INT_CST_LOW (byte_offset_cst)]));
+	= build_int_cst_type (TREE_TYPE (TREE_TYPE (string_cst)), char_val);
       return get_or_create_constant_svalue (char_cst);
     }
   return NULL;
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index a26be7075997..6ab917465d6f 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -3648,7 +3648,22 @@  string_cst_has_null_terminator (tree string_cst,
 				byte_offset_t *out_bytes_read)
 {
   gcc_assert (bytes.m_start_byte_offset >= 0);
-  gcc_assert (bytes.m_start_byte_offset < TREE_STRING_LENGTH (string_cst));
+
+  /* If we're beyond the string_cst, reads are unsuccessful.  */
+  if (tree cst_size = get_string_cst_size (string_cst))
+    if (TREE_CODE (cst_size) == INTEGER_CST)
+      if (bytes.m_start_byte_offset >= TREE_INT_CST_LOW (cst_size))
+	return tristate::unknown ();
+
+  /* Assume all bytes after TREE_STRING_LENGTH are zero.  This handles
+     the case where an array is initialized with a string_cst that isn't
+     as long as the array, where the remaining elements are
+     empty-initialized and thus zeroed.  */
+  if (bytes.m_start_byte_offset >= TREE_STRING_LENGTH (string_cst))
+    {
+      *out_bytes_read = 1;
+      return tristate (true);
+    }
 
   /* Look for the first 0 byte within STRING_CST
      from START_READ_OFFSET onwards.  */
diff --git a/gcc/testsuite/c-c++-common/analyzer/strlen-pr113999.c b/gcc/testsuite/c-c++-common/analyzer/strlen-pr113999.c
new file mode 100644
index 000000000000..f822606415a5
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/analyzer/strlen-pr113999.c
@@ -0,0 +1,8 @@ 
+int main() {
+  char a[20] = "ab";
+  __builtin_strcpy(a, "123");
+  long n0 = __builtin_strlen(a);
+  __builtin_strncpy(a + 3, a, n0);
+  __builtin_strlen(a);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/strlen-1.c b/gcc/testsuite/gcc.dg/analyzer/strlen-1.c
index 99ce3aa297b5..54e8ef1dd29e 100644
--- a/gcc/testsuite/gcc.dg/analyzer/strlen-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/strlen-1.c
@@ -52,3 +52,55 @@  test_passthrough (const char *str)
    - complain if NULL str
    - should it be tainted if str's bytes are tainted?
 */
+
+static size_t __attribute__((noinline))
+call_strlen_3 (const char *p)
+{
+  return __builtin_strlen (p);
+}
+
+void test_array_initialization_from_shorter_literal (void)
+{
+  const char buf[10] = "abc";
+  __analyzer_eval (call_strlen_3 (buf) == 3); /* { dg-warning "TRUE" } */
+  __analyzer_eval (call_strlen_3 (buf + 5) == 0); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[5] == 0); /* { dg-warning "TRUE" } */
+}
+
+static size_t __attribute__((noinline))
+call_strlen_4 (const char *p)
+{
+  return __builtin_strlen (p); /* { dg-warning "stack-based buffer over-read" } */
+}
+
+char test_array_initialization_from_longer_literal (void)
+{
+  const char buf[3] = "abcdefg"; /* { dg-warning "initializer-string for array of 'char' is too long" } */
+  __analyzer_eval (call_strlen_4 (buf) == 3); /* { dg-warning "UNKNOWN" } */
+  return buf[5]; /* { dg-warning "stack-based buffer over-read" } */
+}
+
+static size_t __attribute__((noinline))
+call_strlen_5 (const char *p)
+{
+  return __builtin_strlen (p);
+}
+
+static size_t __attribute__((noinline))
+call_strlen_5a (const char *p)
+{
+  return __builtin_strlen (p); /* { dg-warning "stack-based buffer over-read" } */
+}
+
+char test_array_initialization_implicit_length (void)
+{
+  const char buf[] = "abc";
+  __analyzer_eval (call_strlen_5 (buf) == 3); /* { dg-warning "TRUE" } */
+  __analyzer_eval (call_strlen_5 (buf + 2) == 1); /* { dg-warning "TRUE" } */
+  __analyzer_eval (call_strlen_5 (buf + 3) == 0); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[0] == 'a'); /* { dg-warning "TRUE" } */
+  __analyzer_eval (buf[3] == 0); /* { dg-warning "TRUE" } */
+  __analyzer_eval (call_strlen_5a (buf + 4) == 0); /* { dg-warning "UNKNOWN" } */
+  return buf[4]; /* { dg-warning "stack-based buffer over-read" } */
+}
+