diff mbox series

[committed,4/6] detect unterminated const arrays in sprintf calls (PR 86552)

Message ID d5e4cfd6-551e-9b3c-9b7c-cb8b537b622b@redhat.com
State New
Headers show
Series None | expand

Commit Message

Jeff Law Oct. 3, 2018, 8 p.m. UTC
This is primarily Martin's work, tweaked to work with the updated
c_strlen API from a few weeks ago.

The API passes in a c_strlen_data structure pointer rather than a tree
pointer.  Information about the string is bubbled back to the caller via
that structure.  That's trivial.

More importantly c_strlen always returns NULL for an unterminated string
or something it can't handle.   So this hunk had to be changed:


@@ -2129,10 +2133,12 @@ get_string_length (tree str)
   if (!str)
     return fmtresult ();

-  if (tree slen = c_strlen (str, 1))
+  tree arr;
+  if (tree slen = c_strlen (str, 1, &arr))
     {
       /* Simply return the length of the string.  */
       fmtresult res (tree_to_shwi (slen));
+      res.nonstr = arr;
       return res;
     }


First, when slen is non-NULL, then we know the string was terminated and
we have length information of some kind.  So in that case we clear
res.nonstr and return the provided length.

When slen is NULL, we have either an unterminated string or something
c_strlen couldn't handle.  We detect the former by looking at the
c_strlen_data structure.  When detected we set res.nonstr to the decl
bubbled up in the c_strlen_data structure.

The one additional wrinkle here is that changes to string_constant can
result in getting a non-constant length from c_strlen (triggers in
libgfortran).  Essentially we get a COND_EXPR back. I'm punting that case.


Bootstrapped and regression tested on x86_64.  Installing on the trunk.

Now back to the AIX problems from 6 weeks ago :-)

Jeff
commit 7e0363d7a45e303064e54292c26e12934a41e2e7
Author: Jeff Law <law@torsion.usersys.redhat.com>
Date:   Thu Aug 30 18:55:16 2018 -0400

            * gimple-ssa-sprintf.c (struct fmtresult): Add new member and
            initialize it.
            (get_string_length): Detect unterminated arrays.
            (format_string): Same.
            (format_directive): Warn about unterminated arrays.
            (handle_gimple_call): Mark statements with no_warning as needed.
    
            * gcc.dg/warn-sprintf-no-nul.c: New test.
diff mbox series

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8e7cb548c15..1f479545e53 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@ 
+2018-10-03  Martin Sebor  <msebor@redhat.com>
+	    Jeff Law  <law@redhat.com>
+
+	* gimple-ssa-sprintf.c (struct fmtresult): Add new member and
+	initialize it.
+	(get_string_length): Detect unterminated arrays.
+	(format_string): Same.
+	(format_directive): Warn about unterminated arrays.
+	(handle_gimple_call): Mark statements with no_warning as needed.
+
 2018-10-03  Jim Wilson  <jimw@sifive.com>
 
 	* config/riscv/riscv-c.c (riscv_cpu_cpp_builtins): For ABI_ILP32E,
diff --git a/gcc/gimple-ssa-sprintf.c b/gcc/gimple-ssa-sprintf.c
index 9b6f6e60192..88e952828e1 100644
--- a/gcc/gimple-ssa-sprintf.c
+++ b/gcc/gimple-ssa-sprintf.c
@@ -516,7 +516,7 @@  struct fmtresult
   /* Construct a FMTRESULT object with all counters initialized
      to MIN.  KNOWNRANGE is set when MIN is valid.  */
   fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
-  : argmin (), argmax (),
+  : argmin (), argmax (), nonstr (),
     knownrange (min < HOST_WIDE_INT_MAX),
     mayfail (), nullp ()
   {
@@ -530,7 +530,7 @@  struct fmtresult
      KNOWNRANGE is set when both MIN and MAX are valid.   */
   fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
 	     unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
-  : argmin (), argmax (),
+  : argmin (), argmax (), nonstr (),
     knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
     mayfail (), nullp ()
   {
@@ -557,6 +557,10 @@  struct fmtresult
      results in on output for an argument in the range above.  */
   result_range range;
 
+  /* Non-nul when the argument of a string directive is not a nul
+     terminated string.  */
+  tree nonstr;
+
   /* True when the range above is obtained from a known value of
      a directive's argument or its bounds and not the result of
      heuristics that depend on warning levels.  */
@@ -2001,13 +2005,38 @@  get_string_length (tree str, unsigned eltsize)
   if (!str)
     return fmtresult ();
 
+  c_strlen_data data;
+  memset (&data, 0, sizeof (c_strlen_data));
+  tree slen = c_strlen (str, 1, &data, eltsize);
+  if (slen && TREE_CODE (slen) == INTEGER_CST)
+    {
+      /* The string is properly terminated and
+	 we know its length.  */
+      fmtresult res (tree_to_shwi (slen));
+      res.nonstr = NULL_TREE;
+      return res;
+    }
+  else if (!slen
+	   && data.decl
+	   && data.len
+	   && TREE_CODE (data.len) == INTEGER_CST)
+    {
+      /* STR was not properly NUL terminated, but we have
+	 length information about the unterminated string.  */
+      fmtresult res (tree_to_shwi (data.len));
+      res.nonstr = data.decl;
+      return res;
+    }
+
   /* Determine the length of the shortest and longest string referenced
      by STR.  Strings of unknown lengths are bounded by the sizes of
      arrays that subexpressions of STR may refer to.  Pointers that
      aren't known to point any such arrays result in LENRANGE[1] set
-     to SIZE_MAX.  */
+     to SIZE_MAX.  NONSTR is set to the declaration of the constant
+     array that is known not to be nul-terminated.  */
   tree lenrange[2];
-  bool flexarray = get_range_strlen (str, lenrange, eltsize);
+  tree nonstr;
+  bool flexarray = get_range_strlen (str, lenrange, eltsize, false, &nonstr);
 
   if (lenrange [0] || lenrange [1])
     {
@@ -2030,6 +2059,7 @@  get_string_length (tree str, unsigned eltsize)
 	max = HOST_WIDE_INT_M1U;
 
       fmtresult res (min, max);
+      res.nonstr = nonstr;
 
       /* Set RES.KNOWNRANGE to true if and only if all strings referenced
 	 by STR are known to be bounded (though not necessarily by their
@@ -2309,6 +2339,11 @@  format_string (const directive &dir, tree arg, vr_values *)
       res.range.unlikely = res.range.max;
     }
 
+  /* If the argument isn't a nul-terminated string and the number
+     of bytes on output isn't bounded by precision, set NONSTR.  */
+  if (slen.nonstr && slen.range.min < (unsigned HOST_WIDE_INT)dir.prec[0])
+    res.nonstr = slen.nonstr;
+
   /* Bump up the byte counters if WIDTH is greater.  */
   return res.adjust_for_width_or_precision (dir.width);
 }
@@ -2878,6 +2913,19 @@  format_directive (const sprintf_dom_walker::call_info &info,
 			  fmtres.range.min, fmtres.range.max);
     }
 
+  if (!warned && fmtres.nonstr)
+    {
+      warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
+			"%<%.*s%> directive argument is not a nul-terminated "
+			"string",
+			dirlen,
+			target_to_host (hostdir, sizeof hostdir, dir.beg));
+      if (warned && DECL_P (fmtres.nonstr))
+	inform (DECL_SOURCE_LOCATION (fmtres.nonstr),
+		"referenced argument declared here");
+      return false;
+    }
+
   if (warned && fmtres.range.min < fmtres.range.likely
       && fmtres.range.likely < fmtres.range.max)
     inform_n (info.fmtloc, fmtres.range.likely,
@@ -3926,6 +3974,8 @@  sprintf_dom_walker::handle_gimple_call (gimple_stmt_iterator *gsi)
   format_result res = format_result ();
 
   bool success = compute_format_length (info, &res);
+  if (res.warned)
+    gimple_set_no_warning (info.callstmt, true);
 
   /* When optimizing and the printf return value optimization is enabled,
      attempt to substitute the computed result for the return value of
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a888b8ffa4d..c06842d0f4f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@ 
+2018-10-03  Martin Sebor  <msebor@redhat.com>
+
+	* gcc.dg/warn-sprintf-no-nul.c: New test.
+
 2018-10-03  Martin Liska  <mliska@suse.cz>
 
 	PR gcov-profile/86109
diff --git a/gcc/testsuite/gcc.dg/warn-sprintf-no-nul.c b/gcc/testsuite/gcc.dg/warn-sprintf-no-nul.c
new file mode 100644
index 00000000000..b331bb5aaff
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/warn-sprintf-no-nul.c
@@ -0,0 +1,90 @@ 
+/* PR tree-optimization/86552 - missing warning for reading past the end
+   of non-string arrays
+   Exercise non-string detection in sprintf.
+   { dg-do compile }
+   { dg-options "-O2 -Wno-array-bounds -Wall -ftrack-macro-expansion=0" } */
+
+#include "range.h"
+
+typedef __WCHAR_TYPE__ wchar_t;
+
+extern int sprintf (char*, const char*, ...);
+
+extern char *dst;
+
+int i0 = 0;
+int i1 = 1;
+
+void sink (int, ...);
+
+#define CONCAT(a, b)   a ## b
+#define CAT(a, b)      CONCAT(a, b)
+
+#define T(fmt, ...)				\
+  sink (sprintf (dst, fmt, __VA_ARGS__))
+
+const char a[5] = "12345";    /* { dg-message "declared here" } */
+const char b[6] = "123456";   /* { dg-message "declared here" } */
+const char a2[][3] = {
+  "", "1", "12", "123", "123\000"   /* { dg-warning "initializer-string for array of chars is too long" } */
+};
+
+
+void test_narrow (void)
+{
+  /* Verify that precision suppresses the warning when it's less
+     than the size of the array.  */
+  T ("%.0s%.1s%.2s%.3s%.4s%.5s", a, a, a, a, a, a);
+
+  T ("%s", a);          /* { dg-warning ".%s. directive argument is not a nul-terminated string" } */
+  T ("%.6s", a);        /* { dg-warning ".%.6s. directive argument is not a nul-terminated string" } */
+
+  /* Exercise conditional expressions involving strings and non-strings.  */
+  const char *s0 = i0 < 0 ? a2[0] : a2[3];
+  T ("%s", s0);         /* { dg-warning ".%s. directive argument is not a nul-terminated string" } */
+  s0 = i0 < 0 ? "123456" : a2[4];
+  T ("%s", s0);         /* { dg-warning ".%s. directive argument is not a nul-terminated string" } */
+
+  const char *s1 = i0 < 0 ? a2[3] : a2[0];
+  T ("%s", s1);         /* { dg-warning ".%s. directive argument is not a nul-terminated string" } */
+
+  const char *s2 = i0 < 0 ? a2[3] : a2[4];
+  T ("%s", s2);         /* { dg-warning ".%s. directive argument is not a nul-terminated string" } */
+
+  s0 = i0 < 0 ? a : b;
+  T ("%.5s", s0);
+
+  /* Verify that the warning triggers even if precision prevents
+     reading past the end of one of the non-terminated arrays but
+     not the other.  */
+  T ("%.6s", s0);       /* { dg-warning ".%.6s. directive argument is not a nul-terminated string" } */
+
+  s0 = i0 < 0 ? b : a;
+  T ("%.7s", s0);       /* { dg-warning ".%.7s. directive argument is not a nul-terminated string" } */
+
+  /* Verify that at -Wformat-overflow=1 the lower bound of precision
+     given by a range is used to determine whether or not to warn.  */
+  int r = SR (4, 5);
+
+  T ("%.*s", r, a);
+  T ("%.*s", r, b);
+
+  r = SR (5, 6);
+  T ("%.*s", r, a);
+  T ("%.*s", r, b);
+
+  r = SR (6, 7);
+  T ("%.*s", r, a);     /* { dg-warning ".%.\\\*s. directive argument is not a nul-terminated string" } */
+  T ("%.*s", r, b);
+}
+
+
+const wchar_t wa[5] = L"12345";   /* { dg-message "declared here" } */
+
+void test_wide (void)
+{
+  T ("%.0ls%.1ls%.2ls%.3ls%.4ls%.5ls", wa, wa, wa, wa, wa, wa);
+
+  T ("%ls", wa);        /* { dg-warning ".%ls. directive argument is not a nul-terminated string" } */
+  T ("%.6ls", wa);      /* { dg-warning ".%.6ls. directive argument is not a nul-terminated string" } */
+}