handle local aggregate initialization in strlen (PR 83821)

Message ID 3c35d53d-41e6-2813-d447-c38931ddd535@gmail.com
State New
Headers show
Series
  • handle local aggregate initialization in strlen (PR 83821)
Related show

Commit Message

Martin Sebor Jan. 12, 2018, 9:30 p.m.
A failure in a test for the recently enhanced -Warray-bounds
warning exposed an unnecessarily broad restriction in the strlen
pass that prevents it from tracking the length of a member string
of locally defined and initialized struct:

   void f (void)
   {
     struct { char s[8]; int i } a = { "1234", 5 };

     if (strlen (a.s) != 4)   // not folded
       abort ();
    }

IIUC, the restriction was in place to account for writes into
an array changing or invalidating the length of a string stored
in its initial elements.  This would happen if the write either
changed the string's terminating nul byte, or if it reset one
of the prior non-nul bytes.

To reflect just this intent the restriction can be tightened
up to improve the pass' ability to track even the lengths of
string members of locally initialized aggregates.  Besides
leading to better code this change also clears up the test
failure.

Tested on x86_64-linux.

Martin

Comments

Martin Sebor Jan. 18, 2018, 7:13 p.m. | #1
Ping: https://gcc.gnu.org/ml/gcc-patches/2018-01/msg01131.html

This was submitted in stage 3 but if fixing xfailed assertions
in tests by enhancing optimizations is out of scope for the
current stage let me know so I can schedule this change for
GCC 9.

On 01/12/2018 02:30 PM, Martin Sebor wrote:
> A failure in a test for the recently enhanced -Warray-bounds
> warning exposed an unnecessarily broad restriction in the strlen
> pass that prevents it from tracking the length of a member string
> of locally defined and initialized struct:
>
>   void f (void)
>   {
>     struct { char s[8]; int i } a = { "1234", 5 };
>
>     if (strlen (a.s) != 4)   // not folded
>       abort ();
>    }
>
> IIUC, the restriction was in place to account for writes into
> an array changing or invalidating the length of a string stored
> in its initial elements.  This would happen if the write either
> changed the string's terminating nul byte, or if it reset one
> of the prior non-nul bytes.
>
> To reflect just this intent the restriction can be tightened
> up to improve the pass' ability to track even the lengths of
> string members of locally initialized aggregates.  Besides
> leading to better code this change also clears up the test
> failure.
>
> Tested on x86_64-linux.
>
> Martin
>

Patch

PR tree-optimization/83821 - local aggregate initialization defeats strlen optimization

gcc/ChangeLog:

	PR tree-optimization/83821
	* tree-ssa-strlen.c (maybe_invalidate): Consider the length of
	a string when available.
	(handle_char_store): Reset calloc statement on a non-nul store.

gcc/testsuite/ChangeLog:

	PR tree-optimization/83821
	* c-c++-common/Warray-bounds-4.c: Remove XFAIL.
	* gcc.dg/strlenopt-43.c: New test.
	* gcc.dg/strlenopt-44.c: Same.
	* gcc.dg/tree-ssa/calloc-4.c: Same.

Index: gcc/testsuite/gcc.dg/strlenopt-43.c
===================================================================
--- gcc/testsuite/gcc.dg/strlenopt-43.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/strlenopt-43.c	(working copy)
@@ -0,0 +1,178 @@ 
+/* PR tree-optimization/83821 - local aggregate initialization defeats
+   strlen optimization
+   { dg-do compile }
+   { dg-options "-O2 -Wall -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+#define CAT(x, y) x ## y
+#define CONCAT(x, y) CAT (x, y)
+#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {				\
+    extern void FAILNAME (name) (void);		\
+    FAILNAME (name)();				\
+  } while (0)
+
+/* Macro to emit a call to funcation named
+     call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr) \
+  if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0
+
+/* Macro to emit a call to a function named
+     call_made_in_{true,false}_branch_on_line_NNN()
+   for each call that's expected to be retained.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that the expected number of both kinds of calls appears in output
+   (a pair for each line with the invocation of the KEEP() macro.  */
+#define KEEP(expr)				\
+  if (expr)					\
+    FAIL (made_in_true_branch);			\
+  else						\
+    FAIL (made_in_false_branch)
+
+#define STR10 "0123456789"
+#define STR20 STR10 STR10
+#define STR30 STR20 STR10
+#define STR40 STR20 STR20
+
+struct Consec
+{
+  char s1[sizeof STR40];
+  char s2[sizeof STR40];
+  const char *p1;
+  const char *p2;
+};
+
+void elim_init_consecutive (void)
+{
+  struct Consec a = { STR10, STR10, STR10, STR10 };
+
+  ELIM (strlen (a.s1) == sizeof STR10 - 1);
+  ELIM (strlen (a.s2) == sizeof STR10 - 1);
+  ELIM (strlen (a.p1) == sizeof STR10 - 1);
+  ELIM (strlen (a.p2) == sizeof STR10 - 1);
+}
+
+
+void elim_array_init_consecutive (void)
+{
+  struct Consec a[2] = {
+    { STR10, STR20, STR30, STR40 },
+    { STR40, STR30, STR20, STR10 }
+  };
+
+  ELIM (strlen (a[0].s1) == sizeof STR10 - 1);
+  ELIM (strlen (a[0].s2) == sizeof STR20 - 1);
+  ELIM (strlen (a[0].p1) == sizeof STR30 - 1);
+  ELIM (strlen (a[0].p2) == sizeof STR40 - 1);
+
+  ELIM (strlen (a[1].s1) == sizeof STR40 - 1);
+  ELIM (strlen (a[1].s2) == sizeof STR30 - 1);
+  ELIM (strlen (a[1].p1) == sizeof STR20 - 1);
+  ELIM (strlen (a[1].p2) == sizeof STR10 - 1);
+}
+
+
+struct NonConsec
+{
+  char s1[sizeof STR40];
+  int i1;
+  char s2[sizeof STR40];
+  int i2;
+  const char *p1;
+  int i3;
+  const char *p2;
+  int i4;
+};
+
+void elim_init_nonconsecutive (void)
+{
+  struct NonConsec b = { STR10, 123, STR20, 456, b.s1, 789, b.s2, 123 };
+
+  ELIM (strlen (b.s1) == sizeof STR10 - 1);
+  ELIM (strlen (b.s2) == sizeof STR20 - 1);
+  ELIM (strlen (b.p1) == sizeof STR10 - 1);
+  ELIM (strlen (b.p2) == sizeof STR20 - 1);
+}
+
+void elim_assign_tmp_nonconsecutive (void)
+{
+  struct NonConsec b = { "a", 1, "b", 2, "c", 3, "d", 4 };
+
+  b = (struct NonConsec){ STR10, 123, STR20, 456, STR30, 789, STR40, 123 };
+
+  ELIM (strlen (b.s1) == sizeof STR10 - 1);
+  ELIM (strlen (b.s2) == sizeof STR20 - 1);
+  ELIM (strlen (b.p1) == sizeof STR30 - 1);
+  ELIM (strlen (b.p2) == sizeof STR40 - 1);
+}
+
+const struct NonConsec bcst = {
+  STR40, -1, STR30, -2, STR20, -3, STR10, -4
+};
+
+void elim_assign_cst_nonconsecutive (void)
+{
+  struct NonConsec b = { "a", 1, "b", 2, "c", 3, "d" };
+
+  b = bcst;
+
+  ELIM (strlen (b.s1) == sizeof STR40 - 1);
+  ELIM (strlen (b.s2) == sizeof STR30 - 1);
+  ELIM (strlen (b.p1) == sizeof STR20 - 1);
+  ELIM (strlen (b.p2) == sizeof STR10 - 1);
+}
+
+void elim_copy_cst_nonconsecutive (void)
+{
+  struct NonConsec b = { "a", 1, "b", 2, "c", 3, "d" };
+  memcpy (&b, &bcst, sizeof b);
+
+  /* ELIM (strlen (b.s1) == sizeof STR40 - 1);
+     ELIM (strlen (b.s2) == sizeof STR30 - 1); */
+  ELIM (strlen (b.p1) == sizeof STR20 - 1);
+  ELIM (strlen (b.p2) == sizeof STR10 - 1);
+}
+
+
+#line 1000
+
+int sink (void*);
+
+void keep_init_nonconsecutive (void)
+{
+  struct NonConsec b = {
+    STR10, 123, STR20, 456, b.s1, 789, b.s2,
+    sink (&b)
+  };
+
+  KEEP (strlen (b.s1) == sizeof STR10 - 1);
+  KEEP (strlen (b.s2) == sizeof STR10 - 1);
+  KEEP (strlen (b.p1) == sizeof STR10 - 1);
+  KEEP (strlen (b.p2) == sizeof STR10 - 1);
+}
+
+void keep_assign_tmp_nonconsecutive (void)
+{
+  struct NonConsec b = { "a", 1, "b", 2, "c", 3, "d", 4 };
+
+  b = (struct NonConsec){
+    STR10, 123, STR20, 456, STR30, 789, STR40,
+    sink (&b)
+  };
+
+  KEEP (strlen (b.s1) == sizeof STR10 - 1);
+  KEEP (strlen (b.s2) == sizeof STR20 - 1);
+  KEEP (strlen (b.p1) == sizeof STR30 - 1);
+  KEEP (strlen (b.p2) == sizeof STR40 - 1);
+}
+
+/* { dg-final { scan-tree-dump-times "call_in_true_branch_not_eliminated_" 0 "optimized" } }
+
+   { dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 8 "optimized" } }
+   { dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 8 "optimized" } } */
+
Index: gcc/testsuite/gcc.dg/strlenopt-44.c
===================================================================
--- gcc/testsuite/gcc.dg/strlenopt-44.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/strlenopt-44.c	(working copy)
@@ -0,0 +1,41 @@ 
+/* PR tree-optimization/83821 - local aggregate initialization defeats
+   strlen optimization
+   Verify that a strlen() call is eliminated for a pointer to a region
+   of memory allocated by calloc() even if one or more nul bytes are
+   written into it.
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned n;
+
+void* elim_strlen_calloc_store_memset_1 (unsigned a, unsigned b)
+{
+  char *p = __builtin_calloc (a, 1);
+
+  p[1] = '\0';
+
+  __builtin_memset (p, 0, b);
+
+  n = __builtin_strlen (p);
+
+  return p;
+}
+
+void* elim_strlen_calloc_store_memset_2 (unsigned a, unsigned b, unsigned c)
+{
+  char *p = __builtin_calloc (a, 1);
+
+  p[1] = '\0';
+  __builtin_memset (p, 0, b);
+
+  n = __builtin_strlen (p);
+
+  p[3] = 0;
+  __builtin_memset (p, 0, c);
+
+  n = __builtin_strlen (p);
+
+  return p;
+}
+
+/* { dg-final { scan-tree-dump-not "strlen" "optimized" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/calloc-4.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/calloc-4.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/calloc-4.c	(working copy)
@@ -0,0 +1,39 @@ 
+/* PR tree-optimization/83821 - local aggregate initialization defeats
+   strlen optimization
+   Verify that a memset() call to zero out a subregion of memory
+   allocated by calloc() is eliminated even if a zero byte is written
+   into it in between the two calls.  See the calloc-2.c test that
+   verifies that the memset() calls isn't eliminated if the written
+   value is non-zero.
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-optimized" } */
+
+void* elim_calloc_store_memset_1 (unsigned a, unsigned b)
+{
+  char *p = __builtin_calloc (a, 1);
+
+  p[1] = '\0';
+
+  __builtin_memset (p, 0, b);
+
+  n = __builtin_strlen (p);
+
+  return p;
+}
+
+void* elim_calloc_store_memset_2 (unsigned a, unsigned b, unsigned c)
+{
+  char *p = __builtin_calloc (a, 1);
+
+  p[1] = '\0';
+  __builtin_memset (p, 0, b);
+
+  p[3] = 0;
+  __builtin_memset (p, 0, c);
+
+  return p;
+}
+
+/* { dg-final { scan-tree-dump-not "malloc" "optimized" } }
+   { dg-final { scan-tree-dump-times "calloc" 2 "optimized" } }
+   { dg-final { scan-tree-dump-not "memset" "optimized" } } */
Index: gcc/tree-ssa-strlen.c
===================================================================
--- gcc/tree-ssa-strlen.c	(revision 256590)
+++ gcc/tree-ssa-strlen.c	(working copy)
@@ -693,8 +693,16 @@  maybe_invalidate (gimple *stmt)
 	if (!si->dont_invalidate)
 	  {
 	    ao_ref r;
-	    /* Do not use si->nonzero_chars.  */
-	    ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
+	    tree size = NULL_TREE;
+	    if (si->nonzero_chars)
+	      {
+		/* Include the terminating nul in the size of the string
+		   to consider when determining possible clobber.  */
+		tree type = TREE_TYPE (si->nonzero_chars);
+		size = fold_build2 (PLUS_EXPR, type, si->nonzero_chars,
+				    build_int_cst (type, 1));
+	      }
+	    ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
 	    if (stmt_may_clobber_ref_p_1 (stmt, &r))
 	      {
 		set_strinfo (i, NULL);
@@ -2839,8 +2847,23 @@  handle_char_store (gimple_stmt_iterator *gsi)
 	    si = get_strinfo (idx);
 	  if (offset == 0)
 	    ssaname = TREE_OPERAND (lhs, 0);
-	  else if (si == NULL || compare_nonzero_chars (si, offset) < 0)
-	    return true;
+	  else if (si == NULL)
+            return true;
+	  else if (compare_nonzero_chars (si, offset) < 0)
+	    {
+	      if (si->stmt && !initializer_zerop (rhs))
+		{
+		  /* Reset the calloc statement that created the string
+		     if a non-zero value is being written into it, even
+		     if it's past the leading nul byte, so that subsequent
+		     memset calls aren't eliminated that may clear that
+		     byte.  */
+		  tree fn = gimple_call_fndecl (si->stmt);
+		  if (DECL_FUNCTION_CODE (fn) == BUILT_IN_CALLOC)
+		    si->stmt = NULL;
+		}
+	      return true;
+	    }
 	}
     }
   else