diff mbox

calloc = malloc + memset

Message ID alpine.DEB.2.02.1406231734300.3080@stedding.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse June 23, 2014, 3:51 p.m. UTC
On Mon, 23 Jun 2014, Jakub Jelinek wrote:

> Ok for trunk, sorry for the delay.

Thanks. Richard has moved the passes a bit since then, but I still have 
exactly one spot where the testsuite is ok :-) I need strlen to be after 
dom (for calloc.C) and before vrp (for several strlenopt-*.c). I'll commit 
it tomorrow if there aren't any comments on the pass placement.

2014-06-24  Marc Glisse  <marc.glisse@inria.fr>

 	PR tree-optimization/57742
gcc/
 	* tree-ssa-strlen.c (get_string_length): Ignore malloc.
 	(handle_builtin_malloc, handle_builtin_memset): New functions.
 	(strlen_optimize_stmt): Call them.
 	* passes.def: Move strlen after loop+dom but before vrp.
gcc/testsuite/
 	* g++.dg/tree-ssa/calloc.C: New testcase.
 	* gcc.dg/tree-ssa/calloc-1.c: Likewise.
 	* gcc.dg/tree-ssa/calloc-2.c: Likewise.
 	* gcc.dg/strlenopt-9.c: Adapt.

Comments

Richard Biener June 23, 2014, 4:10 p.m. UTC | #1
On June 23, 2014 5:51:30 PM CEST, Marc Glisse <marc.glisse@inria.fr> wrote:
>On Mon, 23 Jun 2014, Jakub Jelinek wrote:
>
>> Ok for trunk, sorry for the delay.
>
>Thanks. Richard has moved the passes a bit since then, but I still have
>
>exactly one spot where the testsuite is ok :-) I need strlen to be
>after 
>dom (for calloc.C) and before vrp (for several strlenopt-*.c). I'll
>commit 
>it tomorrow if there aren't any comments on the pass placement.

But vrp does not run at -O1 - does strlenopt?

>2014-06-24  Marc Glisse  <marc.glisse@inria.fr>
>
> 	PR tree-optimization/57742
>gcc/
> 	* tree-ssa-strlen.c (get_string_length): Ignore malloc.
> 	(handle_builtin_malloc, handle_builtin_memset): New functions.
> 	(strlen_optimize_stmt): Call them.
> 	* passes.def: Move strlen after loop+dom but before vrp.
>gcc/testsuite/
> 	* g++.dg/tree-ssa/calloc.C: New testcase.
> 	* gcc.dg/tree-ssa/calloc-1.c: Likewise.
> 	* gcc.dg/tree-ssa/calloc-2.c: Likewise.
> 	* gcc.dg/strlenopt-9.c: Adapt.
Marc Glisse June 23, 2014, 4:19 p.m. UTC | #2
On Mon, 23 Jun 2014, Richard Biener wrote:

> On June 23, 2014 5:51:30 PM CEST, Marc Glisse <marc.glisse@inria.fr> wrote:
>> On Mon, 23 Jun 2014, Jakub Jelinek wrote:
>>
>>> Ok for trunk, sorry for the delay.
>>
>> Thanks. Richard has moved the passes a bit since then, but I still have
>>
>> exactly one spot where the testsuite is ok :-) I need strlen to be
>> after
>> dom (for calloc.C) and before vrp (for several strlenopt-*.c). I'll
>> commit
>> it tomorrow if there aren't any comments on the pass placement.
>
> But vrp does not run at -O1 - does strlenopt?

     { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_foptimize_strlen, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_ftree_vrp, NULL, 1 },

So that's just a missed optimization at -Os, I guess.
Richard Biener June 23, 2014, 4:44 p.m. UTC | #3
On Mon, Jun 23, 2014 at 6:19 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 23 Jun 2014, Richard Biener wrote:
>
>> On June 23, 2014 5:51:30 PM CEST, Marc Glisse <marc.glisse@inria.fr>
>> wrote:
>>>
>>> On Mon, 23 Jun 2014, Jakub Jelinek wrote:
>>>
>>>> Ok for trunk, sorry for the delay.
>>>
>>>
>>> Thanks. Richard has moved the passes a bit since then, but I still have
>>>
>>> exactly one spot where the testsuite is ok :-) I need strlen to be
>>> after
>>> dom (for calloc.C) and before vrp (for several strlenopt-*.c). I'll
>>> commit
>>> it tomorrow if there aren't any comments on the pass placement.
>>
>>
>> But vrp does not run at -O1 - does strlenopt?
>
>
>     { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_foptimize_strlen, NULL, 1 },
>     { OPT_LEVELS_2_PLUS, OPT_ftree_vrp, NULL, 1 },
>
> So that's just a missed optimization at -Os, I guess.

Ok, that's fine (not sure why we restrict all of strilenopt instead of
just those
transforms that are harmful for -Os).

Richard.

> --
> Marc Glisse
diff mbox

Patch

Index: gcc/passes.def
===================================================================
--- gcc/passes.def	(revision 211886)
+++ gcc/passes.def	(working copy)
@@ -179,21 +179,20 @@  along with GCC; see the file COPYING3.
 	 DOM and erroneous path isolation should be due to degenerate PHI nodes.
 	 So rather than run the full propagators, run a specialized pass which
 	 only examines PHIs to discover const/copy propagation
 	 opportunities.  */
       NEXT_PASS (pass_phi_only_cprop);
       NEXT_PASS (pass_dse);
       NEXT_PASS (pass_reassoc);
       NEXT_PASS (pass_dce);
       NEXT_PASS (pass_forwprop);
       NEXT_PASS (pass_phiopt);
-      NEXT_PASS (pass_strlen);
       NEXT_PASS (pass_ccp);
       /* After CCP we rewrite no longer addressed locals into SSA
 	 form if possible.  */
       NEXT_PASS (pass_copy_prop);
       NEXT_PASS (pass_cse_sincos);
       NEXT_PASS (pass_optimize_bswap);
       NEXT_PASS (pass_split_crit_edges);
       NEXT_PASS (pass_pre);
       NEXT_PASS (pass_sink_code);
       NEXT_PASS (pass_asan);
@@ -232,20 +231,21 @@  along with GCC; see the file COPYING3.
 	  NEXT_PASS (pass_loop_prefetch);
 	  NEXT_PASS (pass_iv_optimize);
 	  NEXT_PASS (pass_lim);
 	  NEXT_PASS (pass_tree_loop_done);
       POP_INSERT_PASSES ()
       NEXT_PASS (pass_lower_vector_ssa);
       NEXT_PASS (pass_cse_reciprocals);
       NEXT_PASS (pass_reassoc);
       NEXT_PASS (pass_strength_reduction);
       NEXT_PASS (pass_dominator);
+      NEXT_PASS (pass_strlen);
       NEXT_PASS (pass_vrp);
       /* The only const/copy propagation opportunities left after
 	 DOM and VRP should be due to degenerate PHI nodes.  So rather than
 	 run the full propagators, run a specialized pass which
 	 only examines PHIs to discover const/copy propagation
 	 opportunities.  */
       NEXT_PASS (pass_phi_only_cprop);
       NEXT_PASS (pass_cd_dce);
       NEXT_PASS (pass_tracer);
       NEXT_PASS (pass_dse);
Index: gcc/testsuite/g++.dg/tree-ssa/calloc.C
===================================================================
--- gcc/testsuite/g++.dg/tree-ssa/calloc.C	(revision 0)
+++ gcc/testsuite/g++.dg/tree-ssa/calloc.C	(working copy)
@@ -0,0 +1,50 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+typedef __SIZE_TYPE__ size_t;
+inline void* operator new(size_t, void* p) throw() { return p; }
+
+typedef void (*handler_t)(void);
+extern handler_t get_handle();
+
+inline void* operator new(size_t sz)
+{
+  void *p;
+
+  if (sz == 0)
+    sz = 1;
+
+  while ((p = __builtin_malloc (sz)) == 0)
+    {
+      handler_t handler = get_handle ();
+      if (! handler)
+        throw 42;
+      handler ();
+    }
+  return p;
+}
+
+struct vect {
+  int *start, *end;
+  vect(size_t n) {
+    start = end = 0;
+    if (n > (size_t)-1 / sizeof(int))
+      throw 33;
+    if (n != 0)
+      start = static_cast<int*> (operator new (n * sizeof(int)));
+    end = start + n;
+    int *p = start;
+    for (size_t l = n; l > 0; --l, ++p)
+      *p = 0;
+  }
+};
+
+void f (void *p, int n)
+{
+  new (p) vect(n);
+}
+
+/* { dg-final { scan-tree-dump-times "calloc" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "memset" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/testsuite/gcc.dg/strlenopt-9.c
===================================================================
--- gcc/testsuite/gcc.dg/strlenopt-9.c	(revision 211886)
+++ gcc/testsuite/gcc.dg/strlenopt-9.c	(working copy)
@@ -11,21 +11,21 @@  fn1 (int r)
      optimized away.  */
   return strchr (p, '\0');
 }
 
 __attribute__((noinline, noclone)) size_t
 fn2 (int r)
 {
   char *p, q[10];
   strcpy (q, "abc");
   p = r ? "a" : q;
-  /* String length for p varies, therefore strlen below isn't
+  /* String length is constant for both alternatives, and strlen is
      optimized away.  */
   return strlen (p);
 }
 
 __attribute__((noinline, noclone)) size_t
 fn3 (char *p, int n)
 {
   int i;
   p = strchr (p, '\0');
   /* strcat here can be optimized into memcpy.  */
@@ -91,19 +91,19 @@  main ()
       || memcmp (b, "aaaabcdabcdefgabcdefgefgabcdabcd", 33) != 0
       || memcmp (buf, "aaaabcdabcdefgabcdefgefgabcdefg", 32) != 0)
     abort ();
   if (fn4 (b, 256) != 4
       || memcmp (b, "aaaabcdabcdefgabcdefgefgabcdabcdabcd", 37) != 0
       || memcmp (buf, "aaaabcdabcdefgabcdefgefgabcdabcdefgefg", 39) != 0)
     abort ();
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "strlen \\(" 4 "strlen" } } */
+/* { dg-final { scan-tree-dump-times "strlen \\(" 3 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "memcpy \\(" 6 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "strcpy \\(" 1 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "strchr \\(" 3 "strlen" } } */
 /* { dg-final { scan-tree-dump-times "stpcpy \\(" 0 "strlen" } } */
 /* { dg-final { cleanup-tree-dump "strlen" } } */
 /* { dg-final { scan-tree-dump-times "return 4;" 1 "optimized" } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/calloc-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/calloc-1.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/calloc-1.c	(working copy)
@@ -0,0 +1,29 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+extern int a;
+extern int *b;
+int n;
+void* f(long *q)
+{
+  int *p = __builtin_malloc (n);
+  ++*q;
+  if (p)
+  {
+    ++*q;
+    a = 2;
+    __builtin_memset (p, 0, n);
+    *b = 3;
+  }
+  return p;
+}
+void* g(void)
+{
+  float *p = __builtin_calloc (8, 4);
+  return __builtin_memset (p, 0, 24); // not 32
+}
+
+/* { dg-final { scan-tree-dump-times "calloc" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "memset" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/calloc-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/calloc-2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/calloc-2.c	(working copy)
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int n, nn;
+void* f()
+{
+  char *p = __builtin_calloc (n, 1);
+  p[42] = '\n';
+  __builtin_memset (p, 0, nn);
+  return p;
+}
+
+void* g(int m1, int m2)
+{
+  char *p = __builtin_malloc (m2);
+  while (--m1)
+  {
+    __builtin_memset (p, 0, m2);
+    p[n] = 'b';
+  }
+  return p;
+}
+
+/* { dg-final { scan-tree-dump-times "malloc" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "calloc" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "memset" 2 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/tree-ssa-strlen.c
===================================================================
--- gcc/tree-ssa-strlen.c	(revision 211886)
+++ gcc/tree-ssa-strlen.c	(working copy)
@@ -496,20 +496,23 @@  get_string_length (strinfo si)
 	case BUILT_IN_STPCPY_CHK:
 	  gcc_assert (lhs != NULL_TREE);
 	  loc = gimple_location (stmt);
 	  si->endptr = lhs;
 	  si->stmt = NULL;
 	  lhs = fold_convert_loc (loc, size_type_node, lhs);
 	  si->length = fold_convert_loc (loc, size_type_node, si->ptr);
 	  si->length = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
 					lhs, si->length);
 	  break;
+	case BUILT_IN_MALLOC:
+	  break;
+	/* BUILT_IN_CALLOC always has si->length set.  */
 	default:
 	  gcc_unreachable ();
 	  break;
 	}
     }
 
   return si->length;
 }
 
 /* Invalidate string length information for strings whose length
@@ -521,20 +524,21 @@  maybe_invalidate (gimple stmt)
   strinfo si;
   unsigned int i;
   bool nonempty = false;
 
   for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
     if (si != NULL)
       {
 	if (!si->dont_invalidate)
 	  {
 	    ao_ref r;
+	    /* Do not use si->length.  */
 	    ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
 	    if (stmt_may_clobber_ref_p_1 (stmt, &r))
 	      {
 		set_strinfo (i, NULL);
 		free_strinfo (si);
 		continue;
 	      }
 	  }
 	si->dont_invalidate = false;
 	nonempty = true;
@@ -1608,20 +1612,93 @@  handle_builtin_strcat (enum built_in_fun
 	{
 	  laststmt.stmt = stmt;
 	  laststmt.len = srclen;
 	  laststmt.stridx = dsi->idx;
 	}
     }
   else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
     fprintf (dump_file, "not possible.\n");
 }
 
+/* Handle a call to malloc or calloc.  */
+
+static void
+handle_builtin_malloc (enum built_in_function bcode, gimple_stmt_iterator *gsi)
+{
+  gimple stmt = gsi_stmt (*gsi);
+  tree lhs = gimple_call_lhs (stmt);
+  gcc_assert (get_stridx (lhs) == 0);
+  int idx = new_stridx (lhs);
+  tree length = NULL_TREE;
+  if (bcode == BUILT_IN_CALLOC)
+    length = build_int_cst (size_type_node, 0);
+  strinfo si = new_strinfo (lhs, idx, length);
+  if (bcode == BUILT_IN_CALLOC)
+    si->endptr = lhs;
+  set_strinfo (idx, si);
+  si->writable = true;
+  si->stmt = stmt;
+  si->dont_invalidate = true;
+}
+
+/* Handle a call to memset.
+   After a call to calloc, memset(,0,) is unnecessary.
+   memset(malloc(n),0,n) is calloc(n,1).  */
+
+static bool
+handle_builtin_memset (gimple_stmt_iterator *gsi)
+{
+  gimple stmt2 = gsi_stmt (*gsi);
+  if (!integer_zerop (gimple_call_arg (stmt2, 1)))
+    return true;
+  tree ptr = gimple_call_arg (stmt2, 0);
+  int idx1 = get_stridx (ptr);
+  if (idx1 <= 0)
+    return true;
+  strinfo si1 = get_strinfo (idx1);
+  if (!si1)
+    return true;
+  gimple stmt1 = si1->stmt;
+  if (!stmt1 || !is_gimple_call (stmt1))
+    return true;
+  tree callee1 = gimple_call_fndecl (stmt1);
+  if (!gimple_call_builtin_p (stmt1, BUILT_IN_NORMAL))
+    return true;
+  enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
+  tree size = gimple_call_arg (stmt2, 2);
+  if (code1 == BUILT_IN_CALLOC)
+    /* Not touching stmt1 */ ;
+  else if (code1 == BUILT_IN_MALLOC
+	   && operand_equal_p (gimple_call_arg (stmt1, 0), size, 0))
+    {
+      gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt1);
+      update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
+			  size, build_one_cst (size_type_node));
+    }
+  else
+    return true;
+  tree lhs = gimple_call_lhs (stmt2);
+  unlink_stmt_vdef (stmt2);
+  if (lhs)
+    {
+      gimple assign = gimple_build_assign (lhs, ptr);
+      gsi_replace (gsi, assign, false);
+    }
+  else
+    {
+      gsi_remove (gsi, true);
+      release_defs (stmt2);
+    }
+
+  return false;
+}
+
 /* Handle a POINTER_PLUS_EXPR statement.
    For p = "abcd" + 2; compute associated length, or if
    p = q + off is pointing to a '\0' character of a string, call
    zero_length_string on it.  */
 
 static void
 handle_pointer_plus (gimple_stmt_iterator *gsi)
 {
   gimple stmt = gsi_stmt (*gsi);
   tree lhs = gimple_assign_lhs (stmt), off;
@@ -1845,20 +1922,28 @@  strlen_optimize_stmt (gimple_stmt_iterat
 	  case BUILT_IN_MEMCPY:
 	  case BUILT_IN_MEMCPY_CHK:
 	  case BUILT_IN_MEMPCPY:
 	  case BUILT_IN_MEMPCPY_CHK:
 	    handle_builtin_memcpy (DECL_FUNCTION_CODE (callee), gsi);
 	    break;
 	  case BUILT_IN_STRCAT:
 	  case BUILT_IN_STRCAT_CHK:
 	    handle_builtin_strcat (DECL_FUNCTION_CODE (callee), gsi);
 	    break;
+	  case BUILT_IN_MALLOC:
+	  case BUILT_IN_CALLOC:
+	    handle_builtin_malloc (DECL_FUNCTION_CODE (callee), gsi);
+	    break;
+	  case BUILT_IN_MEMSET:
+	    if (!handle_builtin_memset (gsi))
+	      return false;
+	    break;
 	  default:
 	    break;
 	  }
     }
   else if (is_gimple_assign (stmt))
     {
       tree lhs = gimple_assign_lhs (stmt);
 
       if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (lhs)))
 	{