diff mbox

handle bzero/bcopy in DSE and aliasing (PR 80933, 80934)

Message ID 67a113e7-ee14-8afe-9deb-6c2c26927505@gmail.com
State New
Headers show

Commit Message

Martin Sebor June 8, 2017, 2:33 a.m. UTC
On 06/07/2017 02:12 PM, Martin Sebor wrote:
> On 06/07/2017 02:01 PM, Marc Glisse wrote:
>> On Wed, 7 Jun 2017, Bernhard Reutner-Fischer wrote:
>>
>>> On 7 June 2017 16:46:53 CEST, Martin Sebor <msebor@gmail.com> wrote:
>>>> On 06/07/2017 02:23 AM, Richard Biener wrote:
>>>>> On Wed, Jun 7, 2017 at 5:26 AM, Martin Sebor <msebor@gmail.com>
>>>> wrote:
>>>>>>> Note I'd be _much_ more sympathetic to simply canonicalizing all of
>>>>>>> bzero and bcopy
>>>>>>> to memset / memmove and be done with all the above complexity.
>>>>>>
>>>>>>
>>>>>> Attached is an updated patch along these lines.  Please let me
>>>>>> know if it matches your expectations.
>>>>>
>>>>> I think you attached the wrong patch.
>>>>
>>>> Yes I did, sorry.  The correct one is attached.
>>>
>>> Under POSIX.1-2008 "optimizing" bzero or bcmp is IMO plain wrong.
>>>
>>> It's like optimizing foo() to a random built-in but maybe that's just
>>> me. If your libc provides a define to a standard function for these
>>> under a compat knob then fine but otherwise you should fix that.
>>> *shrug*. Joseph?
>>
>> The patch optimizes __builtin_bzero, which should be ok. The question
>> (independent from this patch) is then under what conditions bzero should
>> be detected as a builtin.
>
> Yes.  The problem is that unlike for C and C++, GCC doesn't have
> a mechanism to select the target version of POSIX.  I think it
> should.
>
> But there is a subtle problem with the patch that needs fixing.
> Bcopy should not be transformed to memcpy but rather memmove.
> I'll fix that before committing.

Attached is an updated patch with this fix.  I also added a cast
from bcopy and bzero to void to detect accidental uses of the
return value.  Tested on x86_64-linux.

Martin

Comments

Richard Biener June 8, 2017, 7:51 a.m. UTC | #1
On Thu, Jun 8, 2017 at 4:33 AM, Martin Sebor <msebor@gmail.com> wrote:
> On 06/07/2017 02:12 PM, Martin Sebor wrote:
>>
>> On 06/07/2017 02:01 PM, Marc Glisse wrote:
>>>
>>> On Wed, 7 Jun 2017, Bernhard Reutner-Fischer wrote:
>>>
>>>> On 7 June 2017 16:46:53 CEST, Martin Sebor <msebor@gmail.com> wrote:
>>>>>
>>>>> On 06/07/2017 02:23 AM, Richard Biener wrote:
>>>>>>
>>>>>> On Wed, Jun 7, 2017 at 5:26 AM, Martin Sebor <msebor@gmail.com>
>>>>>
>>>>> wrote:
>>>>>>>>
>>>>>>>> Note I'd be _much_ more sympathetic to simply canonicalizing all of
>>>>>>>> bzero and bcopy
>>>>>>>> to memset / memmove and be done with all the above complexity.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Attached is an updated patch along these lines.  Please let me
>>>>>>> know if it matches your expectations.
>>>>>>
>>>>>>
>>>>>> I think you attached the wrong patch.
>>>>>
>>>>>
>>>>> Yes I did, sorry.  The correct one is attached.
>>>>
>>>>
>>>> Under POSIX.1-2008 "optimizing" bzero or bcmp is IMO plain wrong.
>>>>
>>>> It's like optimizing foo() to a random built-in but maybe that's just
>>>> me. If your libc provides a define to a standard function for these
>>>> under a compat knob then fine but otherwise you should fix that.
>>>> *shrug*. Joseph?
>>>
>>>
>>> The patch optimizes __builtin_bzero, which should be ok. The question
>>> (independent from this patch) is then under what conditions bzero should
>>> be detected as a builtin.
>>
>>
>> Yes.  The problem is that unlike for C and C++, GCC doesn't have
>> a mechanism to select the target version of POSIX.  I think it
>> should.
>>
>> But there is a subtle problem with the patch that needs fixing.
>> Bcopy should not be transformed to memcpy but rather memmove.
>> I'll fix that before committing.
>
>
> Attached is an updated patch with this fix.  I also added a cast
> from bcopy and bzero to void to detect accidental uses of the
> return value.  Tested on x86_64-linux.

Please do not add foldings to builtins.c but instead add them to gimple-fold.c.

+  /* Call memset and return the result cast to void to detect its use
+     (bzero returns void).  */
+  tree call = build_call_expr_loc (loc, fn, 3, dst, integer_zero_node, len);
+  return fold_convert (void_type_node, call);

???  How can the result be used if the original call result was not?

Thanks,
Richard.

> Martin
diff mbox

Patch

PR tree-optimization/80934 - bzero should be assumed not to escape pointer argument
PR tree-optimization/80933 - redundant bzero/bcopy calls not eliminated

gcc/ChangeLog:

	PR tree-optimization/80933
	PR tree-optimization/80934
	* builtins.c (fold_builtin_bcmp, fold_builtin_bcopy): New functions.
	(fold_builtin_bzero): Likewise.
	(fold_builtin_2): Handle bzero.
	(fold_builtin_3): Handle bcmp and bcpy.

gcc/testsuite/ChangeLog:

	PR tree-optimization/80933
	PR tree-optimization/80934
	* gcc.dg/fold-bcopy.c: New test.
	* gcc.dg/tree-ssa/ssa-dse-30.c: Likewise..
	* gcc.dg/tree-ssa/alias-36.c: Likewise.
	* gcc/testsuite/gcc.dg/pr79214.c: Adjust.
	* gcc.dg/tree-prof/val-prof-7.c: Likewise.
	* gcc.dg/Wsizeof-pointer-memaccess1.c: Likewise.
	* gcc.dg/builtins-nonnull.c: Likewise.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 30462ad..52d42b9 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -145,6 +145,9 @@  static rtx expand_builtin_unop (machine_mode, tree, rtx, rtx, optab);
 static rtx expand_builtin_frame_address (tree, tree);
 static tree stabilize_va_list_loc (location_t, tree, int);
 static rtx expand_builtin_expect (tree, rtx);
+static tree fold_builtin_bcmp (location_t, tree, tree, tree);
+static tree fold_builtin_bcopy (location_t, tree, tree, tree);
+static tree fold_builtin_bzero (location_t, tree, tree);
 static tree fold_builtin_constant_p (tree);
 static tree fold_builtin_classify_type (tree);
 static tree fold_builtin_strlen (location_t, tree, tree);
@@ -7982,6 +7985,56 @@  fold_builtin_sincos (location_t loc,
 			 fold_build1_loc (loc, REALPART_EXPR, type, call)));
 }
 
+/* Fold function call to built-in bzero with arguments SRC and LEN
+   into a call to built-in memset (DST, 0, LEN).  */
+
+static tree
+fold_builtin_bzero (location_t loc, tree dst, tree len)
+{
+  if (!validate_arg (dst, POINTER_TYPE)
+      || !validate_arg (len, INTEGER_TYPE))
+    return NULL_TREE;
+
+  tree fn = builtin_decl_implicit (BUILT_IN_MEMSET);
+  /* Call memset and return the result cast to void to detect its use
+     (bzero returns void).  */
+  tree call = build_call_expr_loc (loc, fn, 3, dst, integer_zero_node, len);
+  return fold_convert (void_type_node, call);
+}
+
+/* Fold function call to built-in bcmp with arguments ARG1, ARG2, and LEN
+   into a call to built-in memcmp(ARG1, ARG2, LEN).  */
+
+static tree
+fold_builtin_bcmp (location_t loc, tree arg1, tree arg2, tree len)
+{
+  if (!validate_arg (arg1, POINTER_TYPE)
+      || !validate_arg (arg2, POINTER_TYPE)
+      || !validate_arg (len, INTEGER_TYPE))
+    return NULL_TREE;
+
+  tree fn = builtin_decl_implicit (BUILT_IN_MEMCMP);
+  return build_call_expr_loc (loc, fn, 3, arg1, arg2, len);
+}
+
+/* Fold function call to built-in bcopy with arguments SRC, DST, and LEN
+   into a call to built-in memmove(DST, SRC, LEN).  */
+
+static tree
+fold_builtin_bcopy (location_t loc, tree src, tree dst, tree len)
+{
+  if (!validate_arg (src, POINTER_TYPE)
+      || !validate_arg (dst, POINTER_TYPE)
+      || !validate_arg (len, INTEGER_TYPE))
+    return NULL_TREE;
+
+  /* bcopy has been removed from POSIX in Issue 7 but Issue 6 specifies
+     it's quivalent to memmove (not memcpy).  Call memmove and return
+     the result cast to void to detect its use (bcopy returns void).  */
+  tree fn = builtin_decl_implicit (BUILT_IN_MEMMOVE);
+  return build_call_expr_loc (loc, fn, 3, dst, src, len);
+}
+
 /* Fold function call to builtin memcmp with arguments ARG1 and ARG2.
    Return NULL_TREE if no simplification can be made.  */
 
@@ -8947,6 +9000,9 @@  fold_builtin_2 (location_t loc, tree fndecl, tree arg0, tree arg1)
     CASE_FLT_FN (BUILT_IN_MODF):
       return fold_builtin_modf (loc, arg0, arg1, type);
 
+    case BUILT_IN_BZERO:
+      return fold_builtin_bzero (loc, arg0, arg1);
+
     case BUILT_IN_STRSPN:
       return fold_builtin_strspn (loc, arg0, arg1);
 
@@ -9034,7 +9090,12 @@  fold_builtin_3 (location_t loc, tree fndecl,
 	return do_mpfr_remquo (arg0, arg1, arg2);
     break;
 
+    case BUILT_IN_BCOPY:
+      return fold_builtin_bcopy (loc, arg0, arg1, arg2);;
+
     case BUILT_IN_BCMP:
+      return fold_builtin_bcmp (loc, arg0, arg1, arg2);;
+
     case BUILT_IN_MEMCMP:
       return fold_builtin_memcmp (loc, arg0, arg1, arg2);;
 
diff --git a/gcc/testsuite/gcc.dg/fold-bcopy.c b/gcc/testsuite/gcc.dg/fold-bcopy.c
new file mode 100644
index 0000000..ce5b317
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bcopy.c
@@ -0,0 +1,43 @@ 
+/* PR tree-optimization/80933 - redundant bzero/bcopy calls not eliminated
+   { dg-do compile }
+   { dg-options "-O0 -Wall -fdump-tree-gimple" } */
+
+void f0 (void *p, const void *q, unsigned n)
+{
+  /* Bcopy corresponds to memmmove, not memcpy.  */
+  __builtin_bcopy (q, p, n);
+}
+
+void f1 (void *p, const void *q, unsigned n)
+{
+  /* This call should be eliminated.  */
+  __builtin_bcopy (q, p, 0);
+}
+
+int f2 (const void *p, const void *q, unsigned n)
+{
+  return __builtin_bcmp (q, p, n);
+}
+
+int f3 (const void *p, const void *q, unsigned n)
+{
+  /* This call should be folded into 0.  */
+  return __builtin_bcmp (q, p, 0);
+}
+
+void f4 (void *p, unsigned n)
+{
+  __builtin_bzero (p, n);
+}
+
+void f5 (void *p)
+{
+  /* This call should be eliminated.  */
+  __builtin_bzero (p, 0);
+}
+
+/* Verify that calls to bcmp, bcopy, and bzero have all been removed
+   and one of each replaced with memcmp, memmove, and memset, respectively.
+   The remaining three should be eliminated.
+  { dg-final { scan-tree-dump-not "bcmp|bcopy|bzero" "gimple" } }
+  { dg-final { scan-tree-dump-times "memcmp|memmove|memset" 3 "gimple" } } */
diff --git a/gcc/testsuite/gcc.dg/pr79214.c b/gcc/testsuite/gcc.dg/pr79214.c
index 79d2a25..6cf254fb 100644
--- a/gcc/testsuite/gcc.dg/pr79214.c
+++ b/gcc/testsuite/gcc.dg/pr79214.c
@@ -22,7 +22,7 @@  size_t range (void)
 
 void test_bzero (void)
 {
-  bzero (d, range ());   /* { dg-warning ".__builtin_bzero. writing 4 or more bytes into a region of size 3 overflows the destination" } */
+  bzero (d, range ());   /* { dg-warning ".__builtin_(bzero|memset). writing 4 or more bytes into a region of size 3 overflows the destination" } */
 }
 
 void test_memcpy (void)
diff --git a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c
index 84ec9fb..5a4e777 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c
@@ -4,14 +4,10 @@ 
 char *buffer1;
 char *buffer2;
 
+/* Bzero is not tested because it gets transformed into memset.  */
+
 #define DEFINE_TEST(N) \
 __attribute__((noinline)) \
-void bzero_test_ ## N (int len) \
-{ \
-  __builtin_bzero (buffer1, len); \
-} \
- \
-__attribute__((noinline)) \
 void memcpy_test_ ## N (int len) \
 { \
   __builtin_memcpy (buffer1, buffer2, len); \
@@ -31,7 +27,6 @@  void memset_test_ ## N (int len) \
  \
 void test_stringops_ ## N(int len) \
 { \
-  bzero_test_ ## N (len); \
   memcpy_test_## N (len); \
   mempcpy_test_ ## N (len); \
   memset_test_ ## N (len); \
@@ -64,10 +59,6 @@  int main() {
   return 0;
 }
 
-/* { dg-final-use-not-autofdo { scan-ipa-dump "Single value 8 stringop transformation on __builtin_bzero" "profile" } } */
-/* { dg-final-use-not-autofdo { scan-ipa-dump "Single value 55 stringop transformation on __builtin_bzero" "profile" } } */
-/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Single value 32 stringop transformation on __builtin_bzero" 0 "profile" } } */
-
 /* { dg-final-use-not-autofdo { scan-ipa-dump "Single value 8 stringop transformation on __builtin_memcpy" "profile" } } */
 /* { dg-final-use-not-autofdo { scan-ipa-dump "Single value 55 stringop transformation on __builtin_memcpy" "profile" } } */
 /* { dg-final-use-not-autofdo { scan-ipa-dump-times "Single value 32 stringop transformation on __builtin_memcpy" 0 "profile" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/alias-36.c b/gcc/testsuite/gcc.dg/tree-ssa/alias-36.c
new file mode 100644
index 0000000..61b601a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/alias-36.c
@@ -0,0 +1,28 @@ 
+/* PR tree-optimization/80934 - bzero should be assumed not to escape
+   pointer argument
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-alias" } */
+
+void foobar (void);
+
+void f (void);
+
+void g (void)
+{
+  char d[32];
+  __builtin_memset (d, 0, sizeof d);
+  f ();
+  if (*d != 0)
+    foobar ();
+}
+
+void h (void)
+{
+  char d[32];
+  __builtin_bzero (d, sizeof d);
+  f ();
+  if (*d != 0)
+    foobar ();
+}
+
+/* { dg-final { scan-tree-dump-not "memset|foobar|bzero" "alias" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-30.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-30.c
new file mode 100644
index 0000000..ece8cb2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-30.c
@@ -0,0 +1,31 @@ 
+/* PR tree-optimization/80933 - redundant bzero/bcopy calls not eliminated
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-dse1" } */
+
+void sink (void*);
+
+void test_bcopy (const void *s)
+{
+  char d[33];
+
+  /* Bcopy is transformed into memcpy and those calls are expanded
+     inline in EVRP, before DSE runs, so this test doesn't actually
+     verify that DSE does its job.  */
+  __builtin_bcopy (s, d, sizeof d);
+  __builtin_bcopy (s, d, sizeof d);
+
+  sink (d);
+}
+
+void test_bzero (void)
+{
+  char d[33];
+
+  __builtin_bzero (d, sizeof d);
+  __builtin_bzero (d, sizeof d);
+
+  sink (d);
+}
+
+/* { dg-final { scan-tree-dump-times "builtin_memset" 1 "dse1" } } */
+/* { dg-final { scan-tree-dump-not "builtin_(bcopy|bzero|memcpy)" "dse1" } } */