PR tree-optimization/90626 - fold strcmp(a, b) == 0 to zero when one string length is exact and the other is unequal
gcc/ChangeLog:
PR tree-optimization/90626
* tree-ssa-strlen.c (strxcmp_unequal): New function.
(handle_builtin_string_cmp): Call it.
gcc/testsuite/ChangeLog:
PR tree-optimization/90626
* gcc.dg/strlenopt-65.c: New test.
* gcc.dg/strlenopt-66.c: New test.
* gcc.dg/strlenopt.h (strcmp, strncmp): Declare.
new file mode 100644
@@ -0,0 +1,162 @@
+/* PRE tree-optimization/90626 - fold strcmp(a, b) == 0 to zero when
+ one string length is exact and the other is unequal
+ { dg-do compile }
+ { dg-options "-O2 -Wall -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+typedef __SIZE_TYPE__ size_t;
+
+extern void abort (void);
+extern void* memcpy (void *, const void *, size_t);
+extern int strcmp (const char *, const char *);
+extern int strncmp (const char *, const char *, size_t);
+
+#define CAT(x, y) x ## y
+#define CONCAT(x, y) CAT (x, y)
+#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do { \
+ extern void FAILNAME (name) (void); \
+ FAILNAME (name)(); \
+ } while (0)
+
+/* Macro to emit a call to function named
+ call_in_true_branch_not_eliminated_on_line_NNN()
+ for each call that's expected to be eliminated. The dg-final
+ scan-tree-dump-time directive at the bottom of the test verifies
+ that no such call appears in output. */
+#define ELIM_IF_TRUE(expr) \
+ if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0
+
+/* Macro to emit a call to a function named
+ call_made_in_{true,false}_branch_on_line_NNN()
+ for each call that's expected to be retained. The dg-final
+ scan-tree-dump-time directive at the bottom of the test verifies
+ that the expected number of both kinds of calls appears in output
+ (a pair for each line with the invocation of the KEEP() macro. */
+#define TEST_KEEP(expr) \
+ if (expr) \
+ FAIL (made_in_true_branch); \
+ else \
+ FAIL (made_in_false_branch)
+
+#define FOLD(init1, init2, arg1, arg2, bnd) \
+ do { \
+ char a[8], b[8]; \
+ sink (a, b); \
+ memcpy (a, init1, sizeof init1 - 1); \
+ memcpy (b, init2, sizeof init2 - 1); \
+ ELIM_IF_TRUE (0 != CMPFUNC (arg1, arg2, bnd)); \
+ } while (0)
+
+#define KEEP(init1, init2, arg1, arg2, bnd) \
+ do { \
+ char a[8], b[8]; \
+ sink (a, b); \
+ memcpy (a, init1, sizeof init1 - 1); \
+ memcpy (b, init2, sizeof init2 - 1); \
+ TEST_KEEP (0 == CMPFUNC (arg1, arg2, bnd)); \
+ } while (0)
+
+const char s0[1] = "";
+const char s00[2] = "\0";
+const char s10[2] = "1";
+const char s20[2] = "2";
+
+void sink (void*, ...);
+
+void test_strcmp_elim (void)
+{
+#undef CMPFUNC
+#define CMPFUNC(a, b, dummy) strcmp (a, b)
+
+ FOLD (s00, s10, "\0", "1", -1);
+ FOLD (s00, s10, "\0", b, -1);
+ FOLD (s00, s10, "\0", s10, -1);
+
+ FOLD (s00, s10, s0, "1", -1);
+ FOLD (s00, s10, s0, b, -1);
+ FOLD (s00, s10, s0, s10, -1);
+
+ FOLD ("\0", "1", s0, "1", -1);
+ FOLD ("\0", "1", s0, b, -1);
+ FOLD ("\0", "1", s0, s10, -1);
+
+ FOLD ("2", "\0", "2", "\0", -1);
+ FOLD ("2", "\0", s20, s0, -1);
+
+ FOLD ("\0", "1", a, b, -1);
+ FOLD ("2", "\0", a, b, -1);
+
+ FOLD ("4\0", "44", a, b, -1);
+ FOLD ("55", "5\0", a, b, -1);
+
+ FOLD ("666\0", "6666", a, "6666", -1);
+ FOLD ("666\0", "6666", a, b, -1);
+ FOLD ("7777", "777\0", a, b, -1);
+
+ /* Avoid testing substrings of equal length with different characters.
+ The optimization doesn't have access to the contents of the strings
+ so it can't determine whether they are equal.
+
+ FOLD ("111\0", "112", a, b, -1);
+ FOLD ("112", "111\0", a, b, -1); */
+}
+
+const char s123[] = "123";
+const char s1230[] = "123\0";
+
+const char s1234[] = "1234";
+const char s12340[] = "1234\0";
+
+void test_strncmp_elim (void)
+{
+#undef CMPFUNC
+#define CMPFUNC(a, b, n) strncmp (a, b, n)
+
+ FOLD (s1230, s1234, "123", "1234", 4);
+ FOLD (s1234, s1230, "1234", "123", 4);
+
+ FOLD (s1230, s1234, "123", s1234, 4);
+ FOLD (s1234, s1230, "1234", s123, 4);
+
+ FOLD (s1230, s1234, s123, "1234", 4);
+ FOLD (s1234, s1230, s1234, "123", 4);
+
+ FOLD (s1230, s1234, s123, b, 4);
+ FOLD (s1234, s1230, s1234, b, 4);
+
+ FOLD (s1230, s1234, a, b, 4);
+ FOLD (s1234, s1230, a, b, 4);
+
+ FOLD ("123\0", "1234", a, b, 5);
+ FOLD ("1234", "123\0", a, b, 5);
+}
+
+
+#line 1000
+
+void test_strcmp_keep (const char *s, const char *t)
+{
+#undef CMPFUNC
+#define CMPFUNC(a, b, dummy) strcmp (a, b)
+
+ KEEP ("1", "1", a, b, -1);
+
+ KEEP ("1\0", "1", a, b, -1);
+ KEEP ("1", "1\0", a, b, -1);
+
+ KEEP ("12\0", "12", a, b, -1);
+ KEEP ("12", "12\0", a, b, -1);
+
+ KEEP ("111\0", "111", a, b, -1);
+ KEEP ("112", "112\0", a, b, -1);
+
+ KEEP (s, t, a, b, -1);
+}
+
+/* { dg-final { scan-tree-dump-times "call_in_true_branch_not_eliminated_" 0 "optimized" } }
+
+ { dg-final { scan-tree-dump-times "call_made_in_true_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 8 "optimized" } }
+ { dg-final { scan-tree-dump-times "call_made_in_false_branch_on_line_1\[0-9\]\[0-9\]\[0-9\]" 8 "optimized" } } */
new file mode 100644
@@ -0,0 +1,72 @@
+/* PRE tree-optimization/90626 - fold strcmp(a, b) == 0 to zero when
+ one string length is exact and the other is unequal
+ { dg-do compile }
+ { dg-options "-O2 -Wall -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+#define A(expr) \
+ ((expr) \
+ ? (void)0 \
+ : (__builtin_printf ("assertion failed on line %i: %s\n", \
+ __LINE__, #expr), \
+ __builtin_abort ()))
+
+
+__attribute__ ((noclone, noinline, noipa)) void
+clobber (void *p, int x, size_t n)
+{
+ for (volatile unsigned char *q = p; n--; )
+ *q = x;
+}
+
+__attribute__ ((noclone, noinline, noipa)) void
+test_strcmp (void)
+{
+ char a[8], b[8];
+ strcpy (a, "1235");
+ strcpy (b, "1234");
+
+ A (strcmp (a, b));
+
+ clobber (a, 0, sizeof a);
+ clobber (b, 0, sizeof b);
+ clobber (b + 4, '5', 1);
+
+ memcpy (a, "1234", 4);
+ memcpy (b, "1234", 4);
+
+ A (0 > strcmp (a, b));
+ A (0 < strcmp (b, a));
+}
+
+__attribute__ ((noclone, noinline, noipa)) void
+test_strncmp (void)
+{
+ char a[8], b[8];
+ strcpy (a, "1235");
+ strcpy (b, "1234");
+
+ A (0 == strncmp (a, b, 1));
+ A (0 == strncmp (a, b, 2));
+ A (0 == strncmp (a, b, 3));
+ A (0 < strncmp (a, b, 4));
+ A (0 > strncmp (b, a, 4));
+
+ clobber (a, 0, sizeof a);
+ clobber (b, 0, sizeof b);
+ clobber (b + 4, '5', 1);
+
+ memcpy (a, "1234", 4);
+ memcpy (b, "1234", 4);
+
+ A (0 == strncmp (a, b, 4));
+ A (0 > strncmp (a, b, 5));
+ A (0 < strncmp (b, a, 5));
+}
+
+int main (void)
+{
+ test_strcmp ();
+ test_strncmp ();
+}
@@ -15,6 +15,8 @@ void *memmove (void *, const void *, size_t);
char *strcpy (char *__restrict, const char *__restrict);
char *strcat (char *__restrict, const char *__restrict);
char *strchr (const char *, int);
+int strcmp (const char *, const char *);
+int strncmp (const char *, const char *, size_t);
void *memset (void *, int, size_t);
int memcmp (const void *, const void *, size_t);
int strcmp (const char *, const char *);
@@ -2941,6 +2941,73 @@ handle_builtin_memcmp (gimple_stmt_iterator *gsi)
return true;
}
+/* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
+ the result of 0 == strncmp (A, B, N) (which is the same as strcmp for
+ sufficiently large N). Otherwise return false. */
+
+static bool
+strxcmp_unequal (int idx1, int idx2, unsigned HOST_WIDE_INT n)
+{
+ unsigned HOST_WIDE_INT len1;
+ unsigned HOST_WIDE_INT len2;
+
+ bool nulterm1;
+ bool nulterm2;
+
+ if (idx1 < 0)
+ {
+ len1 = ~idx1;
+ nulterm1 = true;
+ }
+ else if (strinfo *si = get_strinfo (idx1))
+ {
+ if (tree_fits_uhwi_p (si->nonzero_chars))
+ {
+ len1 = tree_to_uhwi (si->nonzero_chars);
+ nulterm1 = si->full_string_p;
+ }
+ else
+ return false;
+ }
+ else
+ return false;
+
+ if (idx2 < 0)
+ {
+ len2 = ~idx2;
+ nulterm1 = true;
+ }
+ else if (strinfo *si = get_strinfo (idx2))
+ {
+ if (tree_fits_uhwi_p (si->nonzero_chars))
+ {
+ len2 = tree_to_uhwi (si->nonzero_chars);
+ nulterm2 = si->full_string_p;
+ }
+ else
+ return false;
+ }
+ else
+ return false;
+
+ /* N is set to UHWI_MAX for strcmp and less to strncmp. Adjust
+ the length of each string to consider to be no more than N. */
+ if (len1 > n)
+ len1 = n;
+ if (len2 > n)
+ len2 = n;
+
+ if (len1 != len2 && (nulterm1 || nulterm2))
+ /* The string lengths are definitely unequal and the result can
+ be folded to one (since it's used for comparison with zero). */
+ return true;
+
+ /* The string lengths may be equal or unequal. Even when equal and
+ both strings nul-terminated, without the string contents there's
+ no way to determine whether they are equal. */
+ return false;
+}
+
/* Given an index to the strinfo vector, compute the string length
for the corresponding string. Return -1 when unknown. */
@@ -3126,18 +3193,12 @@ handle_builtin_string_cmp (gimple_stmt_iterator *gsi)
return false;
}
- /* When the lengths of both arguments are known, and they are unequal,
+ /* When the lengths of the arguments are known to be unequal
we can safely fold the call to a non-zero value for strcmp;
othewise, do nothing now. */
if (idx1 != 0 && idx2 != 0)
{
- HOST_WIDE_INT const_string_leni1 = compute_string_length (idx1);
- HOST_WIDE_INT const_string_leni2 = compute_string_length (idx2);
-
- if (!is_ncmp
- && const_string_leni1 != -1
- && const_string_leni2 != -1
- && const_string_leni1 != const_string_leni2)
+ if (strxcmp_unequal (idx1, idx2, length))
{
replace_call_with_value (gsi, integer_one_node);
return true;