diff mbox

Limit SH strncmp inline expansion (PR target/78460)

Message ID alpine.DEB.2.20.1708152114250.19980@digraph.polyomino.org.uk
State New
Headers show

Commit Message

Joseph Myers Aug. 15, 2017, 9:15 p.m. UTC
GCC mainline built for sh4-linux-gnu runs out of memory building a
glibc test, which calls strncmp with very large constant size
argument, resulting in the SH inline strncmp expansion trying to
inline a fully unrolled expansion of strncmp for that size.

This patch limits that fully unrolled expansion to the case of less
than 32 bytes.  This is explicitly *not* trying to be optimal in any
way (very likely a lower threshold makes sense), just to limit enough
to avoid the out-of-memory issue in the glibc testsuite.

I have *not* run the GCC testsuite for SH.  I have verified that this
allows the glibc testsuite to build OK, with both GCC mainline and GCC
7 branch (and that the included test builds quickly with patched GCC,
runs out of memory with unpatched GCC).

OK to commit (mainline and GCC 7 branch)?

gcc:
2017-08-15  Joseph Myers  <joseph@codesourcery.com>

	PR target/78460
	* config/sh/sh-mem.cc (sh_expand_cmpnstr): Only unroll for
	constant count if that count is less than 32.

gcc/testsuite:
2017-08-15  Joseph Myers  <joseph@codesourcery.com>

	PR target/78460
	* gcc.c-torture/compile/string-large-1.c: New test.

Comments

Oleg Endo Aug. 15, 2017, 11:27 p.m. UTC | #1
On Tue, 2017-08-15 at 21:15 +0000, Joseph Myers wrote:
> GCC mainline built for sh4-linux-gnu runs out of memory building a
> glibc test, which calls strncmp with very large constant size
> argument, resulting in the SH inline strncmp expansion trying to
> inline a fully unrolled expansion of strncmp for that size.
> 
> This patch limits that fully unrolled expansion to the case of less
> than 32 bytes.  This is explicitly *not* trying to be optimal in any
> way (very likely a lower threshold makes sense), just to limit enough
> to avoid the out-of-memory issue in the glibc testsuite.
> 
> I have *not* run the GCC testsuite for SH.  I have verified that this
> allows the glibc testsuite to build OK, with both GCC mainline and
> GCC 7 branch (and that the included test builds quickly with patched
> GCC, runs out of memory with unpatched GCC).
> 
> OK to commit (mainline and GCC 7 branch)?

Yes, that's OK.
This is an older issue.  Please also add a reference to PR 67712 in
your commit.  Can you also apply it to GCC 6 branch please?

Thanks!

Cheers,
Oleg
Joseph Myers Aug. 15, 2017, 11:44 p.m. UTC | #2
On Wed, 16 Aug 2017, Oleg Endo wrote:

> This is an older issue.  Please also add a reference to PR 67712 in
> your commit.  Can you also apply it to GCC 6 branch please?

I can't reproduce the problem with GCC 6 branch; the glibc testsuite 
builds fine without out-of-memory issues, as does the test included in the 
patch, despite the GCC code in question being essentially unchanged.  
That's why the bug appears to me as a regression in GCC 7.
Oleg Endo Aug. 16, 2017, 3:03 p.m. UTC | #3
On Tue, 2017-08-15 at 23:44 +0000, Joseph Myers wrote:

> > This is an older issue.  Please also add a reference to PR 67712 in
> > your commit.  Can you also apply it to GCC 6 branch please?

> I can't reproduce the problem with GCC 6 branch; the glibc testsuite 
> builds fine without out-of-memory issues, as does the test included
> in the  patch, despite the GCC code in question being essentially
> unchanged.  That's why the bug appears to me as a regression in GCC
> 7.

OK, thanks for the investigation!

Cheers,
Oleg
diff mbox

Patch

Index: gcc/config/sh/sh-mem.cc
===================================================================
--- gcc/config/sh/sh-mem.cc	(revision 251105)
+++ gcc/config/sh/sh-mem.cc	(working copy)
@@ -349,12 +349,13 @@  sh_expand_cmpnstr (rtx *operands)
 
   rtx len = copy_to_mode_reg (SImode, operands[3]);
   int constp = CONST_INT_P (operands[3]);
+  HOST_WIDE_INT bytes = constp ? INTVAL (operands[3]) : 0;
 
   const unsigned int addr1_alignment = MEM_ALIGN (operands[1]) / BITS_PER_UNIT;
   const unsigned int addr2_alignment = MEM_ALIGN (operands[2]) / BITS_PER_UNIT;
 
   /* Loop on a register count.  */
-  if (constp)
+  if (constp && bytes >= 0 && bytes < 32)
     {
       rtx tmp0 = gen_reg_rtx (SImode);
       rtx tmp3 = gen_reg_rtx (SImode);
@@ -363,7 +364,6 @@  sh_expand_cmpnstr (rtx *operands)
       rtx_code_label *L_loop_long = gen_label_rtx ();
       rtx_code_label *L_end_loop_long = gen_label_rtx ();
 
-      int bytes = INTVAL (operands[3]);
       int witers = bytes / 4;
 
       if (witers > 1)
Index: gcc/testsuite/gcc.c-torture/compile/string-large-1.c
===================================================================
--- gcc/testsuite/gcc.c-torture/compile/string-large-1.c	(nonexistent)
+++ gcc/testsuite/gcc.c-torture/compile/string-large-1.c	(working copy)
@@ -0,0 +1,119 @@ 
+/* Test built-in string functions with large sizes.  PR 78460.  */
+
+typedef __SIZE_TYPE__ size_t;
+
+#define SIZE1 ((size_t) -1)
+#define SIZE2 (SIZE1 >> 1)
+#define SIZE3 ((unsigned int) -1)
+#define SIZE4 (SIZE3 >> 1)
+
+volatile int v1, v2, v3, v4;
+void *volatile vp1, *volatile vp2, *volatile vp3, *volatile vp4;
+
+void
+test_memchr (const void *a, int b)
+{
+  vp1 = __builtin_memchr (a, b, SIZE1);
+  vp2 = __builtin_memchr (a, b, SIZE2);
+  vp3 = __builtin_memchr (a, b, SIZE3);
+  vp4 = __builtin_memchr (a, b, SIZE4);
+}
+
+void
+test_memcmp (const void *a, const void *b)
+{
+  v1 = __builtin_memcmp (a, b, SIZE1);
+  v2 = __builtin_memcmp (a, b, SIZE2);
+  v3 = __builtin_memcmp (a, b, SIZE3);
+  v4 = __builtin_memcmp (a, b, SIZE4);
+}
+
+void
+test_memcpy (void *a, const void *b)
+{
+  vp1 = __builtin_memcpy (a, b, SIZE1);
+  vp2 = __builtin_memcpy (a, b, SIZE2);
+  vp3 = __builtin_memcpy (a, b, SIZE3);
+  vp4 = __builtin_memcpy (a, b, SIZE4);
+}
+
+void
+test_memmove (void *a, const void *b)
+{
+  vp1 = __builtin_memmove (a, b, SIZE1);
+  vp2 = __builtin_memmove (a, b, SIZE2);
+  vp3 = __builtin_memmove (a, b, SIZE3);
+  vp4 = __builtin_memmove (a, b, SIZE4);
+}
+
+void
+test_mempcpy (void *a, const void *b)
+{
+  vp1 = __builtin_mempcpy (a, b, SIZE1);
+  vp2 = __builtin_mempcpy (a, b, SIZE2);
+  vp3 = __builtin_mempcpy (a, b, SIZE3);
+  vp4 = __builtin_mempcpy (a, b, SIZE4);
+}
+
+void
+test_memset (void *a, int b)
+{
+  vp1 = __builtin_memset (a, b, SIZE1);
+  vp2 = __builtin_memset (a, b, SIZE2);
+  vp3 = __builtin_memset (a, b, SIZE3);
+  vp4 = __builtin_memset (a, b, SIZE4);
+}
+
+void
+test_stpncpy (char *a, const char *b)
+{
+  vp1 = __builtin_stpncpy (a, b, SIZE1);
+  vp2 = __builtin_stpncpy (a, b, SIZE2);
+  vp3 = __builtin_stpncpy (a, b, SIZE3);
+  vp4 = __builtin_stpncpy (a, b, SIZE4);
+}
+
+void
+test_strndup (const char *a)
+{
+  vp1 = __builtin_strndup (a, SIZE1);
+  vp2 = __builtin_strndup (a, SIZE2);
+  vp3 = __builtin_strndup (a, SIZE3);
+  vp4 = __builtin_strndup (a, SIZE4);
+}
+
+void
+test_strncasecmp (const char *a, const char *b)
+{
+  v1 = __builtin_strncasecmp (a, b, SIZE1);
+  v2 = __builtin_strncasecmp (a, b, SIZE2);
+  v3 = __builtin_strncasecmp (a, b, SIZE3);
+  v4 = __builtin_strncasecmp (a, b, SIZE4);
+}
+
+void
+test_strncat (char *a, const char *b)
+{
+  vp1 = __builtin_strncat (a, b, SIZE1);
+  vp2 = __builtin_strncat (a, b, SIZE2);
+  vp3 = __builtin_strncat (a, b, SIZE3);
+  vp4 = __builtin_strncat (a, b, SIZE4);
+}
+
+void
+test_strncmp (const char *a, const char *b)
+{
+  v1 = __builtin_strncmp (a, b, SIZE1);
+  v2 = __builtin_strncmp (a, b, SIZE2);
+  v3 = __builtin_strncmp (a, b, SIZE3);
+  v4 = __builtin_strncmp (a, b, SIZE4);
+}
+
+void
+test_strncpy (char *a, const char *b)
+{
+  vp1 = __builtin_strncpy (a, b, SIZE1);
+  vp2 = __builtin_strncpy (a, b, SIZE2);
+  vp3 = __builtin_strncpy (a, b, SIZE3);
+  vp4 = __builtin_strncpy (a, b, SIZE4);
+}