diff mbox

Fix i386 memcpy/memset expansion (PR target/59229)

Message ID 20131126202052.GM892@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Nov. 26, 2013, 8:20 p.m. UTC
Hi!

As the testcase in the patch shows, if exact memcpy or memset count
is unknown, but max_size is smaller than epilogue_size_needed,
ix86_expand_set_or_movmem can ICE.

The following patch fixes that, bootstrapped/regtested on x86_64-linux
and i686-linux, ok for trunk?

Though, the resulting code doesn't look very good, as everything is expanded
as just epilogue of the copying/memset, I think the probabilities on the
branches expect that all bits of the remaining size are 0 after the main
loop (which isn't done in this case).

2013-11-26  Jakub Jelinek  <jakub@redhat.com>

	PR target/59229
	* config/i386/i386.c (device_alg): Fix up formatting.
	(ix86_expand_set_or_movmem): Handle max_size < epilogue_size_needed
	similarly to count && count < epilogue_size_needed.  Fix up
	comment typo.
	* builtins.c (determine_block_size): Fix comment typo.

	* gcc.c-torture/execute/pr59229.c: New test.


	Jakub

Comments

Jeff Law Nov. 26, 2013, 9:19 p.m. UTC | #1
On 11/26/13 13:20, Jakub Jelinek wrote:
> Hi!
>
> As the testcase in the patch shows, if exact memcpy or memset count
> is unknown, but max_size is smaller than epilogue_size_needed,
> ix86_expand_set_or_movmem can ICE.
>
> The following patch fixes that, bootstrapped/regtested on x86_64-linux
> and i686-linux, ok for trunk?
>
> Though, the resulting code doesn't look very good, as everything is expanded
> as just epilogue of the copying/memset, I think the probabilities on the
> branches expect that all bits of the remaining size are 0 after the main
> loop (which isn't done in this case).
>
> 2013-11-26  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR target/59229
> 	* config/i386/i386.c (device_alg): Fix up formatting.
> 	(ix86_expand_set_or_movmem): Handle max_size < epilogue_size_needed
> 	similarly to count && count < epilogue_size_needed.  Fix up
> 	comment typo.
> 	* builtins.c (determine_block_size): Fix comment typo.
>
> 	* gcc.c-torture/execute/pr59229.c: New test.
OK.
Jeff
Jan Hubicka Nov. 26, 2013, 10:19 p.m. UTC | #2
> On 11/26/13 13:20, Jakub Jelinek wrote:
> >Hi!
> >
> >As the testcase in the patch shows, if exact memcpy or memset count
> >is unknown, but max_size is smaller than epilogue_size_needed,
> >ix86_expand_set_or_movmem can ICE.
> >
> >The following patch fixes that, bootstrapped/regtested on x86_64-linux
> >and i686-linux, ok for trunk?
> >
> >Though, the resulting code doesn't look very good, as everything is expanded
> >as just epilogue of the copying/memset, I think the probabilities on the
> >branches expect that all bits of the remaining size are 0 after the main
> >loop (which isn't done in this case).
> >
> >2013-11-26  Jakub Jelinek  <jakub@redhat.com>
> >
> >	PR target/59229
> >	* config/i386/i386.c (device_alg): Fix up formatting.
> >	(ix86_expand_set_or_movmem): Handle max_size < epilogue_size_needed
> >	similarly to count && count < epilogue_size_needed.  Fix up
> >	comment typo.
> >	* builtins.c (determine_block_size): Fix comment typo.
> >
> >	* gcc.c-torture/execute/pr59229.c: New test.
> OK.

Thanks,
I was looking into this testcase, too.
It seems to me that perhaps we can get a bit better by doing the misaligned move
prologue in these cases up to 128bytes when it starts to get too large.
The code looks better than one porduced by the standard epilogue code and it
works well on core.  I wonder how much older chips care about the misalignment,
but it is what move_by_pieces would do for constant size anyway, right?

Honza
diff mbox

Patch

--- gcc/config/i386/i386.c.jj	2013-11-25 18:30:18.000000000 +0100
+++ gcc/config/i386/i386.c	2013-11-26 11:27:38.116198901 +0100
@@ -23453,7 +23453,8 @@  decide_alg (HOST_WIDE_INT count, HOST_WI
   /* If expected size is not known but max size is small enough
      so inline version is a win, set expected size into
      the range.  */
-  if (max > 1 && (unsigned HOST_WIDE_INT)max >= max_size && expected_size == -1)
+  if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
+      && expected_size == -1)
     expected_size = min_size / 2 + max_size / 2;
 
   /* If user specified the algorithm, honnor it if possible.  */
@@ -23752,7 +23753,7 @@  ix86_expand_set_or_movmem (rtx dst, rtx
   bool noalign;
   enum machine_mode move_mode = VOIDmode;
   int unroll_factor = 1;
-  /* TODO: Once vlaue ranges are available, fill in proper data.  */
+  /* TODO: Once value ranges are available, fill in proper data.  */
   unsigned HOST_WIDE_INT min_size = 0;
   unsigned HOST_WIDE_INT max_size = -1;
   unsigned HOST_WIDE_INT probable_max_size = -1;
@@ -23967,21 +23968,19 @@  ix86_expand_set_or_movmem (rtx dst, rtx
 	 loop variant.  */
       if (issetmem && epilogue_size_needed > 2 && !promoted_val)
 	force_loopy_epilogue = true;
-      if (count)
+      if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
+	  || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
 	{
-	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
-	    {
-	      /* If main algorithm works on QImode, no epilogue is needed.
-		 For small sizes just don't align anything.  */
-	      if (size_needed == 1)
-		desired_align = align;
-	      else
-		goto epilogue;
-	    }
+	  /* If main algorithm works on QImode, no epilogue is needed.
+	     For small sizes just don't align anything.  */
+	  if (size_needed == 1)
+	    desired_align = align;
+	  else
+	    goto epilogue;
 	}
-      else if (min_size < (unsigned HOST_WIDE_INT)epilogue_size_needed)
+      else if (!count
+	       && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
 	{
-	  gcc_assert (max_size >= (unsigned HOST_WIDE_INT)epilogue_size_needed);
 	  label = gen_label_rtx ();
 	  emit_cmp_and_jump_insns (count_exp,
 				   GEN_INT (epilogue_size_needed),
--- gcc/builtins.c.jj	2013-11-22 21:03:07.000000000 +0100
+++ gcc/builtins.c	2013-11-26 11:15:11.992044093 +0100
@@ -3146,7 +3146,7 @@  determine_block_size (tree len, rtx len_
 	}
       else if (range_type == VR_ANTI_RANGE)
 	{
-	  /* Anti range 0...N lets us to determine minmal size to N+1.  */
+	  /* Anti range 0...N lets us to determine minimal size to N+1.  */
 	  if (min.is_zero ())
 	    {
 	      if ((max + double_int_one).fits_uhwi ())
@@ -3156,7 +3156,7 @@  determine_block_size (tree len, rtx len_
 
 	     int n;
 	     if (n < 100)
-	       memcpy (a,b, n)
+	       memcpy (a, b, n)
 
 	     Produce anti range allowing negative values of N.  We still
 	     can use the information and make a guess that N is not negative.
--- gcc/testsuite/gcc.c-torture/execute/pr59229.c.jj	2013-11-26 11:32:07.590806813 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr59229.c	2013-11-26 11:31:56.000000000 +0100
@@ -0,0 +1,29 @@ 
+int i;
+
+__attribute__((noinline, noclone)) void
+bar (char *p)
+{
+  if (i < 1 || i > 6)
+    __builtin_abort ();
+  if (__builtin_memcmp (p, "abcdefg", i + 1) != 0)
+    __builtin_abort ();
+  __builtin_memset (p, ' ', 7);
+}
+
+__attribute__((noinline, noclone)) void
+foo (char *p, unsigned long l)
+{
+  if (l < 1 || l > 6)
+    return;
+  char buf[7];
+  __builtin_memcpy (buf, p, l + 1);
+  bar (buf);
+}
+
+int
+main ()
+{
+  for (i = 0; i < 16; i++)
+    foo ("abcdefghijklmnop", i);
+  return 0;
+}