[v3,12/18] string: Improve generic strcpy

Message ID 1515588482-15744-13-git-send-email-adhemerval.zanella@linaro.org
State New
Headers show
Series
  • Improve generic string routines
Related show

Commit Message

Adhemerval Zanella Jan. 10, 2018, 12:47 p.m.
From: Adhemerval Zanella <adhemerval.zanella@linaro.com>

The new generic implementation tries to use word operations along with
the new string-fz{b,i} functions even for inputs with different
alignments (which still uses aligned accesses plus a merge operation
to get a correct word-by-word copy).

Checked on x86_64-linux-gnu, i686-linux-gnu, sparc64-linux-gnu,
and sparcv9-linux-gnu by removing the arch-specific assembly
implementation and disabling multi-arch (it covers both LE and BE
for 64 and 32 bits).

	Richard Henderson  <rth@twiddle.net>
	Adhemerval Zanella  <adhemerval.zanella@linaro.org>

	* string/strcpy.c: Rewrite using memcopy.h, string-fzb.h,
        string-fzi.h.
	* string/test-strcpy.c (test_main): Add move coverage.
---
 string/strcpy.c      | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 string/test-strcpy.c |  24 +++++++++++-
 2 files changed, 130 insertions(+), 3 deletions(-)

Patch

diff --git a/string/strcpy.c b/string/strcpy.c
index a4cce89..358b1b1 100644
--- a/string/strcpy.c
+++ b/string/strcpy.c
@@ -15,8 +15,13 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <stddef.h>
 #include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include <string-fzb.h>
+#include <string-fzi.h>
+#include <string-extbyte.h>
+#include <memcopy.h>
 
 #undef strcpy
 
@@ -28,6 +33,106 @@ 
 char *
 STRCPY (char *dest, const char *src)
 {
-  return memcpy (dest, src, strlen (src) + 1);
+  char *dst = dest;
+  const op_t *xs;
+  op_t *xd;
+  op_t ws;
+
+#if _STRING_ARCH_unaligned
+  /* For architectures which support unaligned memory operations: first
+     align the source pointer, then read op_t bytes at a time until a zero
+     is found, writing each word unaligned to the destination.  */
+  uintptr_t n = -(uintptr_t) src % sizeof (op_t);
+  for (uintptr_t i = 0; i < n; ++i)
+    {
+      unsigned c = *src++;
+      *dst++ = c;
+      if (c == '\0')
+	return dest;
+    }
+  xs = (const op_t *) src;
+  ws = *xs++;
+  xd = (op_t *) dst;
+  while (!has_zero (ws))
+    {
+      *xd++ = ws;
+      ws = *xs++;
+    }
+#else
+  /* For architectures which only support aligned accesses: first align
+     the destination pointer.  */
+  uintptr_t n = -(uintptr_t) dst % sizeof (op_t);
+  for (uintptr_t i = 0; i < n; ++i)
+    {
+      unsigned c = *src++;
+      *dst++ = c;
+      if (c == '\0')
+	return dest;
+    }
+  xd = (op_t *) dst;
+
+  /* Destination is now aligned to op_t, while source might not be.  */
+  uintptr_t ofs = (uintptr_t) src % sizeof (op_t);
+  if (ofs == 0)
+    {
+      /* Aligned loop: copy whole words.  Once a zero is found, exit so
+	 the remaining bytes are copied one at a time below.  */
+      xs = (const op_t *) src;
+
+      ws = *xs++;
+      while (!has_zero (ws))
+	{
+	  *xd++ = ws;
+	  ws = *xs++;
+	}
+    }
+  else
+    {
+      /* Unaligned loop: align the source pointer and mask off the
+	 undesirable bytes which are not part of the string.  */
+      op_t wsa, wsb;
+      uintptr_t sh_1, sh_2;
+
+      xs = (const op_t *)(src - ofs);
+      wsa = *xs++;
+      sh_1 = ofs * CHAR_BIT;
+      sh_2 = sizeof(op_t) * CHAR_BIT - sh_1;
+
+      /* Align the first partial op_t from source, filling the rest of the
+	 bytes with 0xff so that we can also apply the has_zero test to see
+         if we have already reached EOS.  If we have, then we can simply
+         fall through to the final byte copies.  */
+      ws = MERGE (wsa, sh_1, (op_t)-1, sh_2);
+      if (!has_zero (ws))
+	{
+	  while (1)
+	    {
+	      wsb = *xs++;
+	      ws = MERGE (wsa, sh_1, wsb, sh_2);
+	      if (has_zero (wsb))
+		break;
+	      *xd++ = ws;
+	      wsa = wsb;
+	    }
+
+	  /* WS may contain bytes not yet written to the destination.
+	     Write them down and merge with the op_t containing the EOS
+	     byte.  */
+	  if (!has_zero (ws))
+	    {
+	      *xd++ = ws;
+	      ws = MERGE (wsb, sh_1, ws, sh_2);
+	    }
+	}
+    }
+#endif
+
+  /* Copy the final bytes from WS one at a time, EOS included.  */
+  dst = (char *) xd;
+  uintptr_t fz = index_first_zero (ws);
+  for (uintptr_t i = 0; i < fz + 1; i++)
+    *dst++ = extractbyte (ws, i);
+
+  return dest;
 }
 libc_hidden_builtin_def (strcpy)
diff --git a/string/test-strcpy.c b/string/test-strcpy.c
index 2a1bf93..fa03c73 100644
--- a/string/test-strcpy.c
+++ b/string/test-strcpy.c
@@ -207,7 +207,7 @@  do_random_tests (void)
 int
 test_main (void)
 {
-  size_t i;
+  size_t i, j;
 
   test_init ();
 
@@ -222,12 +222,26 @@  test_main (void)
       do_test (0, 0, i, BIG_CHAR);
       do_test (0, i, i, SMALL_CHAR);
       do_test (i, 0, i, BIG_CHAR);
+
+      for (j = 1; j < 16; ++j)
+	{
+	  do_test (0, 0, i + j, SMALL_CHAR);
+	  do_test (0, 0, i + j, BIG_CHAR);
+	  do_test (0, i, i + j, SMALL_CHAR);
+	  do_test (i, 0, i + j, BIG_CHAR);
+	}
     }
 
   for (i = 1; i < 8; ++i)
     {
       do_test (0, 0, 8 << i, SMALL_CHAR);
       do_test (8 - i, 2 * i, 8 << i, SMALL_CHAR);
+
+      for (j = 1; j < 8; ++j)
+	{
+	  do_test (0, 0, (8 << i) + j, SMALL_CHAR);
+	  do_test (8 - i, 2 * i, (8 << i) + j, SMALL_CHAR);
+	}
     }
 
   for (i = 1; i < 8; ++i)
@@ -236,6 +250,14 @@  test_main (void)
       do_test (2 * i, i, 8 << i, BIG_CHAR);
       do_test (i, i, 8 << i, SMALL_CHAR);
       do_test (i, i, 8 << i, BIG_CHAR);
+
+      for (j = 1; j < 8; ++j)
+	{
+	  do_test (i, 2 * i, (8 << i) + j, SMALL_CHAR);
+	  do_test (2 * i, i, (8 << i) + j, BIG_CHAR);
+	  do_test (i, i, (8 << i) + j, SMALL_CHAR);
+	  do_test (i, i, (8 << i) + j, BIG_CHAR);
+	}
     }
 
   do_random_tests ();