Replace strstr() implementations with faster two-way version

Submitted by Jody Bruchon on Jan. 4, 2015, 7:05 p.m.

Details

Message ID 54A98EE6.70402@jodybruchon.com
State New
Headers show

Commit Message

Jody Bruchon Jan. 4, 2015, 7:05 p.m.
From c42bb573e9a2deb841181082efd3313d89494cf9 Mon Sep 17 00:00:00 2001
From: Jody Bruchon <jody@c02ware.com>
Date: Sun, 4 Jan 2015 13:45:11 -0500
Subject: [PATCH] Replace strstr() implementations with faster two-way version

Two strstr() implementations exist within uClibc: a "naive" one
used when wide character support is enabled and a faster version
by Stephen R. van den Berg. I rewrote the naive implementation
from scratch to scan potential haystack matches in both directions
to reject failed matches more quickly. I benchmarked five strstr()
implementations on x86_64 using code from libc-bench, available at
http://www.etalabs.net/src/libc-bench/string.c

Typical benchmark results:

naive strstr(): 10.615s
SJVDB strstr():  8.952s
My strstr():     2.511s
glibc strstr():  0.225s
musl strstr():   0.215s

Depending on the data and the optimization flags thrown at it, my
strstr() rewrite is 4x-10x faster than the naive version and is
3x-8x faster than the SJVDB version. The .text is also a few bytes
smaller.

This needs testing on other architectures than x86_64.

Signed-off-by: Jody Bruchon <jody@c02ware.com>
---
 libc/string/generic/strstr.c | 150 ++++++++++++++-----------------------------
 libc/string/strstr.c         |  63 ++++++++++++------
 2 files changed, 90 insertions(+), 123 deletions(-)

Comments

Jody Bruchon Jan. 4, 2015, 7:07 p.m.
Whoops, it seems I typed Stephen's initials wrong in that patch text. 
s/SJVDB/SRVDB/g and apologies for that.
aldot Feb. 18, 2015, 8:14 a.m.
On January 4, 2015 8:07:45 PM GMT+01:00, Jody Bruchon <jody@jodybruchon.com> wrote:
>Whoops, it seems I typed Stephen's initials wrong in that patch text. 
>s/SJVDB/SRVDB/g and apologies for that.

I have queued to look at it in detail after the release.
Will get back to you then.
Thanks for your patience!

Patch hide | download patch | download mbox

diff --git a/libc/string/generic/strstr.c b/libc/string/generic/strstr.c
index dd10176..c58bb6f 100644
--- a/libc/string/generic/strstr.c
+++ b/libc/string/generic/strstr.c
@@ -1,112 +1,56 @@ 
-/* Return the offset of one string within another.
-   Copyright (C) 1994,1996,1997,2000,2001,2003 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
 /*
- * My personal strstr() implementation that beats most other algorithms.
- * Until someone tells me otherwise, I assume that this is the
- * fastest implementation of strstr() in C.
- * I deliberately chose not to comment it.  You should have at least
- * as much fun trying to understand it, as I had to write it :-).
+ * Copyright (C) 2002     Manuel Novoa III
+ * Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
+ * Copyright (C) 2015 Jody Bruchon <jody@jodybruchon.com>
  *
- * Stephen R. van den Berg, berg@pool.informatik.rwth-aachen.de	*/
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
 
 #include <string.h>
 
-
-typedef unsigned chartype;
-
-char *strstr (const char *phaystack, const char *pneedle)
+/*
+ * Rewritten "two-way" strstr() by Jody Bruchon
+ * This new strstr() implementation scans for the substring in both
+ * directions instead of scanning from start to end. Benchmarks for
+ * strstr() from libc-bench (http://www.etalabs.net/libc-bench.html)
+ * indicate this implementation is ~5-10x faster than the "naive"
+ * one it replaces.
+ */
+char *strstr(const char *s1, const char *s2)
 {
-  const unsigned char *haystack, *needle;
-  chartype b;
-  const unsigned char *rneedle;
-
-  haystack = (const unsigned char *) phaystack;
-
-  if ((b = *(needle = (const unsigned char *) pneedle)))
-    {
-      chartype c;
-      haystack--;		/* possible ANSI violation */
+        register const char *haystack = s1;
+        register const char *needle = s2;
+        int pos = 0;
+        register int cnt;
+        size_t hay_len;
+        size_t n_len;
+
+        if (!*needle) return (char *) s1;
+        n_len = strlen(needle);
+        hay_len = strlen(haystack);
+
+        cnt = n_len;
+        do {
+                /* Give up if needle is longer than remaining haystack */
+                if ((pos + n_len - 1) > hay_len) return NULL;
+
+                /* Scan for match in both directions */
+                while (*needle == *haystack) {
+                        cnt--;
+                        if (!cnt) return (char *) (s1 + pos);
+                        if (*(needle + cnt) == *(haystack + cnt)) {
+                                cnt--;
+                                if (!cnt) return (char *) (s1 + pos);
+                        } else break;
+                        needle++;
+                        haystack++;
+                }
+                pos++;
+                haystack = s1 + pos;
+                needle = s2;
+                cnt = n_len;
+        } while (1);
 
-      {
-	chartype a;
-	do
-	  if (!(a = *++haystack))
-	    goto ret0;
-	while (a != b);
-      }
-
-      if (!(c = *++needle))
-	goto foundneedle;
-      ++needle;
-      goto jin;
-
-      for (;;)
-	{
-	  {
-	    chartype a;
-	    if (0)
-	    jin:{
-		if ((a = *++haystack) == c)
-		  goto crest;
-	      }
-	    else
-	      a = *++haystack;
-	    do
-	      {
-		for (; a != b; a = *++haystack)
-		  {
-		    if (!a)
-		      goto ret0;
-		    if ((a = *++haystack) == b)
-		      break;
-		    if (!a)
-		      goto ret0;
-		  }
-	      }
-	    while ((a = *++haystack) != c);
-	  }
-	crest:
-	  {
-	    chartype a;
-	    {
-	      const unsigned char *rhaystack;
-	      if (*(rhaystack = haystack-- + 1) == (a = *(rneedle = needle)))
-		do
-		  {
-		    if (!a)
-		      goto foundneedle;
-		    if (*++rhaystack != (a = *++needle))
-		      break;
-		    if (!a)
-		      goto foundneedle;
-		  }
-		while (*++rhaystack == (a = *++needle));
-	      needle = rneedle;	/* took the register-poor aproach */
-	    }
-	    if (!a)
-	      break;
-	  }
-	}
-    }
-foundneedle:
-  return (char *) haystack;
-ret0:
-  return 0;
 }
+
 libc_hidden_def(strstr)
diff --git a/libc/string/strstr.c b/libc/string/strstr.c
index 7e2a64e..129747d 100644
--- a/libc/string/strstr.c
+++ b/libc/string/strstr.c
@@ -1,6 +1,7 @@ 
 /*
  * Copyright (C) 2002     Manuel Novoa III
  * Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
+ * Copyright (C) 2015 Jody Bruchon <jody@jodybruchon.com>
  *
  * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
  */
@@ -13,29 +14,51 @@ 
 # define Wstrstr strstr
 #endif
 
-/* NOTE: This is the simple-minded O(len(s1) * len(s2)) worst-case approach. */
-
+/*
+ * Rewritten "two-way" strstr() by Jody Bruchon
+ * This new strstr() implementation scans for the substring in both
+ * directions instead of scanning from start to end. Benchmarks for
+ * strstr() from libc-bench (http://www.etalabs.net/libc-bench.html)
+ * indicate this implementation is ~5-10x faster than the "naive"
+ * one it replaces.
+ */
 Wchar *Wstrstr(const Wchar *s1, const Wchar *s2)
 {
-	register const Wchar *s = s1;
-	register const Wchar *p = s2;
-
-	do {
-		if (!*p) {
-			return (Wchar *) s1;;
-		}
-		if (*p == *s) {
-			++p;
-			++s;
-		} else {
-			p = s2;
-			if (!*s) {
-				return NULL;
-			}
-			s = ++s1;
-		}
-	} while (1);
+        register const Wchar *haystack = s1;
+        register const Wchar *needle = s2;
+        int pos = 0;
+        register int cnt;
+        size_t hay_len;
+        size_t n_len;
+
+        if (!*needle) return (Wchar *) s1;
+        n_len = wcslen(needle);
+        hay_len = wcslen(haystack);
+
+        cnt = n_len;
+        do {
+                /* Give up if needle is longer than remaining haystack */
+                if ((pos + n_len - 1) > hay_len) return NULL;
+
+                /* Scan for match in both directions */
+                while (*needle == *haystack) {
+                        cnt--;
+                        if (!cnt) return (Wchar *) (s1 + pos);
+                        if (*(needle + cnt) == *(haystack + cnt)) {
+                                cnt--;
+                                if (!cnt) return (Wchar *) (s1 + pos);
+                        } else break;
+                        needle++;
+                        haystack++;
+                }
+                pos++;
+                haystack = s1 + pos;
+                needle = s2;
+                cnt = n_len;
+        } while (1);
+
 }
+
 #ifndef WANT_WIDE
 libc_hidden_def(strstr)
 #elif defined __UCLIBC_SUSV3_LEGACY__