Message ID | 54FF35DF.6040709@web.de |
---|---|
State | New |
Headers | show |
Please review: https://sourceware.org/ml/libc-alpha/2015-03/msg00417.html
Please review: https://sourceware.org/ml/libc-alpha/2015-03/msg00417.html
On Tue, Mar 10, 2015 at 07:20:15PM +0100, Leonhard Holz wrote: > This is essentially the benchmark from last year > (https://sourceware.org/ml/libc-alpha/2014-10/msg00345.html) plus an > enchanced benchtests/Makefile to generate the needed locales. The attached > input file have to be deployed to the benchtest directory. > > * benchtests/bench-strcoll.c: New benchmark. > * benchtests/Makefile: Generate locales and run benchmark. > * localedata/gen-locale.sh (generate_locale): Make path to charmaps > independent of current working directory. > * benchtests/lorem_ipsum_ar_SA: New benchmark input file. > * benchtests/lorem_ipsum_cs_CZ: Likewise. > * benchtests/lorem_ipsum_da_DK: Likewise. > * benchtests/lorem_ipsum_el_GR: Likewise. > * benchtests/lorem_ipsum_en_GB: Likewise. > * benchtests/lorem_ipsum_en_US: Likewise. > * benchtests/lorem_ipsum_es_ES: Likewise. > * benchtests/lorem_ipsum_fr_FR: Likewise. > * benchtests/lorem_ipsum_hi_IN: Likewise. > * benchtests/lorem_ipsum_hu_HU: Likewise. > * benchtests/lorem_ipsum_is_IS: Likewise. > * benchtests/lorem_ipsum_it_IT: Likewise. > * benchtests/lorem_ipsum_iw_IL: Likewise. > * benchtests/lorem_ipsum_ja_JP: Likewise. > * benchtests/lorem_ipsum_pl_PL: Likewise. > * benchtests/lorem_ipsum_pt_PT: Likewise. > * benchtests/lorem_ipsum_ru_RU: Likewise. > * benchtests/lorem_ipsum_sr_RS: Likewise. > * benchtests/lorem_ipsum_sv_SE: Likewise. > * benchtests/lorem_ipsum_tr_TR: Likewise. > * benchtests/lorem_ipsum_vi_VN: Likewise. > * benchtests/lorem_ipsum_zh_CN: Likewise. Put the input files in a separate directory, say, benchtests/strcoll-inputs/. > diff --git a/benchtests/bench-strcoll.c b/benchtests/bench-strcoll.c > index e69de29..6636e38 100644 > --- a/benchtests/bench-strcoll.c > +++ b/benchtests/bench-strcoll.c > @@ -0,0 +1,382 @@ > +/* Measure strcoll execution time in different locales. > + Copyright (C) 2015 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#define TEST_MAIN > +#define TEST_NAME "strcoll" > + > +#include <stdio.h> > +#include <fcntl.h> > +#include <assert.h> > +#include <stdlib.h> > +#include <locale.h> > +#include <unistd.h> > +#include <dirent.h> > +#include "json-lib.h" > +#include "bench-timing.h" > + > +/* many thanks to http://generator.lorem-ipsum.info/ */ Capitalize first letter and two spaces before comment end. > +#define CHARSET "UTF-8" > +#define INPUT_PREFIX "lorem_ipsum_" > + > +static const char *const locales[] = { > + "vi_VN", > + "en_US", > + "ar_SA", > + "en_US", > + "zh_CN", > + "cs_CZ", > + "en_GB", > + "da_DK", > + "pl_PL", > + "fr_FR", > + "pt_PT", > + "el_GR", > + "ru_RU", > + "iw_IL", > + "es_ES", > + "hi_IN", > + "sv_SE", > + "hu_HU", > + "tr_TR", > + "is_IS", > + "it_IT", > + "sr_RS", > + "ja_JP" > +}; > + > +#define TEXTFILE_DELIMITER " \n\r\t.,?!" > +#define FILENAMES_DELIMITER "\n\r" > + > +static int > +is_dir (const char *filename) > +{ > + int is_dir = 0; > + struct stat stats; > + > + if (stat (filename, &stats) == 0) > + is_dir = stats.st_mode & S_IFDIR; > + > + return is_dir; > +} > + > +static char * > +concat_path (const char *dirname, const char *filename) > +{ > + char *path; > + int ret = asprintf (&path, "%s/%s", dirname, filename); > + assert (ret > 0); > + return path; > +} > + > +static char * > +read_file (const char *filename) > +{ > + struct stat stats; > + char *buffer = NULL; > + int fd = open (filename, O_CLOEXEC); > + > + if (fd >= 0) > + { > + if (fstat (fd, &stats) == 0) > + { > + buffer = malloc (stats.st_size + 1); > + if (buffer) > + { > + if (read (fd, buffer, stats.st_size) == stats.st_size) > + buffer[stats.st_size] = '\0'; > + else > + { > + free (buffer); > + buffer = NULL; > + } > + } > + } > + close (fd); > + } > + > + return buffer; > +} > + > +static size_t > +count_words (const char *text, const char *delim) > +{ > + size_t wordcount = 0; > + char *tmp = strdup (text); > + > + char *token = strtok (tmp, delim); > + while (token != NULL) > + { > + if (*token != '\0') > + wordcount++; > + token = strtok (NULL, delim); > + } > + > + free (tmp); > + return wordcount; > +} > + > +typedef struct > +{ > + size_t size; > + char **words; > +} word_list; > + > +static word_list * > +new_word_list (size_t size) > +{ > + word_list *list = malloc (sizeof (word_list)); > + assert (list != NULL); > + list->size = size; > + list->words = malloc (size * sizeof (char *)); > + assert (list->words != NULL); > + return list; > +} > + > +static word_list * > +merge_word_list (word_list * dst, word_list * src) > +{ > + size_t i, n = dst->size; > + dst->size += src->size; > + > + char **new_words = malloc (dst->size * sizeof (char *)); > + assert (new_words != NULL); > + memcpy (new_words, dst->words, n * sizeof (char *)); > + free (dst->words); > + dst->words = new_words; > + > + for (i = 0; i < src->size; i++) > + dst->words[i + n] = src->words[i]; > + > + free (src->words); > + free (src); > + > + return dst; > +} > + > +static word_list * > +file_word_list (const char *dirname) > +{ > + DIR *dir; > + struct dirent *ent; > + word_list *list = NULL; > + > + if ((dir = opendir (dirname)) != NULL) > + { > + size_t ent_cnt = 0, i = 0; > + word_list *sublist = new_word_list (0); > + > + while ((ent = readdir (dir)) != NULL) > + if (ent->d_name[0] != '.') > + { > + char *subdirname = concat_path (dirname, ent->d_name); > + if (is_dir (subdirname)) > + merge_word_list (sublist, file_word_list (subdirname)); > + free (subdirname); > + > + ent_cnt++; > + } > + > + rewinddir (dir); > + list = new_word_list (ent_cnt); > + while ((ent = readdir (dir)) != NULL) > + if (ent->d_name[0] != '.') > + { > + list->words[i] = strdup (ent->d_name); > + i++; > + } > + > + merge_word_list (list, sublist); > + closedir (dir); > + } > + > + return list; > +} > + > +static word_list * > +str_word_list (const char *str, const char *delim) > +{ > + size_t n = 0; > + word_list *list = new_word_list (count_words (str, delim)); > + > + char *toks = strdup (str); > + char *word = strtok (toks, delim); > + while (word != NULL && n < list->size) > + { > + if (*word != '\0') > + list->words[n++] = strdup (word); > + word = strtok (NULL, delim); > + } > + > + free (toks); > + return list; > +} > + > +static word_list * > +copy_word_list (const word_list *list) > +{ > + size_t i; > + word_list *copy = new_word_list (list->size); > + > + for (i = 0; i < list->size; i++) > + copy->words[i] = strdup (list->words[i]); > + > + return copy; > +} > + > +static void > +free_word_list (word_list *list) > +{ > + size_t i; > + for (i = 0; i < list->size; i++) > + free (list->words[i]); > + > + free (list->words); > + free (list); > +} > + > +static int > +compare_words (const void *a, const void *b) > +{ > + const char *s1 = *(char **) a; > + const char *s2 = *(char **) b; > + return strcoll (s1, s2); > +} > + > +#undef INNER_LOOP_ITERS > +#define INNER_LOOP_ITERS 16 > + > +static void > +bench_list (json_ctx_t *json_ctx, word_list *list) > +{ > + size_t i; > + timing_t start, stop, cur; > + > + word_list **tests = malloc (INNER_LOOP_ITERS * sizeof (word_list *)); > + assert (tests != NULL); > + for (i = 0; i < INNER_LOOP_ITERS; i++) > + tests[i] = copy_word_list (list); > + > + TIMING_NOW (start); > + for (i = 0; i < INNER_LOOP_ITERS; i++) > + qsort (tests[i]->words, tests[i]->size, sizeof (char *), compare_words); > + TIMING_NOW (stop); > + > + TIMING_DIFF (cur, start, stop); > + setlocale (LC_ALL, "en_US.UTF-8"); > + json_attr_double (json_ctx, "duration", cur); > + json_attr_double (json_ctx, "iterations", i); > + json_attr_double (json_ctx, "mean", cur / i); > + > + for (i = 0; i < INNER_LOOP_ITERS; i++) > + free_word_list (tests[i]); > + free (tests); > +} > + > +#define OK 0 > +#define ERROR_LOCALE 1 > +#define ERROR_IO 2 Use an enum instead. > + > +static int > +bench_file (json_ctx_t *json_ctx, const char *filename, const char *delim, > + const char *locale) Adjust this as recommended below. > +{ > + if (setlocale (LC_ALL, locale) == NULL) > + return ERROR_LOCALE; > + > + char *text = read_file (filename); > + if (text == NULL) > + return ERROR_IO; > + > + word_list *list = str_word_list (text, delim); > + > + json_attr_object_begin (json_ctx, locale); > + bench_list (json_ctx, list); > + json_attr_object_end (json_ctx); > + > + free_word_list (list); > + free (text); > + return OK; > +} > + > +static int > +bench_file_list (json_ctx_t *json_ctx, const char *dirname, const char *locale) If you just create a file with this information once, you can collapse this special case into the other cases and get rid of this function. > +{ > + if (setlocale (LC_ALL, locale) == NULL) > + return ERROR_LOCALE; > + > + word_list *list = file_word_list (dirname); > + if (list == NULL) > + return ERROR_IO; > + > + json_attr_object_begin (json_ctx, ""); > + json_attr_string (json_ctx, "locale", locale); > + bench_list (json_ctx, list); > + json_attr_object_end (json_ctx); > + > + free_word_list (list); > + return OK; > +} > + > +static int > +do_bench (void) > +{ > + timing_t res; > + TIMING_INIT (res); > + > + json_ctx_t *json_ctx = malloc (sizeof (json_ctx_t)); > + assert (json_ctx != NULL); > + json_init (json_ctx, 2, stdout); > + json_attr_object_begin (json_ctx, "strcoll"); > + > + int result = bench_file_list (json_ctx, "..", "C"); > + if (result != OK) { > + error: > + if (result == ERROR_LOCALE) > + printf ("Failed to set locale en_US.UTF-8, aborting!\n"); s/en_US.UTF-8/C/ In fact, set a variable and use it to pass to bench_file_list and the printf so as to avoid such an error. > + else if (result == ERROR_IO) > + printf ("Failed to read libc directory, aborting!\n"); > + else > + printf ("Error, aborting!\n"); Print the value of the result. > + return result; > + } > + result = bench_file_list (json_ctx, "..", "en_US.UTF-8"); > + if (result != OK) > + goto error; Instead, split the error handling code into a function, set RESULT and goto a label 'out' near the end of the function that just returns RESULT. > + > + size_t i; > + char filename[40], locale[15]; > + for (i = 0; i < (sizeof (locales) / sizeof (locales[0])); i++) > + { > + sprintf (locale, "%s." CHARSET, locales[i]); > + sprintf (filename, INPUT_PREFIX "%s", locales[i]); > + result = bench_file (json_ctx, filename, TEXTFILE_DELIMITER, locale); Just make bench_file accept the locale name and deduce the locale and filename in the function. You don't really need to pass the delimiter either since you don't call bench_file with anything other than TEXTFILE_DELIMITER. That could simplify your code to: result = bench_file (json_ctx, locales[i]); if (result != OK) goto out; > + if (result != OK) > + goto error; > + } > + > + json_attr_object_end (json_ctx); > + free (json_ctx); This is where the out: label goes and you return RESULT instead of OK. > + return OK; > +} > + > +#define TEST_FUNCTION do_bench () > + > +/* On slower platforms this test needs more than the default 2 seconds. */ > +#define TIMEOUT 10 > + > +#include "../test-skeleton.c" Don't use the test-skeleton. Make do_bench the main function. The other string benchmarks do this because they were directly ported from the old test cases. In fact, they need to be evaluated and improved. > diff --git a/benchtests/Makefile b/benchtests/Makefile > index 08603a2..4aaff73 100644 > --- a/benchtests/Makefile > +++ b/benchtests/Makefile > @@ -34,9 +34,22 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy > memmem memmove \ > mempcpy memset rawmemchr stpcpy stpncpy strcasecmp strcasestr \ > strcat strchr strchrnul strcmp strcpy strcspn strlen \ > strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \ > - strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok > + strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \ > + strcoll > string-bench-all := $(string-bench) > > +# We have to generate locales > +LOCALES := en_US.UTF-8 tr_TR.UTF-8 cs_CZ.UTF-8 fa_IR.UTF-8 fr_FR.UTF-8 \ > + ja_JP.UTF-8 si_LK.UTF-8 en_GB.UTF-8 vi_VN.UTF-8 ar_SA.UTF-8 \ > + da_DK.UTF-8 pl_PL.UTF-8 pt_PT.UTF-8 el_GR.UTF-8 ru_RU.UTF-8 \ > + iw_IL.UTF-8 is_IS.UTF-8 es_ES.UTF-8 hi_IN.UTF-8 sv_SE.UTF-8 \ > + hu_HU.UTF-8 it_IT.UTF-8 sr_RS.UTF-8 zh_CN.UTF-8 > +LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g') > +CHARMAPS := $(shell echo "$(LOCALES)" | \ > + sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g) > +CTYPE_FILES = $(addsuffix /LC_CTYPE,$(LOCALES)) > +gen-locales := $(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)) > + > stdlib-bench := strtod > > benchset := $(string-bench-all) $(stdlib-bench) > @@ -65,6 +78,19 @@ binaries-bench := $(addprefix $(objpfx)bench-,$(bench)) > binaries-benchset := $(addprefix $(objpfx)bench-,$(benchset)) > binaries-bench-malloc := $(addprefix $(objpfx)bench-,$(bench-malloc)) > > +# Dependency for the locale files. We actually make it depend only on > +# one of the files. > +$(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)): %: \ > + ../localedata/gen-locale.sh \ > + $(common-objpfx)locale/localedef \ > + ../localedata/Makefile \ > + $(addprefix ../localedata/charmaps/,$(CHARMAPS)) \ > + $(addprefix ../localedata/locales/,$(LOCALE_SRCS)) > + @$(SHELL) ../localedata/gen-locale.sh $(common-objpfx) \ > + '$(built-program-cmd-before-env)' '$(run-program-env)' \ > + '$(built-program-cmd-after-env)' $@; \ > + $(evaluate-test) > + AFAICT all of this is copied from localedata/Makefile. Instead, make a common snippet that you can include in both these places. This will reduce the pain of remembering to update two locations when any of this code needs to be fixed. > # The default duration: 10 seconds. > ifndef BENCH_DURATION > BENCH_DURATION := 10 > @@ -107,7 +133,7 @@ bench-clean: > rm -f $(binaries-bench-malloc) $(addsuffix .o,$(binaries-bench-malloc)) > rm -f $(timing-type) $(addsuffix .o,$(timing-type)) > > -bench: $(timing-type) bench-set bench-func bench-malloc > +bench: $(timing-type) $(gen-locales) bench-set bench-func bench-malloc > > bench-set: $(binaries-benchset) > for run in $^; do \ > > diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh > index 4156f9e..2a48a34 100644 > --- a/localedata/gen-locale.sh > +++ b/localedata/gen-locale.sh > @@ -30,7 +30,8 @@ generate_locale () > charmap=$1 > input=$2 > out=$3 > - if ${localedef_before_env} ${run_program_env} I18NPATH=. \ > + if ${localedef_before_env} ${run_program_env} \ > + I18NPATH=${common_objpfx}../localedata \ Shouldn't this be I18NPATH=${common_objpfx}localedata? > ${localedef_after_env} --quiet -c -f $charmap -i $input \ > ${common_objpfx}localedata/$out > then >
>> +static int >> +bench_file_list (json_ctx_t *json_ctx, const char *dirname, const char *locale) > > If you just create a file with this information once, you can collapse > this special case into the other cases and get rid of this function. > This implies that the file list outdates over time. Is this ok? >> diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh >> index 4156f9e..2a48a34 100644 >> --- a/localedata/gen-locale.sh >> +++ b/localedata/gen-locale.sh >> @@ -30,7 +30,8 @@ generate_locale () >> charmap=$1 >> input=$2 >> out=$3 >> - if ${localedef_before_env} ${run_program_env} I18NPATH=. \ >> + if ${localedef_before_env} ${run_program_env} \ >> + I18NPATH=${common_objpfx}../localedata \ > > Shouldn't this be I18NPATH=${common_objpfx}localedata? > I18NPATH has to point to glibc/localedata and common_objpfx points to the build directory. So I18NPATH=${common_objpfx}localedata should not work.
Leonhard Holz <leonhard.holz@web.de> writes: >>> diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh >>> index 4156f9e..2a48a34 100644 >>> --- a/localedata/gen-locale.sh >>> +++ b/localedata/gen-locale.sh >>> @@ -30,7 +30,8 @@ generate_locale () >>> charmap=$1 >>> input=$2 >>> out=$3 >>> - if ${localedef_before_env} ${run_program_env} I18NPATH=. \ >>> + if ${localedef_before_env} ${run_program_env} \ >>> + I18NPATH=${common_objpfx}../localedata \ >> >> Shouldn't this be I18NPATH=${common_objpfx}localedata? >> > > I18NPATH has to point to glibc/localedata and common_objpfx points to the > build directory. So I18NPATH=${common_objpfx}localedata should not work. ${common_objpfx}../ doesn't get you to the source directory. Andreas.
On Thu, Mar 26, 2015 at 02:31:29PM +0100, Leonhard Holz wrote: > This implies that the file list outdates over time. Is this ok? Sure, we don;t really care about that, do we? If you do, then just regenerate the file in the make target by simply doing: ls > strcoll-inputs/en_US-tmp mv strcoll-inputs/en_US{-tmp,} and maintain strcoll-inputs/C as a symlink to strcoll-inputs/en_US. > I18NPATH has to point to glibc/localedata and common_objpfx points to the > build directory. So I18NPATH=${common_objpfx}localedata should not work. Then you're looking for $(srcdir) (after you set it to @srcdir@ I think) and not $(common_objpfx). Actually since you're in benchtests/ when you're running this, you can just set it as ../localedata, i.e. without the $(common_objpfx). Siddhesh
diff --git a/benchtests/bench-strcoll.c b/benchtests/bench-strcoll.c index e69de29..6636e38 100644 --- a/benchtests/bench-strcoll.c +++ b/benchtests/bench-strcoll.c @@ -0,0 +1,382 @@ +/* Measure strcoll execution time in different locales. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define TEST_MAIN +#define TEST_NAME "strcoll" + +#include <stdio.h> +#include <fcntl.h> +#include <assert.h> +#include <stdlib.h> +#include <locale.h> +#include <unistd.h> +#include <dirent.h> +#include "json-lib.h" +#include "bench-timing.h" + +/* many thanks to http://generator.lorem-ipsum.info/ */ +#define CHARSET "UTF-8" +#define INPUT_PREFIX "lorem_ipsum_" + +static const char *const locales[] = { + "vi_VN", + "en_US", + "ar_SA", + "en_US", + "zh_CN", + "cs_CZ", + "en_GB", + "da_DK", + "pl_PL", + "fr_FR", + "pt_PT", + "el_GR", + "ru_RU", + "iw_IL", + "es_ES", + "hi_IN", + "sv_SE", + "hu_HU", + "tr_TR", + "is_IS", + "it_IT", + "sr_RS", + "ja_JP" +}; + +#define TEXTFILE_DELIMITER " \n\r\t.,?!" +#define FILENAMES_DELIMITER "\n\r" + +static int +is_dir (const char *filename) +{ + int is_dir = 0; + struct stat stats; + + if (stat (filename, &stats) == 0) + is_dir = stats.st_mode & S_IFDIR; + + return is_dir; +} + +static char * +concat_path (const char *dirname, const char *filename) +{ + char *path; + int ret = asprintf (&path, "%s/%s", dirname, filename); + assert (ret > 0); + return path; +} + +static char * +read_file (const char *filename) +{ + struct stat stats; + char *buffer = NULL; + int fd = open (filename, O_CLOEXEC); + + if (fd >= 0) + { + if (fstat (fd, &stats) == 0) + { + buffer = malloc (stats.st_size + 1); + if (buffer) + { + if (read (fd, buffer, stats.st_size) == stats.st_size) + buffer[stats.st_size] = '\0'; + else + { + free (buffer); + buffer = NULL; + } + } + } + close (fd); + } + + return buffer; +} + +static size_t +count_words (const char *text, const char *delim) +{ + size_t wordcount = 0; + char *tmp = strdup (text); + + char *token = strtok (tmp, delim); + while (token != NULL) + { + if (*token != '\0') + wordcount++; + token = strtok (NULL, delim); + } + + free (tmp); + return wordcount; +} + +typedef struct +{ + size_t size; + char **words; +} word_list; + +static word_list * +new_word_list (size_t size) +{ + word_list *list = malloc (sizeof (word_list)); + assert (list != NULL); + list->size = size; + list->words = malloc (size * sizeof (char *)); + assert (list->words != NULL); + return list; +} + +static word_list * +merge_word_list (word_list * dst, word_list * src) +{ + size_t i, n = dst->size; + dst->size += src->size; + + char **new_words = malloc (dst->size * sizeof (char *)); + assert (new_words != NULL); + memcpy (new_words, dst->words, n * sizeof (char *)); + free (dst->words); + dst->words = new_words; + + for (i = 0; i < src->size; i++) + dst->words[i + n] = src->words[i]; + + free (src->words); + free (src); + + return dst; +} + +static word_list * +file_word_list (const char *dirname) +{ + DIR *dir; + struct dirent *ent; + word_list *list = NULL; + + if ((dir = opendir (dirname)) != NULL) + { + size_t ent_cnt = 0, i = 0; + word_list *sublist = new_word_list (0); + + while ((ent = readdir (dir)) != NULL) + if (ent->d_name[0] != '.') + { + char *subdirname = concat_path (dirname, ent->d_name); + if (is_dir (subdirname)) + merge_word_list (sublist, file_word_list (subdirname)); + free (subdirname); + + ent_cnt++; + } + + rewinddir (dir); + list = new_word_list (ent_cnt); + while ((ent = readdir (dir)) != NULL) + if (ent->d_name[0] != '.') + { + list->words[i] = strdup (ent->d_name); + i++; + } + + merge_word_list (list, sublist); + closedir (dir); + } + + return list; +} + +static word_list * +str_word_list (const char *str, const char *delim) +{ + size_t n = 0; + word_list *list = new_word_list (count_words (str, delim)); + + char *toks = strdup (str); + char *word = strtok (toks, delim); + while (word != NULL && n < list->size) + { + if (*word != '\0') + list->words[n++] = strdup (word); + word = strtok (NULL, delim); + } + + free (toks); + return list; +} + +static word_list * +copy_word_list (const word_list *list) +{ + size_t i; + word_list *copy = new_word_list (list->size); + + for (i = 0; i < list->size; i++) + copy->words[i] = strdup (list->words[i]); + + return copy; +} + +static void +free_word_list (word_list *list) +{ + size_t i; + for (i = 0; i < list->size; i++) + free (list->words[i]); + + free (list->words); + free (list); +} + +static int +compare_words (const void *a, const void *b) +{ + const char *s1 = *(char **) a; + const char *s2 = *(char **) b; + return strcoll (s1, s2); +} + +#undef INNER_LOOP_ITERS +#define INNER_LOOP_ITERS 16 + +static void +bench_list (json_ctx_t *json_ctx, word_list *list) +{ + size_t i; + timing_t start, stop, cur; + + word_list **tests = malloc (INNER_LOOP_ITERS * sizeof (word_list *)); + assert (tests != NULL); + for (i = 0; i < INNER_LOOP_ITERS; i++) + tests[i] = copy_word_list (list); + + TIMING_NOW (start); + for (i = 0; i < INNER_LOOP_ITERS; i++) + qsort (tests[i]->words, tests[i]->size, sizeof (char *), compare_words); + TIMING_NOW (stop); + + TIMING_DIFF (cur, start, stop); + setlocale (LC_ALL, "en_US.UTF-8"); + json_attr_double (json_ctx, "duration", cur); + json_attr_double (json_ctx, "iterations", i); + json_attr_double (json_ctx, "mean", cur / i); + + for (i = 0; i < INNER_LOOP_ITERS; i++) + free_word_list (tests[i]); + free (tests); +} + +#define OK 0 +#define ERROR_LOCALE 1 +#define ERROR_IO 2 + +static int +bench_file (json_ctx_t *json_ctx, const char *filename, const char *delim, + const char *locale) +{ + if (setlocale (LC_ALL, locale) == NULL) + return ERROR_LOCALE; + + char *text = read_file (filename); + if (text == NULL) + return ERROR_IO; + + word_list *list = str_word_list (text, delim); + + json_attr_object_begin (json_ctx, locale); + bench_list (json_ctx, list); + json_attr_object_end (json_ctx); + + free_word_list (list); + free (text); + return OK; +} + +static int +bench_file_list (json_ctx_t *json_ctx, const char *dirname, const char *locale) +{ + if (setlocale (LC_ALL, locale) == NULL) + return ERROR_LOCALE; + + word_list *list = file_word_list (dirname); + if (list == NULL) + return ERROR_IO; + + json_attr_object_begin (json_ctx, ""); + json_attr_string (json_ctx, "locale", locale); + bench_list (json_ctx, list); + json_attr_object_end (json_ctx); + + free_word_list (list); + return OK; +} + +static int +do_bench (void) +{ + timing_t res; + TIMING_INIT (res); + + json_ctx_t *json_ctx = malloc (sizeof (json_ctx_t)); + assert (json_ctx != NULL); + json_init (json_ctx, 2, stdout); + json_attr_object_begin (json_ctx, "strcoll"); + + int result = bench_file_list (json_ctx, "..", "C"); + if (result != OK) { + error: + if (result == ERROR_LOCALE) + printf ("Failed to set locale en_US.UTF-8, aborting!\n"); + else if (result == ERROR_IO) + printf ("Failed to read libc directory, aborting!\n"); + else + printf ("Error, aborting!\n"); + return result; + } + result = bench_file_list (json_ctx, "..", "en_US.UTF-8"); + if (result != OK) + goto error; + + size_t i; + char filename[40], locale[15]; + for (i = 0; i < (sizeof (locales) / sizeof (locales[0])); i++) + { + sprintf (locale, "%s." CHARSET, locales[i]); + sprintf (filename, INPUT_PREFIX "%s", locales[i]); + result = bench_file (json_ctx, filename, TEXTFILE_DELIMITER, locale); + if (result != OK) + goto error; + } + + json_attr_object_end (json_ctx); + free (json_ctx); + return OK; +} + +#define TEST_FUNCTION do_bench () + +/* On slower platforms this test needs more than the default 2 seconds. */ +#define TIMEOUT 10 + +#include "../test-skeleton.c" diff --git a/benchtests/Makefile b/benchtests/Makefile index 08603a2..4aaff73 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -34,9 +34,22 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \ mempcpy memset rawmemchr stpcpy stpncpy strcasecmp strcasestr \ strcat strchr strchrnul strcmp strcpy strcspn strlen \ strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \ - strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok + strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \ + strcoll string-bench-all := $(string-bench) +# We have to generate locales +LOCALES := en_US.UTF-8 tr_TR.UTF-8 cs_CZ.UTF-8 fa_IR.UTF-8 fr_FR.UTF-8 \ + ja_JP.UTF-8 si_LK.UTF-8 en_GB.UTF-8 vi_VN.UTF-8 ar_SA.UTF-8 \ + da_DK.UTF-8 pl_PL.UTF-8 pt_PT.UTF-8 el_GR.UTF-8 ru_RU.UTF-8 \ + iw_IL.UTF-8 is_IS.UTF-8 es_ES.UTF-8 hi_IN.UTF-8 sv_SE.UTF-8 \ + hu_HU.UTF-8 it_IT.UTF-8 sr_RS.UTF-8 zh_CN.UTF-8 +LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g') +CHARMAPS := $(shell echo "$(LOCALES)" | \ + sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g) +CTYPE_FILES = $(addsuffix /LC_CTYPE,$(LOCALES)) +gen-locales := $(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)) + stdlib-bench := strtod benchset := $(string-bench-all) $(stdlib-bench) @@ -65,6 +78,19 @@ binaries-bench := $(addprefix $(objpfx)bench-,$(bench)) binaries-benchset := $(addprefix $(objpfx)bench-,$(benchset)) binaries-bench-malloc := $(addprefix $(objpfx)bench-,$(bench-malloc)) +# Dependency for the locale files. We actually make it depend only on +# one of the files. +$(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)): %: \ + ../localedata/gen-locale.sh \ + $(common-objpfx)locale/localedef \ + ../localedata/Makefile \ + $(addprefix ../localedata/charmaps/,$(CHARMAPS)) \ + $(addprefix ../localedata/locales/,$(LOCALE_SRCS)) + @$(SHELL) ../localedata/gen-locale.sh $(common-objpfx) \ + '$(built-program-cmd-before-env)' '$(run-program-env)' \ + '$(built-program-cmd-after-env)' $@; \ + $(evaluate-test) + # The default duration: 10 seconds. ifndef BENCH_DURATION BENCH_DURATION := 10 @@ -107,7 +133,7 @@ bench-clean: rm -f $(binaries-bench-malloc) $(addsuffix .o,$(binaries-bench-malloc)) rm -f $(timing-type) $(addsuffix .o,$(timing-type)) -bench: $(timing-type) bench-set bench-func bench-malloc +bench: $(timing-type) $(gen-locales) bench-set bench-func bench-malloc bench-set: $(binaries-benchset) for run in $^; do \ diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh index 4156f9e..2a48a34 100644 --- a/localedata/gen-locale.sh +++ b/localedata/gen-locale.sh @@ -30,7 +30,8 @@ generate_locale () charmap=$1 input=$2 out=$3 - if ${localedef_before_env} ${run_program_env} I18NPATH=. \ + if ${localedef_before_env} ${run_program_env} \ + I18NPATH=${common_objpfx}../localedata \ ${localedef_after_env} --quiet -c -f $charmap -i $input \ ${common_objpfx}localedata/$out then