Message ID | 99aaabce8a00d64f7d0738c64af3cbd3e7f82c5c.1653314701.git.fweimer@redhat.com |
---|---|
State | New |
Headers | show |
Series | vfprintf rework to remove vtables | expand |
On 23/05/2022 11:07, Florian Weimer via Libc-alpha wrote: > The iterator allows grouping while scanning forward through > the digits. This enables emitting digits as they are processed. LGTM, thanks. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> > --- > stdio-common/Makefile | 9 +- > stdio-common/grouping_iterator.c | 125 +++++++++++++ > stdio-common/grouping_iterator.h | 65 +++++++ > stdio-common/tst-grouping_iterator.c | 262 +++++++++++++++++++++++++++ > 4 files changed, 460 insertions(+), 1 deletion(-) > create mode 100644 stdio-common/grouping_iterator.c > create mode 100644 stdio-common/grouping_iterator.h > create mode 100644 stdio-common/tst-grouping_iterator.c > > diff --git a/stdio-common/Makefile b/stdio-common/Makefile > index b1e9144de0..da3a3bc0c9 100644 > --- a/stdio-common/Makefile > +++ b/stdio-common/Makefile > @@ -39,6 +39,7 @@ routines := \ > gentempfd \ > getline \ > getw \ > + grouping_iterator \ > iovfscanf \ > isoc99_fscanf \ > isoc99_scanf \ > @@ -221,6 +222,10 @@ generated += \ > siglist-aux.S \ > # generated > > +tests-internal = \ > + tst-grouping_iterator \ > + # tests-internal > + > test-srcs = tst-unbputc tst-printf tst-printfsz-islongdouble > > ifeq ($(run-built-tests),yes) > @@ -288,13 +293,15 @@ LOCALES := \ > hi_IN.UTF-8 \ > ja_JP.EUC-JP \ > ps_AF.UTF-8 \ > - # LOCALES > + tg_TJ.UTF-8 \ > + # LOCALES > include ../gen-locales.mk > > $(objpfx)bug14.out: $(gen-locales) > $(objpfx)scanf13.out: $(gen-locales) > $(objpfx)test-vfprintf.out: $(gen-locales) > $(objpfx)tst-grouping.out: $(gen-locales) > +$(objpfx)tst-grouping_iterator.out: $(gen-locales) > $(objpfx)tst-sprintf.out: $(gen-locales) > $(objpfx)tst-sscanf.out: $(gen-locales) > $(objpfx)tst-swprintf.out: $(gen-locales) Ok. 
> diff --git a/stdio-common/grouping_iterator.c b/stdio-common/grouping_iterator.c > new file mode 100644 > index 0000000000..cc169e2b09 > --- /dev/null > +++ b/stdio-common/grouping_iterator.c > @@ -0,0 +1,125 @@ > +/* Iterator for inserting thousands separators into numbers. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <grouping_iterator.h> > + > +#include <assert.h> > +#include <limits.h> > +#include <locale/localeinfo.h> > +#include <stdint.h> > +#include <string.h> > + > +/* Initializes *IT with no grouping information for a string of length > + DIGITS, and return false to indicate no grouping. */ > +bool > +__grouping_iterator_init_none (struct grouping_iterator *it, size_t digits) > +{ > + memset (it, 0, sizeof (*it)); > + it->remaining_in_current_group = digits; > + it->remaining = digits; > + return false; > +} Ok, I would guess compiler will optimize the memset away. > + > +static bool > +grouping_iterator_setup (struct grouping_iterator *it, size_t digits, > + const char *grouping) > +{ > + /* We treat all negative values like CHAR_MAX. */ > + > + if (*grouping == CHAR_MAX || *grouping <= 0) > + /* No grouping should be done. 
*/ > + return __grouping_iterator_init_none (it, digits); > + > + size_t remaining_to_group = digits; > + size_t non_repeating_groups = 0; > + size_t groups = 0; > + while (true) > + { > + non_repeating_groups += *grouping; > + if (remaining_to_group <= (unsigned int) *grouping) > + break; > + > + ++groups; > + remaining_to_group -= *grouping++; > + > + if (*grouping == CHAR_MAX > +#if CHAR_MIN < 0 > + || *grouping < 0 > +#endif > + ) > + /* No more grouping should be done. */ > + break; > + else if (*grouping == 0) > + { > + /* Same grouping repeats. */ > + --grouping; > + non_repeating_groups -= *grouping; /* Over-counted. */ > + size_t repeats = (remaining_to_group - 1) / *grouping; > + groups += repeats; > + remaining_to_group -= repeats * *grouping; > + break; > + } > + } > + > + it->remaining_in_current_group = remaining_to_group; > + it->remaining = digits; > + it->groupings = grouping; > + it->non_repeating_groups = non_repeating_groups; > + it->separators = groups; > + return it->separators > 0; > +} > + > +/* Returns the appropriate grouping item in LOC depending on CATEGORY > + (which must be LC_MONETARY or LC_NUMERIC). */ > +static const char * > +get_grouping (int category, locale_t loc) > +{ > + return _nl_lookup (loc, category, > + category == LC_MONETARY ? MON_GROUPING : GROUPING); > +} > + > + > +bool > +__grouping_iterator_init (struct grouping_iterator *it, > + int category, locale_t loc, size_t digits) > +{ > + if (digits <= 1) > + return __grouping_iterator_init_none (it, digits); > + else > + return grouping_iterator_setup (it, digits, get_grouping (category, loc)); > +} > + > +bool > +__grouping_iterator_next (struct grouping_iterator *it) > +{ > + assert (it->remaining > 0); > + --it->remaining; > + > + if (it->remaining_in_current_group > 0) > + { > + --it->remaining_in_current_group; > + return false; > + } > + > + /* If we are in the non-repeating part, switch group. 
*/ > + if (it->remaining < it->non_repeating_groups) > + --it->groupings; > + > + it->remaining_in_current_group = *it->groupings - 1; > + return true; > +} Ok. > diff --git a/stdio-common/grouping_iterator.h b/stdio-common/grouping_iterator.h > new file mode 100644 > index 0000000000..ca41a7fdc1 > --- /dev/null > +++ b/stdio-common/grouping_iterator.h > @@ -0,0 +1,65 @@ > +/* Iterator for grouping a number while scanning it forward. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef GROUPING_ITERATOR_H > +#define GROUPING_ITERATOR_H > + > +#include <locale.h> > +#include <stdbool.h> > +#include <stddef.h> > + > +struct grouping_iterator > +{ > + /* Number of characters in the current group. If this reaches zero, > + a thousands separator needs to be emittted. */ > + size_t remaining_in_current_group; > + > + /* Number of characters remaining in the number. This is used to > + detect the start of the non-repeating groups. */ > + size_t remaining; > + > + /* Points to the current grouping descriptor. */ > + const char *groupings; > + > + /* Total number of characters in the non-repeating groups. 
*/ > + size_t non_repeating_groups; > + > + /* Number of separators that will be inserted if the whole number is > + processed. (Does not change during iteration.) */ > + size_t separators; > +}; > + > +struct __locale_data; > + > +/* Initializes *IT with the data from LOCDATA (which must be for > + LC_MONETARY or LC_NUMERIC). DIGITS is the length of the number. > + Returns true if grouping is active, false if not. */ > +bool __grouping_iterator_init (struct grouping_iterator *it, > + int category, locale_t loc, > + size_t digits) attribute_hidden; > + > +/* Initializes *IT with no grouping information for a string of length > + DIGITS, and return false to indicate no grouping. */ > +bool __grouping_iterator_init_none (struct grouping_iterator *it, size_t digits) > + attribute_hidden; > + > +/* Advances to the next character and returns true if a thousands > + separator should be inserted before emitting it. */ > +bool __grouping_iterator_next (struct grouping_iterator *it); > + > +#endif /* GROUPING_ITERATOR_H */ Ok. > diff --git a/stdio-common/tst-grouping_iterator.c b/stdio-common/tst-grouping_iterator.c > new file mode 100644 > index 0000000000..97d8f40628 > --- /dev/null > +++ b/stdio-common/tst-grouping_iterator.c > @@ -0,0 +1,262 @@ > +/* Test for struct grouping_iterator. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. 
> + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +/* Rebuild the fail to access internal-only functions. */ > +#include <grouping_iterator.c> > + > +#include <stdio.h> > +#include <support/check.h> > +#include <support/support.h> > +#include <support/test-driver.h> > + > +static void > +check (int lineno, const char *groupings, > + const char *input, const char *expected) > +{ > + if (test_verbose) > + { > + printf ("info: %s:%d: \"%s\" via \"", __FILE__, lineno, input); > + for (const char *p = groupings; *p != 0; ++p) > + printf ("\\%o", *p & 0xff); > + printf ("\" to \"%s\"\n", expected); > + } > + > + size_t initial_group = strchrnul (expected, '\'') - expected; > + size_t separators = 0; > + for (const char *p = expected; *p != '\0'; ++p) > + separators += *p == '\''; > + > + size_t digits = strlen (input); > + char *out = xmalloc (2 * digits + 1); > + > + struct grouping_iterator it; > + TEST_COMPARE (grouping_iterator_setup (&it, digits, groupings), > + strchr (expected, '\'') != NULL); > + TEST_COMPARE (it.remaining, digits); > + TEST_COMPARE (it.remaining_in_current_group, initial_group); > + TEST_COMPARE (it.separators, separators); > + > + char *p = out; > + while (*input != '\0') > + { > + if (__grouping_iterator_next (&it)) > + *p++ = '\''; > + TEST_COMPARE (it.separators, separators); > + *p++ = *input++; > + } > + *p++ = '\0'; > + > + TEST_COMPARE (it.remaining, 0); > + TEST_COMPARE (it.remaining_in_current_group, 0); > + > + TEST_COMPARE_STRING (out, expected); > + > + free (out); > +} > + > +static int > +do_test (void) > +{ > + check (__LINE__, "", "1", "1"); > + check (__LINE__, "", "12", "12"); > + check (__LINE__, "", "123", "123"); > + check (__LINE__, "", "1234", "1234"); > + > + check (__LINE__, "\3", "1", "1"); > + check (__LINE__, "\3", "12", "12"); > + check (__LINE__, "\3", "123", "123"); > + check (__LINE__, "\3", 
"1234", "1'234"); > + check (__LINE__, "\3", "12345", "12'345"); > + check (__LINE__, "\3", "123456", "123'456"); > + check (__LINE__, "\3", "1234567", "1'234'567"); > + check (__LINE__, "\3", "12345678", "12'345'678"); > + check (__LINE__, "\3", "123456789", "123'456'789"); > + check (__LINE__, "\3", "1234567890", "1'234'567'890"); > + > + check (__LINE__, "\2\3", "1", "1"); > + check (__LINE__, "\2\3", "12", "12"); > + check (__LINE__, "\2\3", "123", "1'23"); > + check (__LINE__, "\2\3", "1234", "12'34"); > + check (__LINE__, "\2\3", "12345", "123'45"); > + check (__LINE__, "\2\3", "123456", "1'234'56"); > + check (__LINE__, "\2\3", "1234567", "12'345'67"); > + check (__LINE__, "\2\3", "12345678", "123'456'78"); > + check (__LINE__, "\2\3", "123456789", "1'234'567'89"); > + check (__LINE__, "\2\3", "1234567890", "12'345'678'90"); > + > + check (__LINE__, "\3\2", "1", "1"); > + check (__LINE__, "\3\2", "12", "12"); > + check (__LINE__, "\3\2", "123", "123"); > + check (__LINE__, "\3\2", "1234", "1'234"); > + check (__LINE__, "\3\2", "12345", "12'345"); > + check (__LINE__, "\3\2", "123456", "1'23'456"); > + check (__LINE__, "\3\2", "1234567", "12'34'567"); > + check (__LINE__, "\3\2", "12345678", "1'23'45'678"); > + check (__LINE__, "\3\2", "123456789", "12'34'56'789"); > + check (__LINE__, "\3\2", "1234567890", "1'23'45'67'890"); > + > + check (__LINE__, "\3\2\1", "1", "1"); > + check (__LINE__, "\3\2\1", "12", "12"); > + check (__LINE__, "\3\2\1", "123", "123"); > + check (__LINE__, "\3\2\1", "1234", "1'234"); > + check (__LINE__, "\3\2\1", "12345", "12'345"); > + check (__LINE__, "\3\2\1", "123456", "1'23'456"); > + check (__LINE__, "\3\2\1", "1234567", "1'2'34'567"); > + check (__LINE__, "\3\2\1", "12345678", "1'2'3'45'678"); > + check (__LINE__, "\3\2\1", "123456789", "1'2'3'4'56'789"); > + check (__LINE__, "\3\2\1", "1234567890", "1'2'3'4'5'67'890"); > + > + check (__LINE__, "\2\3\1", "1", "1"); > + check (__LINE__, "\2\3\1", "12", "12"); > + check 
(__LINE__, "\2\3\1", "123", "1'23"); > + check (__LINE__, "\2\3\1", "1234", "12'34"); > + check (__LINE__, "\2\3\1", "12345", "123'45"); > + check (__LINE__, "\2\3\1", "123456", "1'234'56"); > + check (__LINE__, "\2\3\1", "1234567", "1'2'345'67"); > + check (__LINE__, "\2\3\1", "12345678", "1'2'3'456'78"); > + check (__LINE__, "\2\3\1", "123456789", "1'2'3'4'567'89"); > + check (__LINE__, "\2\3\1", "1234567890", "1'2'3'4'5'678'90"); > + > + /* No repeats. */ > + check (__LINE__, "\3\377", "1", "1"); > + check (__LINE__, "\3\377", "12", "12"); > + check (__LINE__, "\3\377", "123", "123"); > + check (__LINE__, "\3\377", "1234", "1'234"); > + check (__LINE__, "\3\377", "12345", "12'345"); > + check (__LINE__, "\3\377", "123456", "123'456"); > + check (__LINE__, "\3\377", "1234567", "1234'567"); > + check (__LINE__, "\3\377", "12345678", "12345'678"); > + > + check (__LINE__, "\2\3\377", "1", "1"); > + check (__LINE__, "\2\3\377", "12", "12"); > + check (__LINE__, "\2\3\377", "123", "1'23"); > + check (__LINE__, "\2\3\377", "1234", "12'34"); > + check (__LINE__, "\2\3\377", "12345", "123'45"); > + check (__LINE__, "\2\3\377", "123456", "1'234'56"); > + check (__LINE__, "\2\3\377", "1234567", "12'345'67"); > + check (__LINE__, "\2\3\377", "12345678", "123'456'78"); > + check (__LINE__, "\2\3\377", "123456789", "1234'567'89"); > + check (__LINE__, "\2\3\377", "1234567890", "12345'678'90"); > + > + check (__LINE__, "\3\2\377", "1", "1"); > + check (__LINE__, "\3\2\377", "12", "12"); > + check (__LINE__, "\3\2\377", "123", "123"); > + check (__LINE__, "\3\2\377", "1234", "1'234"); > + check (__LINE__, "\3\2\377", "12345", "12'345"); > + check (__LINE__, "\3\2\377", "123456", "1'23'456"); > + check (__LINE__, "\3\2\377", "1234567", "12'34'567"); > + check (__LINE__, "\3\2\377", "12345678", "123'45'678"); > + check (__LINE__, "\3\2\377", "123456789", "1234'56'789"); > + check (__LINE__, "\3\2\377", "1234567890", "12345'67'890"); > + > + /* Locale-based tests. 
*/ > + > + locale_t loc; > + struct lc_ctype_data *ctype; > + struct grouping_iterator it; > + > + loc = newlocale (LC_ALL_MASK, "de_DE.UTF-8", 0); > + TEST_VERIFY_EXIT (loc != 0); > + ctype = loc->__locales[LC_CTYPE]->private; > + TEST_VERIFY (!ctype->outdigit_translation_needed); > + for (int i = 0; i <= 9; ++i) > + TEST_COMPARE (ctype->outdigit_bytes[i], 1); > + TEST_COMPARE (ctype->outdigit_bytes_all_equal, 1); > + TEST_COMPARE (__grouping_iterator_init (&it, LC_NUMERIC, loc, 8), true); > + TEST_COMPARE (it.remaining_in_current_group, 2); > + TEST_COMPARE (it.remaining, 8); > + TEST_COMPARE (*it.groupings, 3); > + TEST_COMPARE (it.non_repeating_groups, 3); /* Locale duplicates 3. */ > + TEST_COMPARE (it.separators, 2); > + TEST_COMPARE (__grouping_iterator_init (&it, LC_MONETARY, loc, 8), true); > + TEST_COMPARE (it.remaining_in_current_group, 2); > + TEST_COMPARE (it.remaining, 8); > + TEST_COMPARE (*it.groupings, 3); > + TEST_COMPARE (it.non_repeating_groups, 3); /* Locale duplicates 3. */ > + TEST_COMPARE (it.separators, 2); > + freelocale (loc); > + > + loc = newlocale (LC_ALL_MASK, "tg_TJ.UTF-8", 0); > + TEST_VERIFY_EXIT (loc != 0); > + ctype = loc->__locales[LC_CTYPE]->private; > + TEST_VERIFY (!ctype->outdigit_translation_needed); > + for (int i = 0; i <= 9; ++i) > + TEST_COMPARE (ctype->outdigit_bytes[i], 1); > + TEST_COMPARE (ctype->outdigit_bytes_all_equal, 1); > + TEST_COMPARE (__grouping_iterator_init (&it, LC_NUMERIC, loc, 8), true); > + TEST_COMPARE (it.remaining_in_current_group, 2); > + TEST_COMPARE (it.remaining, 8); > + TEST_COMPARE (*it.groupings, 3); > + TEST_COMPARE (it.non_repeating_groups, 3); /* Locale duplicates 3. */ > + TEST_COMPARE (it.separators, 2); > + TEST_COMPARE (__grouping_iterator_init (&it, LC_MONETARY, loc, 8), true); > + TEST_COMPARE (it.remaining_in_current_group, 2); > + TEST_COMPARE (it.remaining, 8); > + TEST_COMPARE (*it.groupings, 3); > + TEST_COMPARE (it.non_repeating_groups, 3); /* Locale duplicates 3. 
*/ > + TEST_COMPARE (it.separators, 2); > + freelocale (loc); > + > + loc = newlocale (LC_ALL_MASK, "hi_IN.UTF-8", 0); > + TEST_VERIFY_EXIT (loc != 0); > + ctype = loc->__locales[LC_CTYPE]->private; > + TEST_VERIFY (ctype->outdigit_translation_needed); > + for (int i = 0; i <= 9; ++i) > + /* Locale uses Devanagari digits. */ > + TEST_COMPARE (ctype->outdigit_bytes[i], 3); > + TEST_COMPARE (ctype->outdigit_bytes_all_equal, 3); > + TEST_COMPARE (__grouping_iterator_init (&it, LC_NUMERIC, loc, 8), true); > + TEST_COMPARE (it.remaining_in_current_group, 2); > + TEST_COMPARE (it.remaining, 8); > + TEST_COMPARE (*it.groupings, 3); > + TEST_COMPARE (it.non_repeating_groups, 0); > + TEST_COMPARE (it.separators, 2); > + TEST_COMPARE (__grouping_iterator_init (&it, LC_MONETARY, loc, 8), true); > + TEST_COMPARE (it.remaining_in_current_group, 1); > + TEST_COMPARE (it.remaining, 8); > + TEST_COMPARE (*it.groupings, 2); > + TEST_COMPARE (it.non_repeating_groups, 3); > + TEST_COMPARE (it.separators, 3); > + freelocale (loc); > + > + loc = newlocale (LC_ALL_MASK, "ps_AF.UTF-8", 0); > + TEST_VERIFY_EXIT (loc != 0); > + ctype = loc->__locales[LC_CTYPE]->private; > + TEST_VERIFY (ctype->outdigit_translation_needed); > + for (int i = 0; i <= 9; ++i) > + /* Locale uses non-ASCII digits. 
*/ > + TEST_COMPARE (ctype->outdigit_bytes[i], 2); > + TEST_COMPARE (ctype->outdigit_bytes_all_equal, 2); > + TEST_COMPARE (__grouping_iterator_init (&it, LC_NUMERIC, loc, 8), true); > + TEST_COMPARE (it.remaining_in_current_group, 2); > + TEST_COMPARE (it.remaining, 8); > + TEST_COMPARE (*it.groupings, 3); > + TEST_COMPARE (it.non_repeating_groups, 0); > + TEST_COMPARE (it.separators, 2); > + TEST_COMPARE (__grouping_iterator_init (&it, LC_MONETARY, loc, 8), true); > + TEST_COMPARE (it.remaining_in_current_group, 2); > + TEST_COMPARE (it.remaining, 8); > + TEST_COMPARE (*it.groupings, 3); > + TEST_COMPARE (it.non_repeating_groups, 0); > + TEST_COMPARE (it.separators, 2); > + freelocale (loc); > + > + return 0; > +} > + > +#include <support/test-driver.c> Ok.
On Mai 23 2022, Florian Weimer via Libc-alpha wrote: > +struct grouping_iterator > +{ > + /* Number of characters in the current group. If this reaches zero, > + a thousands separator needs to be emittted. */ > + size_t remaining_in_current_group; > + > + /* Number of characters remaining in the number. This is used to > + detect the start of the non-repeating groups. */ > + size_t remaining; > + > + /* Points to the current grouping descriptor. */ > + const char *groupings; > + > + /* Total number of characters in the non-repeating groups. */ > + size_t non_repeating_groups; > + > + /* Number of separators that will be inserted if the whole number is > + processed. (Does not change during iteration.) */ > + size_t separators; Does that really need the full range of size_t? I think unsigned int should be enough. > +}; > + > +struct __locale_data; > + > +/* Initializes *IT with the data from LOCDATA (which must be for > + LC_MONETARY or LC_NUMERIC). DIGITS is the length of the number. > + Returns true if grouping is active, false if not. */ > +bool __grouping_iterator_init (struct grouping_iterator *it, > + int category, locale_t loc, > + size_t digits) attribute_hidden; > + > +/* Initializes *IT with no grouping information for a string of length > + DIGITS, and return false to indicate no grouping. */ > +bool __grouping_iterator_init_none (struct grouping_iterator *it, size_t digits) > + attribute_hidden; Similarly, the digits argument can be unsigned int (format string lengths are limited by int anyway).
* Andreas Schwab: > On Mai 23 2022, Florian Weimer via Libc-alpha wrote: > >> +struct grouping_iterator >> +{ >> + /* Number of characters in the current group. If this reaches zero, >> + a thousands separator needs to be emittted. */ >> + size_t remaining_in_current_group; >> + >> + /* Number of characters remaining in the number. This is used to >> + detect the start of the non-repeating groups. */ >> + size_t remaining; >> + >> + /* Points to the current grouping descriptor. */ >> + const char *groupings; >> + >> + /* Total number of characters in the non-repeating groups. */ >> + size_t non_repeating_groups; >> + >> + /* Number of separators that will be inserted if the whole number is >> + processed. (Does not change during iteration.) */ >> + size_t separators; > > Does that really need the full range of size_t? I think unsigned int > should be enough. > >> +}; >> + >> +struct __locale_data; >> + >> +/* Initializes *IT with the data from LOCDATA (which must be for >> + LC_MONETARY or LC_NUMERIC). DIGITS is the length of the number. >> + Returns true if grouping is active, false if not. */ >> +bool __grouping_iterator_init (struct grouping_iterator *it, >> + int category, locale_t loc, >> + size_t digits) attribute_hidden; >> + >> +/* Initializes *IT with no grouping information for a string of length >> + DIGITS, and return false to indicate no grouping. */ >> +bool __grouping_iterator_init_none (struct grouping_iterator *it, size_t digits) >> + attribute_hidden; > > Similarily, the digits argument can be unsigned int (format string > lengths are limited by int anyway). Yes, switching to unsigned int should work. I will make the change and re-test. Thanks, Florian
diff --git a/stdio-common/Makefile b/stdio-common/Makefile index b1e9144de0..da3a3bc0c9 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -39,6 +39,7 @@ routines := \ gentempfd \ getline \ getw \ + grouping_iterator \ iovfscanf \ isoc99_fscanf \ isoc99_scanf \ @@ -221,6 +222,10 @@ generated += \ siglist-aux.S \ # generated +tests-internal = \ + tst-grouping_iterator \ + # tests-internal + test-srcs = tst-unbputc tst-printf tst-printfsz-islongdouble ifeq ($(run-built-tests),yes) @@ -288,13 +293,15 @@ LOCALES := \ hi_IN.UTF-8 \ ja_JP.EUC-JP \ ps_AF.UTF-8 \ - # LOCALES + tg_TJ.UTF-8 \ + # LOCALES include ../gen-locales.mk $(objpfx)bug14.out: $(gen-locales) $(objpfx)scanf13.out: $(gen-locales) $(objpfx)test-vfprintf.out: $(gen-locales) $(objpfx)tst-grouping.out: $(gen-locales) +$(objpfx)tst-grouping_iterator.out: $(gen-locales) $(objpfx)tst-sprintf.out: $(gen-locales) $(objpfx)tst-sscanf.out: $(gen-locales) $(objpfx)tst-swprintf.out: $(gen-locales) diff --git a/stdio-common/grouping_iterator.c b/stdio-common/grouping_iterator.c new file mode 100644 index 0000000000..cc169e2b09 --- /dev/null +++ b/stdio-common/grouping_iterator.c @@ -0,0 +1,125 @@ +/* Iterator for inserting thousands separators into numbers. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <grouping_iterator.h> + +#include <assert.h> +#include <limits.h> +#include <locale/localeinfo.h> +#include <stdint.h> +#include <string.h> + +/* Initializes *IT with no grouping information for a string of length + DIGITS, and return false to indicate no grouping. */ +bool +__grouping_iterator_init_none (struct grouping_iterator *it, size_t digits) +{ + memset (it, 0, sizeof (*it)); + it->remaining_in_current_group = digits; + it->remaining = digits; + return false; +} + +static bool +grouping_iterator_setup (struct grouping_iterator *it, size_t digits, + const char *grouping) +{ + /* We treat all negative values like CHAR_MAX. */ + + if (*grouping == CHAR_MAX || *grouping <= 0) + /* No grouping should be done. */ + return __grouping_iterator_init_none (it, digits); + + size_t remaining_to_group = digits; + size_t non_repeating_groups = 0; + size_t groups = 0; + while (true) + { + non_repeating_groups += *grouping; + if (remaining_to_group <= (unsigned int) *grouping) + break; + + ++groups; + remaining_to_group -= *grouping++; + + if (*grouping == CHAR_MAX +#if CHAR_MIN < 0 + || *grouping < 0 +#endif + ) + /* No more grouping should be done. */ + break; + else if (*grouping == 0) + { + /* Same grouping repeats. */ + --grouping; + non_repeating_groups -= *grouping; /* Over-counted. */ + size_t repeats = (remaining_to_group - 1) / *grouping; + groups += repeats; + remaining_to_group -= repeats * *grouping; + break; + } + } + + it->remaining_in_current_group = remaining_to_group; + it->remaining = digits; + it->groupings = grouping; + it->non_repeating_groups = non_repeating_groups; + it->separators = groups; + return it->separators > 0; +} + +/* Returns the appropriate grouping item in LOC depending on CATEGORY + (which must be LC_MONETARY or LC_NUMERIC). 
*/ +static const char * +get_grouping (int category, locale_t loc) +{ + return _nl_lookup (loc, category, + category == LC_MONETARY ? MON_GROUPING : GROUPING); +} + + +bool +__grouping_iterator_init (struct grouping_iterator *it, + int category, locale_t loc, size_t digits) +{ + if (digits <= 1) + return __grouping_iterator_init_none (it, digits); + else + return grouping_iterator_setup (it, digits, get_grouping (category, loc)); +} + +bool +__grouping_iterator_next (struct grouping_iterator *it) +{ + assert (it->remaining > 0); + --it->remaining; + + if (it->remaining_in_current_group > 0) + { + --it->remaining_in_current_group; + return false; + } + + /* If we are in the non-repeating part, switch group. */ + if (it->remaining < it->non_repeating_groups) + --it->groupings; + + it->remaining_in_current_group = *it->groupings - 1; + return true; +} diff --git a/stdio-common/grouping_iterator.h b/stdio-common/grouping_iterator.h new file mode 100644 index 0000000000..ca41a7fdc1 --- /dev/null +++ b/stdio-common/grouping_iterator.h @@ -0,0 +1,65 @@ +/* Iterator for grouping a number while scanning it forward. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#ifndef GROUPING_ITERATOR_H +#define GROUPING_ITERATOR_H + +#include <locale.h> +#include <stdbool.h> +#include <stddef.h> + +struct grouping_iterator +{ + /* Number of characters in the current group. If this reaches zero, + a thousands separator needs to be emittted. */ + size_t remaining_in_current_group; + + /* Number of characters remaining in the number. This is used to + detect the start of the non-repeating groups. */ + size_t remaining; + + /* Points to the current grouping descriptor. */ + const char *groupings; + + /* Total number of characters in the non-repeating groups. */ + size_t non_repeating_groups; + + /* Number of separators that will be inserted if the whole number is + processed. (Does not change during iteration.) */ + size_t separators; +}; + +struct __locale_data; + +/* Initializes *IT with the data from LOCDATA (which must be for + LC_MONETARY or LC_NUMERIC). DIGITS is the length of the number. + Returns true if grouping is active, false if not. */ +bool __grouping_iterator_init (struct grouping_iterator *it, + int category, locale_t loc, + size_t digits) attribute_hidden; + +/* Initializes *IT with no grouping information for a string of length + DIGITS, and return false to indicate no grouping. */ +bool __grouping_iterator_init_none (struct grouping_iterator *it, size_t digits) + attribute_hidden; + +/* Advances to the next character and returns true if a thousands + separator should be inserted before emitting it. */ +bool __grouping_iterator_next (struct grouping_iterator *it); + +#endif /* GROUPING_ITERATOR_H */ diff --git a/stdio-common/tst-grouping_iterator.c b/stdio-common/tst-grouping_iterator.c new file mode 100644 index 0000000000..97d8f40628 --- /dev/null +++ b/stdio-common/tst-grouping_iterator.c @@ -0,0 +1,262 @@ +/* Test for struct grouping_iterator. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* Rebuild the fail to access internal-only functions. */ +#include <grouping_iterator.c> + +#include <stdio.h> +#include <support/check.h> +#include <support/support.h> +#include <support/test-driver.h> + +static void +check (int lineno, const char *groupings, + const char *input, const char *expected) +{ + if (test_verbose) + { + printf ("info: %s:%d: \"%s\" via \"", __FILE__, lineno, input); + for (const char *p = groupings; *p != 0; ++p) + printf ("\\%o", *p & 0xff); + printf ("\" to \"%s\"\n", expected); + } + + size_t initial_group = strchrnul (expected, '\'') - expected; + size_t separators = 0; + for (const char *p = expected; *p != '\0'; ++p) + separators += *p == '\''; + + size_t digits = strlen (input); + char *out = xmalloc (2 * digits + 1); + + struct grouping_iterator it; + TEST_COMPARE (grouping_iterator_setup (&it, digits, groupings), + strchr (expected, '\'') != NULL); + TEST_COMPARE (it.remaining, digits); + TEST_COMPARE (it.remaining_in_current_group, initial_group); + TEST_COMPARE (it.separators, separators); + + char *p = out; + while (*input != '\0') + { + if (__grouping_iterator_next (&it)) + *p++ = '\''; + TEST_COMPARE (it.separators, separators); + *p++ = *input++; + } + *p++ = '\0'; + + TEST_COMPARE (it.remaining, 0); + TEST_COMPARE (it.remaining_in_current_group, 0); + + 
TEST_COMPARE_STRING (out, expected); + + free (out); +} + +static int +do_test (void) +{ + check (__LINE__, "", "1", "1"); + check (__LINE__, "", "12", "12"); + check (__LINE__, "", "123", "123"); + check (__LINE__, "", "1234", "1234"); + + check (__LINE__, "\3", "1", "1"); + check (__LINE__, "\3", "12", "12"); + check (__LINE__, "\3", "123", "123"); + check (__LINE__, "\3", "1234", "1'234"); + check (__LINE__, "\3", "12345", "12'345"); + check (__LINE__, "\3", "123456", "123'456"); + check (__LINE__, "\3", "1234567", "1'234'567"); + check (__LINE__, "\3", "12345678", "12'345'678"); + check (__LINE__, "\3", "123456789", "123'456'789"); + check (__LINE__, "\3", "1234567890", "1'234'567'890"); + + check (__LINE__, "\2\3", "1", "1"); + check (__LINE__, "\2\3", "12", "12"); + check (__LINE__, "\2\3", "123", "1'23"); + check (__LINE__, "\2\3", "1234", "12'34"); + check (__LINE__, "\2\3", "12345", "123'45"); + check (__LINE__, "\2\3", "123456", "1'234'56"); + check (__LINE__, "\2\3", "1234567", "12'345'67"); + check (__LINE__, "\2\3", "12345678", "123'456'78"); + check (__LINE__, "\2\3", "123456789", "1'234'567'89"); + check (__LINE__, "\2\3", "1234567890", "12'345'678'90"); + + check (__LINE__, "\3\2", "1", "1"); + check (__LINE__, "\3\2", "12", "12"); + check (__LINE__, "\3\2", "123", "123"); + check (__LINE__, "\3\2", "1234", "1'234"); + check (__LINE__, "\3\2", "12345", "12'345"); + check (__LINE__, "\3\2", "123456", "1'23'456"); + check (__LINE__, "\3\2", "1234567", "12'34'567"); + check (__LINE__, "\3\2", "12345678", "1'23'45'678"); + check (__LINE__, "\3\2", "123456789", "12'34'56'789"); + check (__LINE__, "\3\2", "1234567890", "1'23'45'67'890"); + + check (__LINE__, "\3\2\1", "1", "1"); + check (__LINE__, "\3\2\1", "12", "12"); + check (__LINE__, "\3\2\1", "123", "123"); + check (__LINE__, "\3\2\1", "1234", "1'234"); + check (__LINE__, "\3\2\1", "12345", "12'345"); + check (__LINE__, "\3\2\1", "123456", "1'23'456"); + check (__LINE__, "\3\2\1", "1234567", 
"1'2'34'567"); + check (__LINE__, "\3\2\1", "12345678", "1'2'3'45'678"); + check (__LINE__, "\3\2\1", "123456789", "1'2'3'4'56'789"); + check (__LINE__, "\3\2\1", "1234567890", "1'2'3'4'5'67'890"); + + check (__LINE__, "\2\3\1", "1", "1"); + check (__LINE__, "\2\3\1", "12", "12"); + check (__LINE__, "\2\3\1", "123", "1'23"); + check (__LINE__, "\2\3\1", "1234", "12'34"); + check (__LINE__, "\2\3\1", "12345", "123'45"); + check (__LINE__, "\2\3\1", "123456", "1'234'56"); + check (__LINE__, "\2\3\1", "1234567", "1'2'345'67"); + check (__LINE__, "\2\3\1", "12345678", "1'2'3'456'78"); + check (__LINE__, "\2\3\1", "123456789", "1'2'3'4'567'89"); + check (__LINE__, "\2\3\1", "1234567890", "1'2'3'4'5'678'90"); + + /* No repeats. */ + check (__LINE__, "\3\377", "1", "1"); + check (__LINE__, "\3\377", "12", "12"); + check (__LINE__, "\3\377", "123", "123"); + check (__LINE__, "\3\377", "1234", "1'234"); + check (__LINE__, "\3\377", "12345", "12'345"); + check (__LINE__, "\3\377", "123456", "123'456"); + check (__LINE__, "\3\377", "1234567", "1234'567"); + check (__LINE__, "\3\377", "12345678", "12345'678"); + + check (__LINE__, "\2\3\377", "1", "1"); + check (__LINE__, "\2\3\377", "12", "12"); + check (__LINE__, "\2\3\377", "123", "1'23"); + check (__LINE__, "\2\3\377", "1234", "12'34"); + check (__LINE__, "\2\3\377", "12345", "123'45"); + check (__LINE__, "\2\3\377", "123456", "1'234'56"); + check (__LINE__, "\2\3\377", "1234567", "12'345'67"); + check (__LINE__, "\2\3\377", "12345678", "123'456'78"); + check (__LINE__, "\2\3\377", "123456789", "1234'567'89"); + check (__LINE__, "\2\3\377", "1234567890", "12345'678'90"); + + check (__LINE__, "\3\2\377", "1", "1"); + check (__LINE__, "\3\2\377", "12", "12"); + check (__LINE__, "\3\2\377", "123", "123"); + check (__LINE__, "\3\2\377", "1234", "1'234"); + check (__LINE__, "\3\2\377", "12345", "12'345"); + check (__LINE__, "\3\2\377", "123456", "1'23'456"); + check (__LINE__, "\3\2\377", "1234567", "12'34'567"); + check 
(__LINE__, "\3\2\377", "12345678", "123'45'678"); + check (__LINE__, "\3\2\377", "123456789", "1234'56'789"); + check (__LINE__, "\3\2\377", "1234567890", "12345'67'890"); + + /* Locale-based tests. */ + + locale_t loc; + struct lc_ctype_data *ctype; + struct grouping_iterator it; + + loc = newlocale (LC_ALL_MASK, "de_DE.UTF-8", 0); + TEST_VERIFY_EXIT (loc != 0); + ctype = loc->__locales[LC_CTYPE]->private; + TEST_VERIFY (!ctype->outdigit_translation_needed); + for (int i = 0; i <= 9; ++i) + TEST_COMPARE (ctype->outdigit_bytes[i], 1); + TEST_COMPARE (ctype->outdigit_bytes_all_equal, 1); + TEST_COMPARE (__grouping_iterator_init (&it, LC_NUMERIC, loc, 8), true); + TEST_COMPARE (it.remaining_in_current_group, 2); + TEST_COMPARE (it.remaining, 8); + TEST_COMPARE (*it.groupings, 3); + TEST_COMPARE (it.non_repeating_groups, 3); /* Locale duplicates 3. */ + TEST_COMPARE (it.separators, 2); + TEST_COMPARE (__grouping_iterator_init (&it, LC_MONETARY, loc, 8), true); + TEST_COMPARE (it.remaining_in_current_group, 2); + TEST_COMPARE (it.remaining, 8); + TEST_COMPARE (*it.groupings, 3); + TEST_COMPARE (it.non_repeating_groups, 3); /* Locale duplicates 3. */ + TEST_COMPARE (it.separators, 2); + freelocale (loc); + + loc = newlocale (LC_ALL_MASK, "tg_TJ.UTF-8", 0); + TEST_VERIFY_EXIT (loc != 0); + ctype = loc->__locales[LC_CTYPE]->private; + TEST_VERIFY (!ctype->outdigit_translation_needed); + for (int i = 0; i <= 9; ++i) + TEST_COMPARE (ctype->outdigit_bytes[i], 1); + TEST_COMPARE (ctype->outdigit_bytes_all_equal, 1); + TEST_COMPARE (__grouping_iterator_init (&it, LC_NUMERIC, loc, 8), true); + TEST_COMPARE (it.remaining_in_current_group, 2); + TEST_COMPARE (it.remaining, 8); + TEST_COMPARE (*it.groupings, 3); + TEST_COMPARE (it.non_repeating_groups, 3); /* Locale duplicates 3. 
*/ + TEST_COMPARE (it.separators, 2); + TEST_COMPARE (__grouping_iterator_init (&it, LC_MONETARY, loc, 8), true); + TEST_COMPARE (it.remaining_in_current_group, 2); + TEST_COMPARE (it.remaining, 8); + TEST_COMPARE (*it.groupings, 3); + TEST_COMPARE (it.non_repeating_groups, 3); /* Locale duplicates 3. */ + TEST_COMPARE (it.separators, 2); + freelocale (loc); + + loc = newlocale (LC_ALL_MASK, "hi_IN.UTF-8", 0); + TEST_VERIFY_EXIT (loc != 0); + ctype = loc->__locales[LC_CTYPE]->private; + TEST_VERIFY (ctype->outdigit_translation_needed); + for (int i = 0; i <= 9; ++i) + /* Locale uses Devanagari digits. */ + TEST_COMPARE (ctype->outdigit_bytes[i], 3); + TEST_COMPARE (ctype->outdigit_bytes_all_equal, 3); + TEST_COMPARE (__grouping_iterator_init (&it, LC_NUMERIC, loc, 8), true); + TEST_COMPARE (it.remaining_in_current_group, 2); + TEST_COMPARE (it.remaining, 8); + TEST_COMPARE (*it.groupings, 3); + TEST_COMPARE (it.non_repeating_groups, 0); + TEST_COMPARE (it.separators, 2); + TEST_COMPARE (__grouping_iterator_init (&it, LC_MONETARY, loc, 8), true); + TEST_COMPARE (it.remaining_in_current_group, 1); + TEST_COMPARE (it.remaining, 8); + TEST_COMPARE (*it.groupings, 2); + TEST_COMPARE (it.non_repeating_groups, 3); + TEST_COMPARE (it.separators, 3); + freelocale (loc); + + loc = newlocale (LC_ALL_MASK, "ps_AF.UTF-8", 0); + TEST_VERIFY_EXIT (loc != 0); + ctype = loc->__locales[LC_CTYPE]->private; + TEST_VERIFY (ctype->outdigit_translation_needed); + for (int i = 0; i <= 9; ++i) + /* Locale uses non-ASCII digits. 
*/ + TEST_COMPARE (ctype->outdigit_bytes[i], 2); + TEST_COMPARE (ctype->outdigit_bytes_all_equal, 2); + TEST_COMPARE (__grouping_iterator_init (&it, LC_NUMERIC, loc, 8), true); + TEST_COMPARE (it.remaining_in_current_group, 2); + TEST_COMPARE (it.remaining, 8); + TEST_COMPARE (*it.groupings, 3); + TEST_COMPARE (it.non_repeating_groups, 0); + TEST_COMPARE (it.separators, 2); + TEST_COMPARE (__grouping_iterator_init (&it, LC_MONETARY, loc, 8), true); + TEST_COMPARE (it.remaining_in_current_group, 2); + TEST_COMPARE (it.remaining, 8); + TEST_COMPARE (*it.groupings, 3); + TEST_COMPARE (it.non_repeating_groups, 0); + TEST_COMPARE (it.separators, 2); + freelocale (loc); + + return 0; +} + +#include <support/test-driver.c>