[uclibc-ng-devel] towlower and locales

Submitted by Eugene Yudin on Aug. 2, 2017, 9:50 p.m.

Details

Message ID CAPmKWpHpQT5JF8yd0q1zUtNYFaECY_6dzU8RiPdHhrPQoLsimw@mail.gmail.com
State New
Headers show

Commit Message

Eugene Yudin Aug. 2, 2017, 9:50 p.m.
Hi, Waldemar,

I cannot say anything without a verbose build.
Unfortunately, fatal errors are only visible in that mode.
I have attached a patch to make the errors visible.

Do you know which Linux distro is used on this build machine?
That would be very helpful for reproducing the problem.

On Wed, Aug 2, 2017 at 11:10 PM, Waldemar Brodkorb <wbx@uclibc-ng.org>
wrote:

> Hi Eugene,
>
> I added the patch to 1.0.26, but buildroot shows some
> regressions:
> http://autobuild.buildroot.net/results/d5ba81eea9223569ba5b363551c4a2f7044ff8ec
>
> Any idea what is wrong?
>
> best regards
>  Waldemar
>
> Eugene Yudin wrote,
>
> > Hi,
> >
> > The function towlower doesn't work with locales different from C.
> > The issue was introduced in this commit:
> > https://cgit.openadk.org/cgi/cgit/uclibc-ng.git/commit/?id=8cde3a9bf2856dcb9a759dec7ecb04a68e712254
> > A call to setlocale is needed for correct generation of the table uplow_diff.
> > I received the compile-time error "range assumption error" after uncommenting
> > the call.
> > A similar problem is described here:
> > http://lists.uclibc.org/pipermail/uclibc/2015-March/048852.html
> >
> > The attached patch fixes the problem by using int32_t values.
> >
> > Test program:
> > $ cat test.c
> > #include <locale.h>
> > #include <stdio.h>
> > #include <wchar.h>
> > #include <wctype.h>
> >
> > int main(int argc, char *argv[])
> > {
> > int i = 0;
> > wchar_t str[] = L"ТЕСТОВАЯ СТРОКА";
> > wchar_t c;
> >
> > setlocale(LC_ALL, "ru_RU.utf-8");
> >
> > wprintf(L"Input:\t\"%ls\"\n", str);
> > wprintf(L"Output:\t\"");
> >
> > while (str[i]) {
> > c = str[i];
> > putwchar(towlower(c));
> > i++;
> > }
> >
> > wprintf(L"\"\n");
> >
> > return 0;
> > }
> >
> > Output (without patch):
> > $ ./test
> > Input: "ТЕСТОВАЯ СТРОКА"
> > Output: "ТЕСТОВАЯ СТРОКА"
> >
> > Output (with patch):
> > $ ./test
> > Input: "ТЕСТОВАЯ СТРОКА"
> > Output: "тестовая строка"
> >
> > --
> > Best regards,
> > Eugene
>
> > diff --git a/extra/locale/gen_ldc.c b/extra/locale/gen_ldc.c
> > index 2cedbdd..5f45402 100644
> > --- a/extra/locale/gen_ldc.c
> > +++ b/extra/locale/gen_ldc.c
> > @@ -129,6 +129,20 @@ void out_i16(FILE *f, const int16_t *p, size_t n,
> char *comment)
> >       fprintf(f, "\n},\n");
> >  }
> >
> > +void out_i32(FILE *f, const int32_t *p, size_t n, char *comment)
> > +{
> > +     size_t i;
> > +
> > +     fprintf(f, "{\t/* %s */", comment);
> > +     for (i = 0 ; i < n ; i++) {
> > +             if (!(i & 7)) {
> > +                     fprintf(f, "\n\t");
> > +             }
> > +             fprintf(f, "%11d, ", p[i]);
> > +     }
> > +     fprintf(f, "\n},\n");
> > +}
> > +
> >  void out_size_t(FILE *f, const size_t *p, size_t n, char *comment)
> >  {
> >       size_t i;
> > @@ -194,7 +208,7 @@ int main(int argc, char **argv)
> >  #ifdef __WCHAR_ENABLED
> >       out_uc(lso, __LOCALE_DATA_WCctype_data,
> __LOCALE_DATA_WCctype_TBL_LEN, "tblwctype");
> >       out_uc(lso, __LOCALE_DATA_WCuplow_data,
> __LOCALE_DATA_WCuplow_TBL_LEN, "tblwuplow");
> > -     out_i16(lso, __LOCALE_DATA_WCuplow_diff_data,
> __LOCALE_DATA_WCuplow_diff_TBL_LEN, "tblwuplow_diff");
> > +     out_i32(lso, __LOCALE_DATA_WCuplow_diff_data,
> __LOCALE_DATA_WCuplow_diff_TBL_LEN, "tblwuplow_diff");
> >  /*   const unsigned char tblwcomb[WCcomb_TBL_LEN]; */
> >       /* width?? */
> >  #endif /* __WCHAR_ENABLED */
> > diff --git a/extra/locale/gen_wctype.c b/extra/locale/gen_wctype.c
> > index 7034509..99c505d 100644
> > --- a/extra/locale/gen_wctype.c
> > +++ b/extra/locale/gen_wctype.c
> > @@ -83,8 +83,8 @@
> >  #define mywxdigit(D,C) (mywdigit(D,C) || (unsigned)(((C) | 0x20) - 'a')
> <= 5)
> >
> >  typedef struct {
> > -     short l;
> > -     short u;
> > +     int32_t l;
> > +     int32_t u;
> >  } uldiff_entry;
> >
> >  typedef struct {
> > @@ -227,12 +227,11 @@ int main(int argc, char **argv)
> >                       ++verbose;
> >                       continue;
> >               }
> > -     /* setlocale might be just a stub */
> > -     /*      if (!setlocale(LC_CTYPE, *argv)) {
> > +             /* setlocale might be just a stub */
> > +             if (!setlocale(LC_CTYPE, *argv)) {
> >                       verbose_msg("setlocale(LC_CTYPE,%s) failed!
> Skipping this locale...\n", *argv);
> >                       continue;
> >               }
> > -     */
> >               if (!(totitle = wctrans("totitle"))) {
> >                       verbose_msg("no totitle transformation.\n");
> >               }
> > @@ -402,7 +401,7 @@ int main(int argc, char **argv)
> >                               u = (long)(int) towupper(c) - c;
> >                               ult[c] = 0;
> >                               if (l || u) {
> > -                                     if ((l != (short)l) || (u !=
> (short)u)) {
> > +                                     if ((l != (int32_t)l) || (u !=
> (int32_t)u)) {
> >                                               verbose_msg("range
> assumption error!  %x  %ld  %ld\n", c, l, u);
> >                                               return EXIT_FAILURE;
> >                                       }
> > @@ -684,7 +683,7 @@ int main(int argc, char **argv)
> >
> >               printf("#define __LOCALE_DATA_WCuplow_diffs  %7u\n",
> ul_count);
> >               printf("\n#ifdef WANT_WCuplow_diff_data\n\n");
> > -             printf("\nstatic const short __LOCALE_DATA_WCuplow_diff_data[%zu]
> = {",
> > +             printf("\nstatic const int32_t __LOCALE_DATA_WCuplow_diff_data[%zu]
> = {",
> >                          2 * (size_t) ul_count);
> >               for (i = 0; i < ul_count; i++) {
> >                       if (i % 4 == 0) {
> > diff --git a/extra/locale/locale_mmap.h b/extra/locale/locale_mmap.h
> > index 5b0df90..d0ae9af 100644
> > --- a/extra/locale/locale_mmap.h
> > +++ b/extra/locale/locale_mmap.h
> > @@ -45,7 +45,7 @@ typedef struct {
> >  #ifdef __WCHAR_ENABLED
> >       const unsigned char tblwctype[__LOCALE_DATA_WCctype_TBL_LEN];
> >       const unsigned char tblwuplow[__LOCALE_DATA_WCuplow_TBL_LEN];
> > -     const int16_t tblwuplow_diff[__LOCALE_DATA_WCuplow_diff_TBL_LEN];
> > +     const int32_t tblwuplow_diff[__LOCALE_DATA_WCuplow_diff_TBL_LEN];
> >  /*   const unsigned char tblwcomb[WCcomb_TBL_LEN]; */
> >       /* width?? */
> >  #endif
> > diff --git a/libc/misc/locale/locale.c b/libc/misc/locale/locale.c
> > index e38792b..d555f5d 100644
> > --- a/libc/misc/locale/locale.c
> > +++ b/libc/misc/locale/locale.c
> > @@ -820,7 +820,7 @@ void attribute_hidden _locale_init_l(__locale_t base)
> >       base->tblwuplow
> >               = (const unsigned char *) &__locale_mmap->tblwuplow;
> >       base->tblwuplow_diff
> > -             = (const int16_t *) &__locale_mmap->tblwuplow_diff;
> > +             = (const int32_t *) &__locale_mmap->tblwuplow_diff;
> >  /*   base->tblwcomb */
> >  /*           = (const unsigned char *) &__locale_mmap->tblwcomb; */
> >       /* width?? */
> > diff --git a/libc/sysdeps/linux/common/bits/uClibc_locale.h
> b/libc/sysdeps/linux/common/bits/uClibc_locale.h
> > index 6598eaf..43c1e51 100644
> > --- a/libc/sysdeps/linux/common/bits/uClibc_locale.h
> > +++ b/libc/sysdeps/linux/common/bits/uClibc_locale.h
> > @@ -192,7 +192,7 @@ struct __uclibc_locale_struct {
> >       const unsigned char *tblwctype;
> >       const unsigned char *tblwuplow;
> >  /*   const unsigned char *tblwcomb; */
> > -     const int16_t *tblwuplow_diff; /* yes... signed */
> > +     const int32_t *tblwuplow_diff; /* yes... signed */
> >       /* width?? */
> >
> >       wchar_t decimal_point_wc;
>
> > _______________________________________________
> > devel mailing list
> > devel@uclibc-ng.org
> > https://mailman.uclibc-ng.org/cgi-bin/mailman/listinfo/devel
>
>

Comments

Max Filippov Aug. 2, 2017, 10:49 p.m.
Hello,

On Wed, Aug 2, 2017 at 2:50 PM, Eugene Yudin <e.yudin@ndmsystems.com> wrote:
> I cannot say anything without a verbose build.
> Unfortunately, fatal errors are only visible in that mode.
> I have attached a patch to make the errors visible.
>
> Do you know which Linux distro is used on this build machine?
> That would be very helpful for reproducing the problem.

I've tried, but couldn't reproduce the original buildroot build error.
But looking at the change, I see that gen_wctype may now return
with an error status if setlocale fails. It is invoked like this:

for locale in en_US en_US en_GB; do \
        ../..//extra/locale/gen_wctype -v -v $locale >
../..//extra/locale/wctables.h || \
        ../..//extra/locale/gen_wctype -v -v $locale.UTF-8 >
../..//extra/locale/wctables.h || \
        ../..//extra/locale/gen_wctype -v -v $locale.iso8859-1 >
../..//extra/locale/wctables.h && break; \
done

and it fails on my system for en_US, but works for
en_US.UTF-8. Could it be that the buildroot buildbot doesn't have
any usable locale installed?
Should there be some fallback for this case?

Patch hide | download patch | download mbox

diff --git a/extra/locale/gen_wctype.c b/extra/locale/gen_wctype.c
index 99c505d..03b8afb 100644
--- a/extra/locale/gen_wctype.c
+++ b/extra/locale/gen_wctype.c
@@ -359,7 +359,7 @@  int main(int argc, char **argv)
 					if (mywxdigit(d,c)) ++mine;
 
 					if (curr_stdclib != mine) {
-						verbose_msg("%#8x : curr_stdclib %#4x != %#4x mine  %u\n", c, curr_stdclib, mine, d);
+						fprintf(stderr, "%#8x : curr_stdclib %#4x != %#4x mine  %u\n", c, curr_stdclib, mine, d);
 						return EXIT_FAILURE;
 					}
 #if 0
@@ -402,7 +402,7 @@  int main(int argc, char **argv)
 				ult[c] = 0;
 				if (l || u) {
 					if ((l != (int32_t)l) || (u != (int32_t)u)) {
-						verbose_msg("range assumption error!  %x  %ld  %ld\n", c, l, u);
+						fprintf(stderr, "range assumption error!  %x  %ld  %ld\n", c, l, u);
 						return EXIT_FAILURE;
 					}
 					for (i = 0; i < ul_count; i++) {
@@ -414,7 +414,7 @@  int main(int argc, char **argv)
 					uldiff[ul_count].u = u;
 					++ul_count;
 					if (ul_count > MAXTO) {
-						verbose_msg("too many touppers/tolowers!\n");
+						fprintf(stderr, "too many touppers/tolowers!\n");
 						return EXIT_FAILURE;
 					}
  found:
@@ -441,7 +441,7 @@  int main(int argc, char **argv)
 			}
 			verbose_msg("smallest = %zu\n", smallest);
 			if (!(cttable.ii = malloc(smallest))) {
-				verbose_msg("couldn't allocate space!\n");
+				fprintf(stderr, "couldn't allocate space!\n");
 				return EXIT_FAILURE;
 			}
 			smallest = SIZE_MAX;
@@ -467,7 +467,7 @@  int main(int argc, char **argv)
 			);
 			verbose_msg("smallest = %zu\n", smallest);
 			if (!(ultable.ii = malloc(smallest))) {
-				verbose_msg("couldn't allocate space!\n");
+				fprintf(stderr, "couldn't allocate space!\n");
 				return EXIT_FAILURE;
 			}
 			smallest = SIZE_MAX;
@@ -487,7 +487,7 @@  int main(int argc, char **argv)
 			}
 			verbose_msg("smallest = %zu\n", smallest);
 			if (!(combtable.ii = malloc(smallest))) {
-				verbose_msg("couldn't allocate space!\n");
+				fprintf(stderr, "couldn't allocate space!\n");
 				return EXIT_FAILURE;
 			}
 			smallest = SIZE_MAX;
@@ -509,7 +509,7 @@  int main(int argc, char **argv)
 			}
 			verbose_msg("smallest = %zu\n", smallest);
 			if (!(widthtable.ii = malloc(smallest))) {
-				verbose_msg("couldn't allocate space!\n");
+				fprintf(stderr, "couldn't allocate space!\n");
 				return EXIT_FAILURE;
 			}
 			smallest = SIZE_MAX;
@@ -531,7 +531,7 @@  int main(int argc, char **argv)
 			}
 			verbose_msg("smallest = %zu\n", smallest);
 			if (!(comb3table.ii = malloc(smallest))) {
-				verbose_msg("couldn't allocate space!\n");
+				fprintf(stderr, "couldn't allocate space!\n");
 				return EXIT_FAILURE;
 			}
 			smallest = SIZE_MAX;