Message ID | mvm7fo1pbwy.fsf@hawking.suse.de |
---|---|
State | New |
Headers | show |
On 09/08/2015 02:12 AM, Andreas Schwab wrote: > When a SI character has been emitted, but the next character no longer > fits into the buffer we leave the loop without updating our internal > shift state, causing a second SI character to be emitted at the start of > the next call. Andreas, I've tested the patch in an older release of glibc (2-17) with a large input file submitted by a customer of ours running RHEL 6.8 and it fixed the problem. If you still think it's a good fix, it would be great to get it committed. Otherwise, if you are aware of problems with it, can you say what they are? Thanks Martin > > Andreas. > > [BZ #17197] > * iconvdata/ibm930.c (BODY for TO_LOOP): Record current DBCS state > immediately after emitting SI. > * iconvdata/ibm933.c (BODY for TO_LOOP): Likewise. > * iconvdata/ibm935.c (BODY for TO_LOOP): Likewise. > * iconvdata/ibm937.c (BODY for TO_LOOP): Likewise. > * iconvdata/ibm939.c (BODY for TO_LOOP): Likewise. > * iconvdata/bug-iconv10.c: New file. > * iconvdata/Makefile (tests): Add bug-iconv10. > ($(objpfx)bug-iconv10.out): New rule. > --- > iconvdata/Makefile | 5 +++- > iconvdata/bug-iconv10.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++ > iconvdata/ibm930.c | 2 +- > iconvdata/ibm933.c | 2 +- > iconvdata/ibm935.c | 2 +- > iconvdata/ibm937.c | 2 +- > iconvdata/ibm939.c | 2 +- > 7 files changed, 86 insertions(+), 6 deletions(-) > create mode 100644 iconvdata/bug-iconv10.c > > diff --git a/iconvdata/Makefile b/iconvdata/Makefile > index a3d1d09..0c952b3 100644 > --- a/iconvdata/Makefile > +++ b/iconvdata/Makefile > @@ -67,7 +67,8 @@ modules.so := $(addsuffix .so, $(modules)) > > ifeq (yes,$(build-shared)) > tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ > - tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 > + tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ > + bug-iconv10 > ifeq ($(have-thread-library),yes) > tests += bug-iconv3 > endif > @@ -298,6 +299,8 @@ $(objpfx)tst-iconv4.out: $(objpfx)gconv-modules \ > $(addprefix $(objpfx),$(modules.so)) > $(objpfx)tst-iconv7.out: $(objpfx)gconv-modules \ > $(addprefix $(objpfx),$(modules.so)) > +$(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \ > + $(addprefix $(objpfx),$(modules.so)) > > $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \ > $(addprefix $(objpfx),$(modules.so)) \ > diff --git a/iconvdata/bug-iconv10.c b/iconvdata/bug-iconv10.c > new file mode 100644 > index 0000000..98353a2 > --- /dev/null > +++ b/iconvdata/bug-iconv10.c > @@ -0,0 +1,77 @@ > +/* bug 17197: check for redundant shift character at block boundary. > + Copyright (C) 2015 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#include <iconv.h> > +#include <locale.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <errno.h> > + > +static int > +do_test (void) > +{ > + iconv_t cd = iconv_open ("IBM930", "UTF-8"); > + if (cd == (iconv_t) -1) > + { > + puts ("iconv_open failed"); > + return 1; > + } > + > + char instr1[] = "\xc2\xa6."; > + const char expstr1[4] = "\016Bj\017"; > + const char expstr2[] = "K"; > + char outstr[4]; > + size_t inlen = sizeof (instr1); > + size_t outlen = sizeof (outstr); > + char *inptr = instr1; > + char *outptr = outstr; > + size_t r = iconv (cd, &inptr, &inlen, &outptr, &outlen); > + if (r != -1 > + || errno != E2BIG > + || inlen != sizeof (instr1) - 2 > + || inptr != instr1 + 2 > + || outlen != 0 > + || memcmp (outstr, expstr1, sizeof (expstr1)) != 0) > + { > + puts ("wrong first conversion"); > + return 1; > + } > + > + outlen = sizeof (outstr); > + outptr = outstr; > + r = iconv (cd, &inptr, &inlen, &outptr, &outlen); > + if (r != 0 > + || inlen != 0 > + || outlen != sizeof (outstr) - sizeof (expstr2) > + || memcmp (outstr, expstr2, sizeof (expstr2)) != 0) > + { > + puts ("wrong second conversion"); > + return 1; > + } > + > + if (iconv_close (cd) != 0) > + { > + puts ("iconv_close failed"); > + return 1; > + } > + return 0; > +} > + > +#define TEST_FUNCTION do_test () > +#include "../test-skeleton.c" > diff --git a/iconvdata/ibm930.c b/iconvdata/ibm930.c > index 91327f1..488c4a0 100644 > --- a/iconvdata/ibm930.c > +++ b/iconvdata/ibm930.c > @@ -256,6 +256,7 @@ enum > break; \ > } \ > *outptr++ = SI; \ > + curcs = sb; \ > } \ > \ > if (__glibc_unlikely (outptr + 1 > outend)) \ > @@ -269,7 +270,6 @@ enum > *outptr++ = 0x5b; \ > else \ > *outptr++ = cp[0]; \ > - curcs = sb; \ > } \ > \ > /* Now that we wrote the output increment the input pointer. */ \ > diff --git a/iconvdata/ibm933.c b/iconvdata/ibm933.c > index d1f3f05..e0ceda7 100644 > --- a/iconvdata/ibm933.c > +++ b/iconvdata/ibm933.c > @@ -255,6 +255,7 @@ enum > break; \ > } \ > *outptr++ = SI; \ > + curcs = sb; \ > } \ > \ > if (__glibc_unlikely (outptr + 1 > outend)) \ > @@ -263,7 +264,6 @@ enum > break; \ > } \ > *outptr++ = cp[0]; \ > - curcs = sb; \ > } \ > \ > /* Now that we wrote the output increment the input pointer. */ \ > diff --git a/iconvdata/ibm935.c b/iconvdata/ibm935.c > index afb3449..e327a1a 100644 > --- a/iconvdata/ibm935.c > +++ b/iconvdata/ibm935.c > @@ -255,6 +255,7 @@ enum > break; \ > } \ > *outptr++ = SI; \ > + curcs = sb; \ > } \ > \ > if (__glibc_unlikely (outptr + 1 > outend)) \ > @@ -263,7 +264,6 @@ enum > break; \ > } \ > *outptr++ = cp[0]; \ > - curcs = sb; \ > } \ > \ > /* Now that we wrote the output increment the input pointer. */ \ > diff --git a/iconvdata/ibm937.c b/iconvdata/ibm937.c > index 744f32f..f6ae243 100644 > --- a/iconvdata/ibm937.c > +++ b/iconvdata/ibm937.c > @@ -255,6 +255,7 @@ enum > break; \ > } \ > *outptr++ = SI; \ > + curcs = sb; \ > } \ > \ > if (__glibc_unlikely (outptr + 1 > outend)) \ > @@ -263,7 +264,6 @@ enum > break; \ > } \ > *outptr++ = cp[0]; \ > - curcs = sb; \ > } \ > \ > /* Now that we wrote the output increment the input pointer. */ \ > diff --git a/iconvdata/ibm939.c b/iconvdata/ibm939.c > index 3b189dd..8bf7c19 100644 > --- a/iconvdata/ibm939.c > +++ b/iconvdata/ibm939.c > @@ -255,6 +255,7 @@ enum > break; \ > } \ > *outptr++ = SI; \ > + curcs = sb; \ > } \ > \ > if (__glibc_unlikely (outptr + 1 > outend)) \ > @@ -268,7 +269,6 @@ enum > *outptr++ = 0xb2; \ > else \ > *outptr++ = cp[0]; \ > - curcs = sb; \ > } \ > \ > /* Now that we wrote the output increment the input pointer. */ \ >
Martin Sebor <msebor@gmail.com> writes: > I've tested the patch in an older release of glibc (2-17) with > a large input file submitted by a customer of ours running RHEL > 6.8 and it fixed the problem. If you still think it's a good > fix, it would be great to get it committed. Otherwise, if you > are aware of problems with it, can you say what they are? I'm not aware of any problems. Andreas.
On 12/10/2015 01:29 AM, Andreas Schwab wrote: > Martin Sebor <msebor@gmail.com> writes: > >> I've tested the patch in an older release of glibc (2-17) with >> a large input file submitted by a customer of ours running RHEL >> 6.8 and it fixed the problem. If you still think it's a good >> fix, it would be great to get it committed. Otherwise, if you >> are aware of problems with it, can you say what they are? > > I'm not aware of any problems. Great, thank you for confirming that. Will you be committing the patch sometime soon or is something standing in the way? Anything I can help with? Martin
Martin Sebor <msebor@gmail.com> writes: > Great, thank you for confirming that. Will you be committing > the patch sometime soon or is something standing in the way? Lack of review. Andreas.
On 12/14/2015 01:46 AM, Andreas Schwab wrote: > Martin Sebor <msebor@gmail.com> writes: > >> Great, thank you for confirming that. Will you be committing >> the patch sometime soon or is something standing in the way? > > Lack of review. Yes, I see you submitted the patch for review a number of times with no response. For what it's worth, I've reviewed and successfully tested the patch. I don't know the glibc commit process well enough to tell if someone else needs to review and approve it and who that might be in this case. However, based on my reading of the Consensus page on the wiki I would expect anyone to be able to commit this patch with only no approval (I don't see the iconvdata component mentioned on in the Reviewers by component section on the MAINTAINERS page): Anyone can commit a locale related change where a bugzilla issue exists, government sources are cited, common uses are cited, and if there is an original author for the locale, that original author ACKs the change. No developer review required. Post the patch and ChangeLog to libc-alpha with a short message and then push the commit. Do you agree it's sufficient to go ahead with the commit or do you think someone else needs to review and approve the patch? If the latter, do you know who that might be? Incidentally, while searching for more background on this bug I came across a commit from 2014 of the same patch into the openSUSE repository: https://www.mail-archive.com/opensuse-commit@opensuse.org/msg61322.html If another review is required, hopefully that will help increase confidence in the patch so that it can be approved for commit here. Thanks Martin
On 12/14/2015 05:23 PM, Martin Sebor wrote: > On 12/14/2015 01:46 AM, Andreas Schwab wrote: >> Martin Sebor <msebor@gmail.com> writes: >> >>> Great, thank you for confirming that. Will you be committing >>> the patch sometime soon or is something standing in the way? >> >> Lack of review. > > Yes, I see you submitted the patch for review a number of times with > no response. > > For what it's worth, I've reviewed and successfully tested the patch. > > I don't know the glibc commit process well enough to tell if someone > else needs to review and approve it and who that might be in this > case. However, based on my reading of the Consensus page on the wiki > I would expect anyone to be able to commit this patch with only no > approval (I don't see the iconvdata component mentioned on in the > Reviewers by component section on the MAINTAINERS page): > > Anyone can commit a locale related change where a bugzilla issue > exists, government sources are cited, common uses are cited, and > if there is an original author for the locale, that original author > ACKs the change. No developer review required. Post the patch and > ChangeLog to libc-alpha with a short message and then push the > commit. But this is not a locale patch. :) I think I understand what is going on here and how the patch fixes the bug. Andreas, looking at the other direction (specifically, the first loop in ibm930.c), it seems that an SI character while already in multi-byte mode is illegal. Does this mean the current encoder produces an illegal output sequence? Florian
Florian Weimer <fweimer@redhat.com> writes: > Andreas, looking at the other direction (specifically, the first loop in > ibm930.c), it seems that an SI character while already in multi-byte > mode is illegal. Does this mean the current encoder produces an illegal > output sequence? Yes, that looks like. Andreas.
On 12/15/2015 10:03 AM, Andreas Schwab wrote: > Florian Weimer <fweimer@redhat.com> writes: > >> Andreas, looking at the other direction (specifically, the first loop in >> ibm930.c), it seems that an SI character while already in multi-byte >> mode is illegal. Does this mean the current encoder produces an illegal >> output sequence? > > Yes, that looks like. Okay, not good, but I don't think it's worth to add a compatibility mode for this. I think this is ready to commit (if you are satisfied with Martin's and my review). Florian
diff --git a/iconvdata/Makefile b/iconvdata/Makefile index a3d1d09..0c952b3 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -67,7 +67,8 @@ modules.so := $(addsuffix .so, $(modules)) ifeq (yes,$(build-shared)) tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ - tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 + tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ + bug-iconv10 ifeq ($(have-thread-library),yes) tests += bug-iconv3 endif @@ -298,6 +299,8 @@ $(objpfx)tst-iconv4.out: $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) $(objpfx)tst-iconv7.out: $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) +$(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) \ diff --git a/iconvdata/bug-iconv10.c b/iconvdata/bug-iconv10.c new file mode 100644 index 0000000..98353a2 --- /dev/null +++ b/iconvdata/bug-iconv10.c @@ -0,0 +1,77 @@ +/* bug 17197: check for redundant shift character at block boundary. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <iconv.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +static int +do_test (void) +{ + iconv_t cd = iconv_open ("IBM930", "UTF-8"); + if (cd == (iconv_t) -1) + { + puts ("iconv_open failed"); + return 1; + } + + char instr1[] = "\xc2\xa6."; + const char expstr1[4] = "\016Bj\017"; + const char expstr2[] = "K"; + char outstr[4]; + size_t inlen = sizeof (instr1); + size_t outlen = sizeof (outstr); + char *inptr = instr1; + char *outptr = outstr; + size_t r = iconv (cd, &inptr, &inlen, &outptr, &outlen); + if (r != -1 + || errno != E2BIG + || inlen != sizeof (instr1) - 2 + || inptr != instr1 + 2 + || outlen != 0 + || memcmp (outstr, expstr1, sizeof (expstr1)) != 0) + { + puts ("wrong first conversion"); + return 1; + } + + outlen = sizeof (outstr); + outptr = outstr; + r = iconv (cd, &inptr, &inlen, &outptr, &outlen); + if (r != 0 + || inlen != 0 + || outlen != sizeof (outstr) - sizeof (expstr2) + || memcmp (outstr, expstr2, sizeof (expstr2)) != 0) + { + puts ("wrong second conversion"); + return 1; + } + + if (iconv_close (cd) != 0) + { + puts ("iconv_close failed"); + return 1; + } + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/iconvdata/ibm930.c b/iconvdata/ibm930.c index 91327f1..488c4a0 100644 --- a/iconvdata/ibm930.c +++ b/iconvdata/ibm930.c @@ -256,6 +256,7 @@ enum break; \ } \ *outptr++ = SI; \ + curcs = sb; \ } \ \ if (__glibc_unlikely (outptr + 1 > outend)) \ @@ -269,7 +270,6 @@ enum *outptr++ = 0x5b; \ else \ *outptr++ = cp[0]; \ - curcs = sb; \ } \ \ /* Now that we wrote the output increment the input pointer. */ \ diff --git a/iconvdata/ibm933.c b/iconvdata/ibm933.c index d1f3f05..e0ceda7 100644 --- a/iconvdata/ibm933.c +++ b/iconvdata/ibm933.c @@ -255,6 +255,7 @@ enum break; \ } \ *outptr++ = SI; \ + curcs = sb; \ } \ \ if (__glibc_unlikely (outptr + 1 > outend)) \ @@ -263,7 +264,6 @@ enum break; \ } \ *outptr++ = cp[0]; \ - curcs = sb; \ } \ \ /* Now that we wrote the output increment the input pointer. */ \ diff --git a/iconvdata/ibm935.c b/iconvdata/ibm935.c index afb3449..e327a1a 100644 --- a/iconvdata/ibm935.c +++ b/iconvdata/ibm935.c @@ -255,6 +255,7 @@ enum break; \ } \ *outptr++ = SI; \ + curcs = sb; \ } \ \ if (__glibc_unlikely (outptr + 1 > outend)) \ @@ -263,7 +264,6 @@ enum break; \ } \ *outptr++ = cp[0]; \ - curcs = sb; \ } \ \ /* Now that we wrote the output increment the input pointer. */ \ diff --git a/iconvdata/ibm937.c b/iconvdata/ibm937.c index 744f32f..f6ae243 100644 --- a/iconvdata/ibm937.c +++ b/iconvdata/ibm937.c @@ -255,6 +255,7 @@ enum break; \ } \ *outptr++ = SI; \ + curcs = sb; \ } \ \ if (__glibc_unlikely (outptr + 1 > outend)) \ @@ -263,7 +264,6 @@ enum break; \ } \ *outptr++ = cp[0]; \ - curcs = sb; \ } \ \ /* Now that we wrote the output increment the input pointer. */ \ diff --git a/iconvdata/ibm939.c b/iconvdata/ibm939.c index 3b189dd..8bf7c19 100644 --- a/iconvdata/ibm939.c +++ b/iconvdata/ibm939.c @@ -255,6 +255,7 @@ enum break; \ } \ *outptr++ = SI; \ + curcs = sb; \ } \ \ if (__glibc_unlikely (outptr + 1 > outend)) \ @@ -268,7 +269,6 @@ enum *outptr++ = 0xb2; \ else \ *outptr++ = cp[0]; \ - curcs = sb; \ } \ \ /* Now that we wrote the output increment the input pointer. */ \