@@ -465,29 +465,93 @@ conversion stopped due to problem in writing the output"));
return 0;
}
+static void
+report_iconv_error (size_t position)
+{
+ switch (errno)
+ {
+ case EILSEQ:
+ if (position < (size_t) -1)
+ error (0, 0,
+ _("illegal input sequence at position %zu"), position);
+ else
+ error (0, 0,
+ _("illegal input sequence past position %zu"),
+ position - 1);
+ break;
+ case EINVAL:
+ error (0, 0, _("\
+incomplete character or shift sequence at end of buffer"));
+ break;
+ case EBADF:
+ error (0, 0, _("internal error (illegal descriptor)"));
+ break;
+ default:
+ error (0, 0, _("unknown iconv() error %d"), errno);
+ break;
+ }
+}
+#define BUF_SIZE 32768
static int
-process_block (iconv_t cd, char *addr, size_t len, FILE **output,
- const char *output_file)
+flush_state (iconv_t cd, FILE **output, const char *output_file, size_t offset)
{
-#define OUTBUF_SIZE 32768
- const char *start = addr;
- char outbuf[OUTBUF_SIZE];
+ char outbuf[BUF_SIZE];
+ char *outptr;
+ size_t outlen;
+ size_t n;
+
+ /* All the input test is processed. For state-dependent
+ character sets we have to flush the state now. */
+ outptr = outbuf;
+ outlen = BUF_SIZE;
+ n = iconv (cd, NULL, NULL, &outptr, &outlen);
+
+ if (outptr != outbuf)
+ {
+ if (write_output (outbuf, outptr, output, output_file) == -1)
+ return -1;
+ }
+
+ if (n == (size_t) -1)
+ {
+ report_iconv_error (offset);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+saturating_add (size_t a, size_t b)
+{
+ if ((size_t) -1 - a > b)
+ return a + b;
+ else
+ return -1;
+}
+
+static int
+process_part (iconv_t cd, char **addr, size_t *len, size_t offset,
+ FILE **output, const char *output_file)
+{
+ const char *start = *addr;
+ char outbuf[BUF_SIZE];
char *outptr;
size_t outlen;
size_t n;
int ret = 0;
- while (len > 0)
+ while (*len > 0)
{
outptr = outbuf;
- outlen = OUTBUF_SIZE;
- n = iconv (cd, &addr, &len, &outptr, &outlen);
+ outlen = BUF_SIZE;
+ n = iconv (cd, addr, len, &outptr, &outlen);
if (n == (size_t) -1 && omit_invalid && errno == EILSEQ)
{
ret = 1;
- if (len == 0)
+ if (*len == 0)
n = 0;
else
errno = E2BIG;
@@ -500,52 +564,18 @@ process_block (iconv_t cd, char *addr, size_t len, FILE **output,
break;
}
- if (n != (size_t) -1)
+ /* Incomplete multibyte characters might be completed by the next
+ chunk, so do not treat them as an error here. */
+ if (n != (size_t) -1 || errno == EINVAL)
{
- /* All the input test is processed. For state-dependent
- character sets we have to flush the state now. */
- outptr = outbuf;
- outlen = OUTBUF_SIZE;
- n = iconv (cd, NULL, NULL, &outptr, &outlen);
-
- if (outptr != outbuf)
- {
- ret = write_output (outbuf, outptr, output, output_file);
- if (ret != 0)
- break;
- }
-
- if (n != (size_t) -1)
- break;
-
- if (omit_invalid && errno == EILSEQ)
- {
- ret = 1;
- break;
- }
+ ret = 0;
+ break;
}
if (errno != E2BIG)
{
/* iconv() ran into a problem. */
- switch (errno)
- {
- case EILSEQ:
- if (! omit_invalid)
- error (0, 0, _("illegal input sequence at position %ld"),
- (long int) (addr - start));
- break;
- case EINVAL:
- error (0, 0, _("\
-incomplete character or shift sequence at end of buffer"));
- break;
- case EBADF:
- error (0, 0, _("internal error (illegal descriptor)"));
- break;
- default:
- error (0, 0, _("unknown iconv() error %d"), errno);
- break;
- }
+ report_iconv_error (saturating_add (offset, (*addr - start)));
return -1;
}
@@ -554,83 +584,76 @@ incomplete character or shift sequence at end of buffer"));
return ret;
}
-
static int
-process_fd (iconv_t cd, int fd, FILE **output, const char *output_file)
+process_block (iconv_t cd, char *addr, size_t len, FILE **output,
+ const char *output_file)
{
- /* we have a problem with reading from a desriptor since we must not
- provide the iconv() function an incomplete character or shift
- sequence at the end of the buffer. Since we have to deal with
- arbitrary encodings we must read the whole text in a buffer and
- process it in one step. */
- static char *inbuf = NULL;
- static size_t maxlen = 0;
- char *inptr = NULL;
- size_t actlen = 0;
-
- while (actlen < maxlen)
- {
- ssize_t n = read (fd, inptr, maxlen - actlen);
+ char *start = addr;
- if (n == 0)
- /* No more text to read. */
- break;
+ /* Process everything in one go. */
+ int ret = process_part (cd, &addr, &len, 0, output, output_file);
- if (n == -1)
- {
- /* Error while reading. */
- error (0, errno, _("error while reading the input"));
- return -1;
- }
+ if (ret != 0)
+ return ret;
- inptr += n;
- actlen += n;
+ /* If there is input left over, there is an incomplete multibyte
+ sequence at the end. */
+ if (len > 0)
+ {
+ errno = EINVAL;
+ report_iconv_error (addr - start);
+ return -1;
}
- if (actlen == maxlen)
- while (1)
- {
- ssize_t n;
- char *new_inbuf;
+ return flush_state (cd, output, output_file, addr - start);
+}
- /* Increase the buffer. */
- new_inbuf = (char *) realloc (inbuf, maxlen + 32768);
- if (new_inbuf == NULL)
- {
- error (0, errno, _("unable to allocate buffer for input"));
- return -1;
- }
- inbuf = new_inbuf;
- maxlen += 32768;
- inptr = inbuf + actlen;
- do
- {
- n = read (fd, inptr, maxlen - actlen);
+static int
+process_fd (iconv_t cd, int fd, FILE **output, const char *output_file)
+{
+ char inbuf[BUF_SIZE];
+ size_t len = 0;
+ size_t offset = 0;
+ ssize_t n;
- if (n == 0)
- /* No more text to read. */
- break;
+ /* Read into the buffer past unconsumed bytes from the last iteration. */
+ while ((n = read (fd, inbuf + len, BUF_SIZE - len)) > 0)
+ {
+ int ret;
+ char *inptr;
- if (n == -1)
- {
- /* Error while reading. */
- error (0, errno, _("error while reading the input"));
- return -1;
- }
+ len += n;
- inptr += n;
- actlen += n;
- }
- while (actlen < maxlen);
+ inptr = inbuf;
+ /* Process what we have read. */
+ ret = process_part (cd, &inptr, &len, offset, output, output_file);
+ if (ret != 0)
+ return ret;
+ /* Keep track of overall position in the input for error reporting. */
+ offset = saturating_add (offset, inptr - inbuf);
- if (n == 0)
- /* Break again so we leave both loops. */
- break;
- }
+ /* Keep incomplete multibyte characters, if any. */
+ memmove (inbuf, inptr, len);
+ }
+
+ if (n == -1)
+ {
+ /* Error while reading. */
+ error (0, errno, _("error while reading the input"));
+ return -1;
+ }
+
+ /* No more input available, so incomplete multibyte sequences are
+ not going to get completed at this point. */
+ if (len > 0)
+ {
+ errno = EINVAL;
+ report_iconv_error (offset);
+ return -1;
+ }
- /* Now we have all the input in the buffer. Process it in one run. */
- return process_block (cd, inbuf, actlen, output, output_file);
+ return flush_state (cd, output, output_file, offset);
}