From patchwork Sun Aug 9 22:14:24 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Benjamin Herr X-Patchwork-Id: 505481 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 195C91401F6 for ; Mon, 10 Aug 2015 08:16:06 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; secure) header.d=sourceware.org header.i=@sourceware.org header.b=th0CJyjL; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:in-reply-to :references; q=dns; s=default; b=CSSMtRmpfM8n3yu1zvNQ9RxPHFFjZ00 gOYyQtlD8d3p1aNTinsABaFUbKU3osNS++jg8N9fps3ChHq0FCq/F10FNw45EpAP 859Q3cL1TJR9AV+f9xhcWH0lUYV/fCn0x3rl/NUGN50wwhwGsK6lrMSi0sMtAsUy koJOg3VS4DKU= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:in-reply-to :references; s=default; bh=J7zPdxjLg4F0UG7asg/YG2Rfh9k=; b=th0CJ yjLVIwbB+OiSqo9PecNbB2zuMthC3atWchuBas7VaKA3B6atxubZ8CPB9M/KwZrZ M36DC8rlF6WxesqQrWk91+CwFJiX9fs2yYvYFue3JVUHkR71qL9xiigNsuHs+uMA B3vffZDE0BN4qEfvwZNZGoas9nR7wM4OHa8OLI= Received: (qmail 64342 invoked by alias); 9 Aug 2015 22:15:55 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 64247 invoked by uid 89); 9 Aug 2015 22:15:54 -0000 Authentication-Results: sourceware.org; auth=none 
X-Virus-Found: No X-Spam-SWARE-Status: No, score=1.8 required=5.0 tests=AWL, BAYES_50, KAM_LAZY_DOMAIN_SECURITY, RCVD_IN_DNSWL_NONE, RP_MATCHES_RCVD autolearn=no version=3.3.2 X-HELO: stormwind.0x539.de From: Benjamin Herr To: libc-alpha@sourceware.org Subject: [PATCH 2/2] iconv_prog: Track if invalid sequences were seen. Date: Mon, 10 Aug 2015 00:14:24 +0200 Message-Id: <1439158464-18443-3-git-send-email-ben@0x539.de> In-Reply-To: <1439158464-18443-1-git-send-email-ben@0x539.de> References: <1439158464-18443-1-git-send-email-ben@0x539.de> As far as I can tell, POSIX requires that "The presence or absence of -c shall not affect the exit status of iconv." iconv_prog needs to be more careful not to clobber the variable tracking whether it has skipped an invalid input sequence. In addition, it seems reasonable to treat an incomplete input sequence at the end of the input the same way as an invalid input sequence elsewhere if -c is passed. --- 2015-08-09 Benjamin Herr * iconv/iconv_prog.c: Track more diligently whether an invalid input sequence has been encountered. iconv/iconv_prog.c | 68 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 21 deletions(-) diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c index a935402..809e18f 100644 --- a/iconv/iconv_prog.c +++ b/iconv/iconv_prog.c @@ -540,36 +540,39 @@ process_part (iconv_t cd, char **addr, size_t *len, size_t offset, char *outptr; size_t outlen; size_t n; - int ret = 0; + int invalid_sequence_seen = 0; - while (*len > 0) + for (;;) { outptr = outbuf; outlen = BUF_SIZE; n = iconv (cd, addr, len, &outptr, &outlen); - if (n == (size_t) -1 && omit_invalid && errno == EILSEQ) - { - ret = 1; - if (*len == 0) - n = 0; - else - errno = E2BIG; - } - if (outptr != outbuf) { - ret = write_output (outbuf, outptr, output, output_file); + int ret = write_output (outbuf, outptr, output, output_file); if (ret != 0) - break; + return ret; } + /* Done with the input buffer. 
*/ + if (n != (size_t) -1) + break; + /* Incomplete multibyte characters might be completed by the next chunk, so do not treat them as an error here. */ - if (n != (size_t) -1 || errno == EINVAL) + if (errno == EINVAL) + break; + + if (omit_invalid && errno == EILSEQ) { - ret = 0; - break; + /* Remember that we saw an invalid character for the sake of our + exit status, but otherwise carry on. */ + invalid_sequence_seen = 1; + if (*len == 0) + break; + else + errno = E2BIG; } if (errno != E2BIG) @@ -581,7 +584,7 @@ process_part (iconv_t cd, char **addr, size_t *len, size_t offset, } } - return ret; + return invalid_sequence_seen; } static int @@ -589,6 +592,7 @@ process_block (iconv_t cd, char *addr, size_t len, FILE **output, const char *output_file) { char *start = addr; + int invalid_sequence_seen = 0; /* Process everything in one go. */ int ret = process_part (cd, &addr, &len, 0, output, output_file); @@ -600,12 +604,24 @@ process_block (iconv_t cd, char *addr, size_t len, FILE **output, sequence at the end. */ if (len > 0) { + /* Incomplete multibyte sequences at the end of the input are not any + more invalid than spurious bytes anywhere else. */ + if (omit_invalid) + return 1; + errno = EINVAL; report_iconv_error (addr - start); return -1; } - return flush_state (cd, output, output_file, addr - start); + if (ret > 0) + invalid_sequence_seen = ret; + + ret = flush_state (cd, output, output_file, addr - start); + if (ret != 0) + return ret; + else + return invalid_sequence_seen; } @@ -616,11 +632,12 @@ process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) size_t len = 0; size_t offset = 0; ssize_t n; + int ret; + int invalid_sequence_seen = 0; /* Read into the buffer past unconsumed bytes from the last iteration. */ while ((n = read (fd, inbuf + len, BUF_SIZE - len)) > 0) { - int ret; char *inptr; len += n; @@ -628,8 +645,13 @@ process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) inptr = inbuf; /* Process what we have read. 
*/ ret = process_part (cd, &inptr, &len, offset, output, output_file); - if (ret != 0) + if (ret < 0) return ret; + + /* Remember this for the sake of our exit status, but carry on. */ + if (ret > 0) + invalid_sequence_seen = ret; + /* Keep track of overall position in the input for error reporting. */ offset = saturating_add (offset, inptr - inbuf); @@ -653,7 +675,11 @@ process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) return -1; } - return flush_state (cd, output, output_file, offset); + ret = flush_state (cd, output, output_file, offset); + if (ret != 0) + return ret; + else + return invalid_sequence_seen; }