From patchwork Sat Jun 11 07:53:33 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tim Shen X-Patchwork-Id: 634041 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3rRWWr2X0Fz9t0G for ; Sat, 11 Jun 2016 17:53:59 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b=a9T0qvkE; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:from:date:message-id:subject:to:content-type; q= dns; s=default; b=xwF3IC7jW3ODP6wHEnpOC1Mp1gNYWtLQN6qYQYl7OSM1Jc R5OZ+fe8aUMg4ghGMetr9Ke7gdsROc7DwzSOddMzQ9OlUWuX1FAoTj/ayqiHBTue MKpnNn0jjD1i9Lgp5cj8mV+x1b+2EZylu5CX1uSBeZB7ZzG4TO0hr/LAaqhdo= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:from:date:message-id:subject:to:content-type; s= default; bh=vKydBvNL/PxZkTAqLdC1N/5KD2M=; b=a9T0qvkEDGKY1dMlhprj RGjbgwYR3i0cOz0DPIR2Jcwe6qPo7oueLQRwyIWXXH/YE4ZLN9OA/JMLPM2kkalo IkZgIZ0H2yIn0oVqw9Zj9GBWrB3TrXIcPPv5wnVt+ssCS83qoXj5zClClIOjzYh+ GEUQiZwAnYlrt+iudctuUIo= Received: (qmail 72334 invoked by alias); 11 Jun 2016 07:53:52 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 71488 invoked by uid 89); 11 Jun 2016 07:53:51 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-4.0 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_LOW, RP_MATCHES_RCVD, SPF_PASS autolearn=ham version=3.3.2 spammy=4599, VERIFY, timshengooglecom, 47015 X-HELO: mail-qk0-f180.google.com Received: from mail-qk0-f180.google.com (HELO mail-qk0-f180.google.com) (209.85.220.180) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Sat, 11 Jun 2016 07:53:36 +0000 Received: by mail-qk0-f180.google.com with SMTP id p10so16680432qke.3 for ; Sat, 11 Jun 2016 00:53:36 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:mime-version:from:date:message-id:subject:to; bh=rvRieawDHqKvW4ZIlroU3FcvPThYRstGhWzeKtmLKzk=; b=GFJqtY8WGs79hbFSKg6Vulqde6Or0pqHlagX70Zn/SAdvQNqV11mg7HAxbB+oE5Dr8 hfOfF/QcpaovZOXzjWdl9/yY0QaNo39VGoGMLRG2fzBX5FOwkzHhYDHl3ICQwCx9O194 9lw0aWbtyWP5W9lH0a5CFzOHjcxnsvDpTcGlY+7o91AUYSzxk/YCSJonTeBA3PJsFUWw yCoOHKhDYtcfZqhr5OlQwWhyejbTWjZcTPIvz8/eJZ12SRMnL/J/LizqyxpmBLh0wsgS ewh6JMbQGtFRcUkxqiwfPYWRKpG7KztnvZgwhNse/qYeYlwOhLZ353dA9d00aBgWaFBz JRAw== X-Gm-Message-State: ALyK8tJID+mCiL39ezhuRh6whZVes2Mrk12DGnbymFUuC68Xw7on/vpgDnNIkPyrd32w1hRDUL527RelM16OPuM1 X-Received: by 10.55.138.194 with SMTP id m185mr5792721qkd.48.1465631614257; Sat, 11 Jun 2016 00:53:34 -0700 (PDT) MIME-Version: 1.0 Received: by 10.233.237.208 with HTTP; Sat, 11 Jun 2016 00:53:33 -0700 (PDT) From: Tim Shen Date: Sat, 11 Jun 2016 00:53:33 -0700 Message-ID: Subject: [regex, libstdc++/71500, patch] Fix icase on bracket expression To: gcc-patches , "libstdc++" Bootstrapped and tested on x86_64-pc-linux-gnu with debug macro. Thanks! diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index 410d61b..d545c04 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -235,8 +235,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StrTransT _M_transform(_CharT __ch) const { - return _M_transform_impl(__ch, typename integral_constant::type()); + return _M_transform_impl( + _M_translate(__ch), + typename integral_constant::type()); } private: diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index ff69e16..3513e50 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -428,11 +428,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (!(_M_flags & regex_constants::ECMAScript)) if (_M_try_char()) { - __matcher._M_add_char(_M_value[0]); __last_char.first = true; __last_char.second = _M_value[0]; } while (_M_expression_term(__last_char, __matcher)); + if (__last_char.first) + __matcher._M_add_char(__last_char.second); + __matcher._M_ready(); _M_stack.push(_StateSeqT( *_M_nfa, @@ -449,8 +451,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (_M_match_token(_ScannerT::_S_token_bracket_end)) return false; + const auto __flush = [&] + { + if (__last_char.first) + { + __matcher._M_add_char(__last_char.second); + __last_char.first = false; + } + }; if (_M_match_token(_ScannerT::_S_token_collsymbol)) { + __flush(); auto __symbol = __matcher._M_add_collate_element(_M_value); if (__symbol.size() == 1) { @@ -459,9 +470,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } } else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) - __matcher._M_add_equivalence_class(_M_value); + { + __flush(); + __matcher._M_add_equivalence_class(_M_value); + } else if (_M_match_token(_ScannerT::_S_token_char_class_name)) - __matcher._M_add_character_class(_M_value, false); + { + __flush(); + __matcher._M_add_character_class(_M_value, false); + } // POSIX doesn't allow '-' as a start-range char (say [a-z--0]), // except when the '-' is the first or last character in the bracket // expression ([--0]). ECMAScript treats all '-' after a range as a @@ -476,7 +493,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { if (!__last_char.first) { - __matcher._M_add_char(_M_value[0]); + __last_char.first = true; + __last_char.second = _M_value[0]; if (_M_value[0] == '-' && !(_M_flags & regex_constants::ECMAScript)) { @@ -488,8 +506,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION "a dash is not treated literally only when it is at " "beginning or end."); } - __last_char.first = true; - __last_char.second = _M_value[0]; } else { @@ -499,22 +515,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __matcher._M_make_range(__last_char.second , _M_value[0]); __last_char.first = false; + return true; } - else - { - if (_M_scanner._M_get_token() - != _ScannerT::_S_token_bracket_end) - __throw_regex_error( - regex_constants::error_range, - "Unexpected end of bracket expression."); - __matcher._M_add_char(_M_value[0]); - } - } - else - { - __matcher._M_add_char(_M_value[0]); - __last_char.second = _M_value[0]; + if (_M_scanner._M_get_token() + != _ScannerT::_S_token_bracket_end) + __throw_regex_error( + regex_constants::error_range, + "Unexpected end of bracket expression."); } + __matcher._M_add_char(__last_char.second); + __last_char.second = _M_value[0]; } } else if (_M_match_token(_ScannerT::_S_token_quoted_class)) diff --git a/libstdc++-v3/testsuite/28_regex/regression.cc b/libstdc++-v3/testsuite/28_regex/regression.cc index d367c8b..7b9aec1 100644 --- a/libstdc++-v3/testsuite/28_regex/regression.cc +++ b/libstdc++-v3/testsuite/28_regex/regression.cc @@ -61,12 +61,26 @@ test03() VERIFY(!regex_search_debug("a", regex(R"(\b$)"), regex_constants::match_not_eow)); } +// PR libstdc++/71500 +void +test04() +{ + bool test __attribute__((unused)) = true; + + regex re1("[A-F]+", regex::ECMAScript | regex::icase); + VERIFY(regex_match_debug("aaa", re1)); + VERIFY(regex_match_debug("AAA", re1)); + VERIFY(regex_match_debug("fff", re1)); + VERIFY(regex_match_debug("FFF", re1)); +} + int main() { test01(); test02(); test03(); + test04(); return 0; }