From patchwork Sun Nov 9 18:17:38 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tim Shen X-Patchwork-Id: 408613 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id B054C1400D2 for ; Mon, 10 Nov 2014 05:17:52 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:date:message-id:subject:from:to:content-type; q= dns; s=default; b=iZKacEW2K4T8TbzBj1L2vcpgkaDhHIyaySCM7AAtMg0+xf FxkvIbYni/mbS1VkcQBZfHVLaJWQgIAUSmLNJMx+eC7wAXxgdrNnJPkQxrlC5hcb XC3VzpnScbfRYi9GUyLB1oKdwruBvMivK37xzHN8S306ORWI1RgGO+hDSvBJw= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:date:message-id:subject:from:to:content-type; s= default; bh=VoYyTyVdZu8M6ynUn/QbJY4CeFY=; b=FnZ2Aroduo9CnBfqnhBp Zrl8loh2ZPTDPUFCf9y9mmhAPx94NwdYwUi0dhBp/OPN/ra5YS04MZsUc3+mCLho MQH3c+PQp6Vw5xzsupRQ0duyJ01pUkjwbg9KlrLfLQ2xXfVzVpjaQb8bGWqIIr6v n8zSgPo/4L3QFI4SYeI7OOo= Received: (qmail 22464 invoked by alias); 9 Nov 2014 18:17:44 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 22439 invoked by uid 89); 9 Nov 2014 18:17:42 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.2 required=5.0 tests=AWL, BAYES_00, FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.2 X-Spam-User: qpsmtpd, 2 
recipients X-HELO: mail-oi0-f46.google.com Received: from mail-oi0-f46.google.com (HELO mail-oi0-f46.google.com) (209.85.218.46) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Sun, 09 Nov 2014 18:17:40 +0000 Received: by mail-oi0-f46.google.com with SMTP id g201so4431896oib.5 for ; Sun, 09 Nov 2014 10:17:38 -0800 (PST) MIME-Version: 1.0 X-Received: by 10.202.54.194 with SMTP id d185mr2696884oia.77.1415557058385; Sun, 09 Nov 2014 10:17:38 -0800 (PST) Received: by 10.182.120.136 with HTTP; Sun, 9 Nov 2014 10:17:38 -0800 (PST) Date: Sun, 9 Nov 2014 10:17:38 -0800 Message-ID: Subject: [Patch, libstdc++/63775] Fix regex bracket expression parsing From: Tim Shen To: "libstdc++" , gcc-patches Bootstrapped and tested. Thanks! diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index 1bbc09d..d8880cc 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -118,7 +118,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template void - _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& + _M_expression_term(pair& __last_char, + _BracketMatcher<_TraitsT, __icase, __collate>& __matcher); int @@ -390,6 +391,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_make_range(_CharT __l, _CharT __r) { + if (__l > __r) + __throw_regex_error(regex_constants::error_range); _M_range_set.push_back(make_pair(_M_translator._M_transform(__l), _M_translator._M_transform(__r))); #ifdef _GLIBCXX_DEBUG diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index 349d92a..b9da491 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -415,8 +415,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_insert_bracket_matcher(bool __neg) { _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits); + pair __last_char; // Optional<_CharT> + __last_char.first = 
false; + if (!(_M_flags & regex_constants::ECMAScript)) + if (_M_try_char()) + { + __matcher._M_add_char(_M_value[0]); + __last_char.first = true; + __last_char.second = _M_value[0]; + } while (!_M_match_token(_ScannerT::_S_token_bracket_end)) - _M_expression_term(__matcher); + _M_expression_term(__last_char, __matcher); __matcher._M_ready(); _M_stack.push(_StateSeqT( *_M_nfa, @@ -427,7 +436,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template void _Compiler<_TraitsT>:: - _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher) + _M_expression_term(pair& __last_char, + _BracketMatcher<_TraitsT, __icase, __collate>& __matcher) { if (_M_match_token(_ScannerT::_S_token_collsymbol)) __matcher._M_add_collating_element(_M_value); @@ -435,27 +445,50 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __matcher._M_add_equivalence_class(_M_value); else if (_M_match_token(_ScannerT::_S_token_char_class_name)) __matcher._M_add_character_class(_M_value, false); - else if (_M_try_char()) // [a + // POSIX doesn't permit '-' as a start-range char (say [a-z--0]), + // except when the '-' is the first character in the bracket expression + // ([--0]). ECMAScript treats all '-' after a range as a normal character. + // Also see above, where _M_expression_term gets called. + // + // As a result, POSIX rejects [-----], but ECMAScript doesn't. + // Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax. + // Clang (3.5) always uses ECMAScript style even in its POSIX syntax. 
+ // + // It turns out that no one reads BNFs ;) + else if (_M_try_char()) { - auto __ch = _M_value[0]; - if (_M_try_char()) + if (!__last_char.first) + { + if (_M_value[0] == '-' + && !(_M_flags & regex_constants::ECMAScript)) + __throw_regex_error(regex_constants::error_range); + __matcher._M_add_char(_M_value[0]); + __last_char.first = true; + __last_char.second = _M_value[0]; + } + else { - if (_M_value[0] == '-') // [a- + if (_M_value[0] == '-') { - if (_M_try_char()) // [a-z] + if (_M_try_char()) { - __matcher._M_make_range(__ch, _M_value[0]); - return; + __matcher._M_make_range(__last_char.second , _M_value[0]); + __last_char.first = false; + } + else + { + if (_M_scanner._M_get_token() + != _ScannerT::_S_token_bracket_end) + __throw_regex_error(regex_constants::error_range); + __matcher._M_add_char(_M_value[0]); } - // If the dash is the last character in the bracket - // expression, it is not special. - if (_M_scanner._M_get_token() - != _ScannerT::_S_token_bracket_end) - __throw_regex_error(regex_constants::error_range); } - __matcher._M_add_char(_M_value[0]); + else + { + __matcher._M_add_char(_M_value[0]); + __last_char.second = _M_value[0]; + } } - __matcher._M_add_char(__ch); } else if (_M_match_token(_ScannerT::_S_token_quoted_class)) __matcher._M_add_character_class(_M_value, diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc new file mode 100644 index 0000000..88dc83c --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc @@ -0,0 +1,124 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-08-01 Tim Shen +// +// Copyright (C) 2013-2014 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +// 28.11.2 regex_match +// Tests Extended bracket expression against a C-string. + +#include +#include +#include + +using namespace __gnu_test; +using namespace std; + +void +test01() +{ + bool test __attribute__((unused)) = true; + + { + std::regex re("pre/[za-x]", std::regex::extended); + VERIFY( regex_match_debug("pre/z", re) ); + VERIFY( regex_match_debug("pre/a", re) ); + VERIFY( !regex_match_debug("pre/y", re) ); + } + { + std::regex re("pre/[[:uPPer:]]", std::regex::extended); + VERIFY( regex_match_debug("pre/Z", re) ); + VERIFY( !regex_match_debug("pre/_", re) ); + VERIFY( !regex_match_debug("pre/a", re) ); + VERIFY( !regex_match_debug("pre/0", re) ); + } + { + std::regex re("pre/[[:lOWer:]]", std::regex::extended | std::regex::icase); + VERIFY( regex_match_debug("pre/Z", re) ); + VERIFY( regex_match_debug("pre/a", re) ); + } + { + std::regex re("pre/[[:w:][.tilde.]]", std::regex::extended); + VERIFY( regex_match_debug("pre/~", re) ); + VERIFY( regex_match_debug("pre/_", re) ); + VERIFY( regex_match_debug("pre/a", re) ); + VERIFY( regex_match_debug("pre/0", re) ); + } + { + std::regex re("pre/[[=a=]]", std::regex::extended); + VERIFY( regex_match_debug("pre/a", re) ); + VERIFY( regex_match_debug("pre/A", re) ); + } +} + +void +test02() +{ + bool test __attribute__((unused)) = true; + + try + { + std::regex re("[-----]", 
std::regex::extended); + } + catch (const std::regex_error& e) + { + VERIFY(e.code() == std::regex_constants::error_range); + } + std::regex re("[-----]", std::regex::ECMAScript); +} + +void +test03() +{ + bool test __attribute__((unused)) = true; + + try + { + std::regex re("[z-a]", std::regex::extended); + } + catch (const std::regex_error& e) + { + VERIFY(e.code() == std::regex_constants::error_range); + } +} + +void +test04() +{ + bool test __attribute__((unused)) = true; + + std::regex re("[-0-9a-z]"); + VERIFY(regex_match_debug("-", re)); + VERIFY(regex_match_debug("1", re)); + VERIFY(regex_match_debug("w", re)); + re.assign("[-0-9a-z]", regex_constants::basic); + VERIFY(regex_match_debug("-", re)); + VERIFY(regex_match_debug("1", re)); + VERIFY(regex_match_debug("w", re)); +} + +int +main() +{ + test01(); + test02(); + test03(); + test04(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc deleted file mode 100644 index ca2a5f5..0000000 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc +++ /dev/null @@ -1,75 +0,0 @@ -// { dg-options "-std=gnu++11" } - -// -// 2013-08-01 Tim Shen -// -// Copyright (C) 2013-2014 Free Software Foundation, Inc. -// -// This file is part of the GNU ISO C++ Library. This library is free -// software; you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the -// Free Software Foundation; either version 3, or (at your option) -// any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License along -// with this library; see the file COPYING3. If not see -// . - -// 28.11.2 regex_match -// Tests Extended bracket expression against a C-string. - -#include -#include -#include - -using namespace __gnu_test; -using namespace std; - -void -test01() -{ - bool test __attribute__((unused)) = true; - - { - std::regex re("pre/[za-x]", std::regex::extended); - VERIFY( regex_match_debug("pre/z", re) ); - VERIFY( regex_match_debug("pre/a", re) ); - VERIFY( !regex_match_debug("pre/y", re) ); - } - { - std::regex re("pre/[[:uPPer:]]", std::regex::extended); - VERIFY( regex_match_debug("pre/Z", re) ); - VERIFY( !regex_match_debug("pre/_", re) ); - VERIFY( !regex_match_debug("pre/a", re) ); - VERIFY( !regex_match_debug("pre/0", re) ); - } - { - std::regex re("pre/[[:lOWer:]]", std::regex::extended | std::regex::icase); - VERIFY( regex_match_debug("pre/Z", re) ); - VERIFY( regex_match_debug("pre/a", re) ); - } - { - std::regex re("pre/[[:w:][.tilde.]]", std::regex::extended); - VERIFY( regex_match_debug("pre/~", re) ); - VERIFY( regex_match_debug("pre/_", re) ); - VERIFY( regex_match_debug("pre/a", re) ); - VERIFY( regex_match_debug("pre/0", re) ); - } - { - std::regex re("pre/[[=a=]]", std::regex::extended); - VERIFY( regex_match_debug("pre/a", re) ); - VERIFY( regex_match_debug("pre/A", re) ); - } -} - -int -main() -{ - test01(); - return 0; -}