From patchwork Sat Feb 13 19:13:16 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tim Shen X-Patchwork-Id: 582437 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 00C7E1402BC for ; Sun, 14 Feb 2016 06:13:29 +1100 (AEDT) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b=MSKKmLt9; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:date:message-id:subject:from:to:content-type; q= dns; s=default; b=uXgm0uuHbWGBUJKYvIsLpKKJU/WQtXRQt0lwtV81x2aL/r sppWbWsa01Ntp0Fu+J5f8A3zOT260DGe8JQ/WhvObrpoyIVLBh2ng+X9VnXnPvyA h6o9Abjk74c7vGIfzQscgSy3S/fzbZdUgv1aney9FgXXbg1cl79pgz1xSqvvM= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:date:message-id:subject:from:to:content-type; s= default; bh=zkK7DFvOIK3VQWM0dU/qUcR3T1o=; b=MSKKmLt9DqIDUsUaype9 G92IDaWjRv6rPYs5eodPx9cK6EyUYB7fNREwE9jHNh7sBrdosPpN7cAAUSBDajxz v4k6j7vgcbraLX2h7ANpoTjPh3MxX03VYf1X0+E+0RmwyVsb8dxzmJFg5Q5jaq/c 89FZBnoetK6O4VFLjfYDKpo= Received: (qmail 11131 invoked by alias); 13 Feb 2016 19:13:21 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 11109 invoked by uid 89); 13 Feb 2016 19:13:20 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.9 
required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_NONE, RP_MATCHES_RCVD, SPF_PASS autolearn=ham version=3.3.2 spammy=3310, VERIFY, test02, test01 X-HELO: mail-yw0-f178.google.com Received: from mail-yw0-f178.google.com (HELO mail-yw0-f178.google.com) (209.85.161.178) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Sat, 13 Feb 2016 19:13:18 +0000 Received: by mail-yw0-f178.google.com with SMTP id h129so88746209ywb.1 for ; Sat, 13 Feb 2016 11:13:18 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:mime-version:date:message-id:subject:from:to :content-type; bh=gr+czD56VIrOoYs9TpuvR7BIaei7/27yd//KJVzkZAA=; b=IO07HwOZa9s09bkzRk8ROPAi87uReSvH485MnRWoQ07kW98wFRsbv6zn6BRO8RGYVW YiKOK9AwoDAjxMTvzucaC+0zPE+PU0hia/JcsAFuVSg2ePw9jxgPpEah3UrprsQG64Sy tF2BHu+oK950cQkoWHrI6g2vdgaZ8/aNw7j1kjtY7Eb6C5EvPDQ0VQQLUGksttP7YFN+ npCpoJcgni0NVr247IUEVJ6+f2Z/XXBWzbKvITEVHP1lyeCAPmdHVp9j+VkED93ecZUb O9oq4V3k8b8jcfxDamLr8TsyCMSZxbVVDSfiK7qcs2Tq+BG+0M8BKEfuUubnBfg9wN51 E/tQ== X-Gm-Message-State: AG10YOStf+BL2le2Nv6mvciuOoPOzJ+EJSpR+dfdoINY43rdKuelhigjUsnAB2klLvU+Ed1lJshkDJq+dpV7nBtM MIME-Version: 1.0 X-Received: by 10.13.246.5 with SMTP id g5mr4756682ywf.118.1455390796784; Sat, 13 Feb 2016 11:13:16 -0800 (PST) Received: by 10.129.125.197 with HTTP; Sat, 13 Feb 2016 11:13:16 -0800 (PST) Date: Sat, 13 Feb 2016 11:13:16 -0800 Message-ID: Subject: [Patch, regex, libstdc++/69794] Unify special character parsing From: Tim Shen To: "libstdc++" , gcc-patches I got this wrong in r227289 - I ignored the "\n" special case in grep. It turns out that using code to handle special cases is error-prone, so I switched to using data (_M_grep_spec_char and _M_egrep_spec_char). Bootstrapped and tested on x86_64-pc-linux-gnu. Thanks! 
diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h index bff7366..16071da 100644 --- a/libstdc++-v3/include/bits/regex_scanner.h +++ b/libstdc++-v3/include/bits/regex_scanner.h @@ -95,11 +95,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION : _M_awk_escape_tbl), _M_spec_char(_M_is_ecma() ? _M_ecma_spec_char - : _M_is_basic() + : _M_flags & regex_constants::basic ? _M_basic_spec_char - : _M_extended_spec_char), + : _M_flags & regex_constants::extended + ? _M_extended_spec_char + : _M_flags & regex_constants::grep + ? _M_grep_spec_char + : _M_flags & regex_constants::egrep + ? _M_egrep_spec_char + : _M_flags & regex_constants::awk + ? _M_extended_spec_char + : nullptr), _M_at_bracket_start(false) - { } + { __glibcxx_assert(_M_spec_char); } protected: const char* @@ -177,6 +185,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|"; const char* _M_basic_spec_char = ".[\\*^$"; const char* _M_extended_spec_char = ".[\\()*+?{|^$"; + const char* _M_grep_spec_char = ".[\\*^$\n"; + const char* _M_egrep_spec_char = ".[\\()*+?{|^$\n"; _StateT _M_state; _FlagT _M_flags; diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc index 920cb14..fedba09 100644 --- a/libstdc++-v3/include/bits/regex_scanner.tcc +++ b/libstdc++-v3/include/bits/regex_scanner.tcc @@ -97,9 +97,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_scan_normal() { auto __c = *_M_current++; - const char* __pos; - if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')) == nullptr) + if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr) { _M_token = _S_token_ord_char; _M_value.assign(1, __c); @@ -177,12 +176,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_state = _S_state_in_brace; _M_token = _S_token_interval_begin; } - else if (((__pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'))) - != nullptr - && *__pos != '\0' - && __c != ']' - && __c != '}') - || (_M_is_grep() && __c == '\n')) + 
else if (__c != ']' && __c != '}') { auto __it = _M_token_tbl; auto __narrowc = _M_ctype.narrow(__c, '\0'); diff --git a/libstdc++-v3/testsuite/28_regex/regression.cc b/libstdc++-v3/testsuite/28_regex/regression.cc index f95bef9..c9a3402 100644 --- a/libstdc++-v3/testsuite/28_regex/regression.cc +++ b/libstdc++-v3/testsuite/28_regex/regression.cc @@ -33,10 +33,26 @@ test01() regex re("((.)", regex_constants::basic); } +void +test02() +{ + bool test __attribute__((unused)) = true; + + std::string re_str + { + "/abcd" "\n" + "/aecf" "\n" + "/ghci" + }; + auto rx = std::regex(re_str, std::regex_constants::grep | std::regex_constants::icase); + VERIFY(std::regex_search("/abcd", rx)); +} + int main() { test01(); + test02(); return 0; }