diff mbox

[regex,libstdc++/71500] Fix icase on bracket expression

Message ID CAG4ZjNmpc+rDKY_u2kH_mdeb_JNdWPTi5KP2iJG0W-G_N0__pA@mail.gmail.com
State New
Headers show

Commit Message

Tim Shen June 11, 2016, 6:46 p.m. UTC
On Sat, Jun 11, 2016 at 5:01 AM, Jonathan Wakely wrote:
> N.B. The "typename" and "::type" are redundant here, because it names
> the same type as the integral_constant itself, and you could
> use __bool_constant<__collate> instead:
>
>         return _M_transform_impl(_M_translate(__ch),
>                                 __bool_constant<__collate>());
>
> OK for trunk without the redundant typename ...::type, your choice
> whether to use __bool_constant or not.

Thanks! I was looking at std::bool_constant but that's in C++17.
__bool_constant is even better. :)

>
> Will this fix apply cleanly to the branches too?
>

For gcc6 yes; for gcc5 there needs more work. I guess it's OK for
backporting to gcc6?

Updated the patch according to the discussion in the libstdc++/71500 bug.

Comments

Jonathan Wakely June 13, 2016, 7:30 a.m. UTC | #1
On 11/06/16 11:46 -0700, Tim Shen wrote:
>On Sat, Jun 11, 2016 at 5:01 AM, Jonathan Wakely wrote:
>> N.B. The "typename" and "::type" are redundant here, because it names
>> the same type as the integral_constant itself, and you could
>> use __bool_constant<__collate> instead:
>>
>>         return _M_transform_impl(_M_translate(__ch),
>>                                 __bool_constant<__collate>());
>>
>> OK for trunk without the redundant typename ...::type, your choice
>> whether to use __bool_constant or not.
>
>Thanks! I was looking at std::bool_constant but that's in C++17.
>__bool_constant is even better. :)
>
>>
>> Will this fix apply cleanly to the branches too?
>>
>
>For gcc6 yes; for gcc5 there needs more work. I guess it's OK for
>backporting to gcc6?

Yes, OK for trunk and gcc-6-branch, thanks.
diff mbox

Patch

diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 410d61b..73c0af4 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -226,18 +226,13 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
 	if (__icase)
 	  return _M_traits.translate_nocase(__ch);
-	else if (__collate)
-	  return _M_traits.translate(__ch);
 	else
-	  return __ch;
+	  return _M_traits.translate(__ch);
       }
 
       _StrTransT
       _M_transform(_CharT __ch) const
-      {
-	return _M_transform_impl(__ch, typename integral_constant<bool,
-				 __collate>::type());
-      }
+      { return _M_transform_impl(__ch, __bool_constant<__collate>()); }
 
     private:
       _StrTransT
@@ -247,7 +242,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _StrTransT
       _M_transform_impl(_CharT __ch, true_type) const
       {
-	_StrTransT __str = _StrTransT(1, _M_translate(__ch));
+	_StrTransT __str = _StrTransT(1, __ch);
 	return _M_traits.transform(__str.begin(), __str.end());
       }
 
@@ -433,6 +428,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_make_range(_CharT __l, _CharT __r)
       {
+	__l = _M_translator._M_translate(__l);
+	__r = _M_translator._M_translate(__r);
 	if (__l > __r)
 	  __throw_regex_error(regex_constants::error_range,
 			      "Invalid range in bracket expression.");
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index ff69e16..050435d 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -428,11 +428,13 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (!(_M_flags & regex_constants::ECMAScript))
 	if (_M_try_char())
 	  {
-	    __matcher._M_add_char(_M_value[0]);
 	    __last_char.first = true;
 	    __last_char.second = _M_value[0];
 	  }
       while (_M_expression_term(__last_char, __matcher));
+      if (__last_char.first)
+	__matcher._M_add_char(__last_char.second);
+
       __matcher._M_ready();
       _M_stack.push(_StateSeqT(
 		      *_M_nfa,
@@ -449,8 +451,17 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (_M_match_token(_ScannerT::_S_token_bracket_end))
 	return false;
 
+      const auto __flush = [&]
+      {
+	if (__last_char.first)
+	  {
+	    __matcher._M_add_char(__last_char.second);
+	    __last_char.first = false;
+	  }
+      };
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
 	{
+	  __flush();
 	  auto __symbol = __matcher._M_add_collate_element(_M_value);
 	  if (__symbol.size() == 1)
 	    {
@@ -459,9 +470,15 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    }
 	}
       else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
-	__matcher._M_add_equivalence_class(_M_value);
+	{
+	  __flush();
+	  __matcher._M_add_equivalence_class(_M_value);
+	}
       else if (_M_match_token(_ScannerT::_S_token_char_class_name))
-	__matcher._M_add_character_class(_M_value, false);
+	{
+	  __flush();
+	  __matcher._M_add_character_class(_M_value, false);
+	}
       // POSIX doesn't allow '-' as a start-range char (say [a-z--0]),
       // except when the '-' is the first or last character in the bracket
       // expression ([--0]). ECMAScript treats all '-' after a range as a
@@ -476,7 +493,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	{
 	  if (!__last_char.first)
 	    {
-	      __matcher._M_add_char(_M_value[0]);
+	      __last_char.first = true;
+	      __last_char.second = _M_value[0];
 	      if (_M_value[0] == '-'
 		  && !(_M_flags & regex_constants::ECMAScript))
 		{
@@ -488,8 +506,6 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		    "a dash is not treated literally only when it is at "
 		    "beginning or end.");
 		}
-	      __last_char.first = true;
-	      __last_char.second = _M_value[0];
 	    }
 	  else
 	    {
@@ -499,22 +515,16 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		    {
 		      __matcher._M_make_range(__last_char.second , _M_value[0]);
 		      __last_char.first = false;
+		      return true;
 		    }
-		  else
-		    {
-		      if (_M_scanner._M_get_token()
-			  != _ScannerT::_S_token_bracket_end)
-			__throw_regex_error(
-			  regex_constants::error_range,
-			  "Unexpected end of bracket expression.");
-		      __matcher._M_add_char(_M_value[0]);
-		    }
-		}
-	      else
-		{
-		  __matcher._M_add_char(_M_value[0]);
-		  __last_char.second = _M_value[0];
+		  if (_M_scanner._M_get_token()
+		      != _ScannerT::_S_token_bracket_end)
+		    __throw_regex_error(
+		      regex_constants::error_range,
+		      "Unexpected end of bracket expression.");
 		}
+	      __matcher._M_add_char(__last_char.second);
+	      __last_char.second = _M_value[0];
 	    }
 	}
       else if (_M_match_token(_ScannerT::_S_token_quoted_class))
@@ -580,8 +590,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _BracketMatcher<_TraitsT, __icase, __collate>::
     _M_apply(_CharT __ch, false_type) const
     {
+      __ch = _M_translator._M_translate(__ch);
       bool __ret = std::binary_search(_M_char_set.begin(), _M_char_set.end(),
-				      _M_translator._M_translate(__ch));
+				      __ch);
       if (!__ret)
 	{
 	  auto __s = _M_translator._M_transform(__ch);
diff --git a/libstdc++-v3/testsuite/28_regex/regression.cc b/libstdc++-v3/testsuite/28_regex/regression.cc
index d367c8b..45b9f93 100644
--- a/libstdc++-v3/testsuite/28_regex/regression.cc
+++ b/libstdc++-v3/testsuite/28_regex/regression.cc
@@ -61,12 +61,40 @@  test03()
   VERIFY(!regex_search_debug("a", regex(R"(\b$)"), regex_constants::match_not_eow));
 }
 
+// PR libstdc++/71500
+void
+test04()
+{
+  bool test __attribute__((unused)) = true;
+
+  {
+    regex re1("[A-F]+", regex::ECMAScript | regex::icase);
+    VERIFY(regex_match_debug("aaa", re1));
+    VERIFY(regex_match_debug("AAA", re1));
+    VERIFY(regex_match_debug("fff", re1));
+    VERIFY(regex_match_debug("FFF", re1));
+  }
+  {
+    bool caught = false;
+    try
+    {
+      (void)regex("[T-f]+", regex::ECMAScript | regex::icase);
+    }
+    catch (...)
+    {
+      caught = true;
+    }
+    VERIFY(caught);
+  }
+}
+
 int
 main()
 {
   test01();
   test02();
   test03();
+  test04();
   return 0;
 }