diff mbox

[regex,libstdc++/71500] Fix icase on bracket expression

Message ID CAG4ZjN=tjmJC40hBBkJ3FKpg5YKmJO4ZVmQbJ-RDE-JwUa3RKQ@mail.gmail.com
State New
Headers show

Commit Message

Tim Shen June 11, 2016, 7:53 a.m. UTC
Bootstrapped and tested on x86_64-pc-linux-gnu with the debug macro enabled.

Thanks!

Comments

Jonathan Wakely June 11, 2016, 12:01 p.m. UTC | #1
On 11/06/16 00:53 -0700, Tim Shen wrote:
>diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
>index 410d61b..d545c04 100644
>--- a/libstdc++-v3/include/bits/regex_compiler.h
>+++ b/libstdc++-v3/include/bits/regex_compiler.h
>@@ -235,8 +235,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>       _StrTransT
>       _M_transform(_CharT __ch) const
>       {
>-	return _M_transform_impl(__ch, typename integral_constant<bool,
>-				 __collate>::type());
>+	return _M_transform_impl(
>+	  _M_translate(__ch),
>+	  typename integral_constant<bool, __collate>::type());

N.B. The "typename" and "::type" are redundant here, because
integral_constant<bool, __collate>::type names the same type as the
integral_constant itself. You could also use __bool_constant<__collate> instead:

	return _M_transform_impl(_M_translate(__ch),
                                 __bool_constant<__collate>());

OK for trunk without the redundant typename ...::type; it's your choice
whether to use __bool_constant or not.

Will this fix apply cleanly to the branches too?
diff mbox

Patch

diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 410d61b..d545c04 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -235,8 +235,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _StrTransT
       _M_transform(_CharT __ch) const
       {
-	return _M_transform_impl(__ch, typename integral_constant<bool,
-				 __collate>::type());
+	return _M_transform_impl(
+	  _M_translate(__ch),
+	  typename integral_constant<bool, __collate>::type());
       }
 
     private:
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index ff69e16..3513e50 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -428,11 +428,13 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (!(_M_flags & regex_constants::ECMAScript))
 	if (_M_try_char())
 	  {
-	    __matcher._M_add_char(_M_value[0]);
 	    __last_char.first = true;
 	    __last_char.second = _M_value[0];
 	  }
       while (_M_expression_term(__last_char, __matcher));
+      if (__last_char.first)
+	__matcher._M_add_char(__last_char.second);
+
       __matcher._M_ready();
       _M_stack.push(_StateSeqT(
 		      *_M_nfa,
@@ -449,8 +451,17 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (_M_match_token(_ScannerT::_S_token_bracket_end))
 	return false;
 
+      const auto __flush = [&]
+      {
+	if (__last_char.first)
+	  {
+	    __matcher._M_add_char(__last_char.second);
+	    __last_char.first = false;
+	  }
+      };
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
 	{
+	  __flush();
 	  auto __symbol = __matcher._M_add_collate_element(_M_value);
 	  if (__symbol.size() == 1)
 	    {
@@ -459,9 +470,15 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    }
 	}
       else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
-	__matcher._M_add_equivalence_class(_M_value);
+	{
+	  __flush();
+	  __matcher._M_add_equivalence_class(_M_value);
+	}
       else if (_M_match_token(_ScannerT::_S_token_char_class_name))
-	__matcher._M_add_character_class(_M_value, false);
+	{
+	  __flush();
+	  __matcher._M_add_character_class(_M_value, false);
+	}
       // POSIX doesn't allow '-' as a start-range char (say [a-z--0]),
       // except when the '-' is the first or last character in the bracket
       // expression ([--0]). ECMAScript treats all '-' after a range as a
@@ -476,7 +493,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	{
 	  if (!__last_char.first)
 	    {
-	      __matcher._M_add_char(_M_value[0]);
+	      __last_char.first = true;
+	      __last_char.second = _M_value[0];
 	      if (_M_value[0] == '-'
 		  && !(_M_flags & regex_constants::ECMAScript))
 		{
@@ -488,8 +506,6 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		    "a dash is not treated literally only when it is at "
 		    "beginning or end.");
 		}
-	      __last_char.first = true;
-	      __last_char.second = _M_value[0];
 	    }
 	  else
 	    {
@@ -499,22 +515,16 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		    {
 		      __matcher._M_make_range(__last_char.second , _M_value[0]);
 		      __last_char.first = false;
+		      return true;
 		    }
-		  else
-		    {
-		      if (_M_scanner._M_get_token()
-			  != _ScannerT::_S_token_bracket_end)
-			__throw_regex_error(
-			  regex_constants::error_range,
-			  "Unexpected end of bracket expression.");
-		      __matcher._M_add_char(_M_value[0]);
-		    }
-		}
-	      else
-		{
-		  __matcher._M_add_char(_M_value[0]);
-		  __last_char.second = _M_value[0];
+		  if (_M_scanner._M_get_token()
+		      != _ScannerT::_S_token_bracket_end)
+		    __throw_regex_error(
+		      regex_constants::error_range,
+		      "Unexpected end of bracket expression.");
 		}
+	      __matcher._M_add_char(__last_char.second);
+	      __last_char.second = _M_value[0];
 	    }
 	}
       else if (_M_match_token(_ScannerT::_S_token_quoted_class))
diff --git a/libstdc++-v3/testsuite/28_regex/regression.cc b/libstdc++-v3/testsuite/28_regex/regression.cc
index d367c8b..7b9aec1 100644
--- a/libstdc++-v3/testsuite/28_regex/regression.cc
+++ b/libstdc++-v3/testsuite/28_regex/regression.cc
@@ -61,12 +61,26 @@  test03()
   VERIFY(!regex_search_debug("a", regex(R"(\b$)"), regex_constants::match_not_eow));
 }
 
+// PR libstdc++/71500
+void
+test04()
+{
+  bool test __attribute__((unused)) = true;
+
+  regex re1("[A-F]+", regex::ECMAScript | regex::icase);
+  VERIFY(regex_match_debug("aaa", re1));
+  VERIFY(regex_match_debug("AAA", re1));
+  VERIFY(regex_match_debug("fff", re1));
+  VERIFY(regex_match_debug("FFF", re1));
+}
+
 int
 main()
 {
   test01();
   test02();
   test03();
+  test04();
   return 0;
 }