diff mbox

[libstdc++/67015] Fix regex POSIX bracket parsing

Message ID CAG4ZjNnA=tHCiKmbUyXnokomDpUTc+DmPybz4ajDkqcjVvwY8A@mail.gmail.com
State New
Headers show

Commit Message

Tim Shen July 28, 2015, 2:40 a.m. UTC
On Mon, Jul 27, 2015 at 4:45 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
> On 26/07/15 05:20 -0700, Tim Shen wrote:
>>
>> @@ -389,7 +391,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> #endif
>>       }
>>
>> -      void
>> +      _StringT
>>       _M_add_collating_element(const _StringT& __s)
>>       {
>>         auto __st = _M_traits.lookup_collatename(__s.data(),
>
>
> Changing this return type is an ABI change. When compiled with older
> versions of GCC this function will not return anything, so code
> compiled with newer versions that links to that older code will read
> garbage off the stack if linked to the old version.
>
> (This isn't a problem for _M_expression_term because the return type
> is part of the mangled name for function templates).
>
> One solution would be to rename the function, so that code compiled
> with the new GCC will use the new function, not link to the old
> version that doesn't return anything.

Done by s/_M_add_collating_element/_M_add_collate_element/.

Comments

Jonathan Wakely July 28, 2015, 10:27 a.m. UTC | #1
On 27/07/15 19:40 -0700, Tim Shen wrote:
>Done by s/_M_add_collating_element/_M_add_collate_element/.

Great, thanks. OK for trunk and gcc-5-branch.
Tim Shen July 29, 2015, 4:44 a.m. UTC | #2
On Tue, Jul 28, 2015 at 3:27 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
> On 27/07/15 19:40 -0700, Tim Shen wrote:
>>
>> Done by s/_M_add_collating_element/_M_add_collate_element/.
>
>
> Great, thanks. OK for trunk and gcc-5-branch.

Committed. Is there no need for gcc-4_9-branch? What's the general
policy for backporting fixes?

Thanks!
Jonathan Wakely July 29, 2015, 8:35 a.m. UTC | #3
On 28/07/15 21:44 -0700, Tim Shen wrote:
>On Tue, Jul 28, 2015 at 3:27 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
>> On 27/07/15 19:40 -0700, Tim Shen wrote:
>>>
>>> Done by s/_M_add_collating_element/_M_add_collate_element/.
>>
>>
>> Great, thanks. OK for trunk and gcc-5-branch.
>
>Committed. Is there no need for gcc-4_9-branch? What's the general
>policy for backporting fixes?

In general, only fixes for regressions (and changes to documentation
or tests) should be backported.

The 4.9 branch is the oldest one we still support, so should be very
stable now, although C++11 support in 4.9 is still labelled
experimental, and this only changes C++11 code. How important is it to
fix?
Tim Shen July 29, 2015, 8:56 a.m. UTC | #4
On Wed, Jul 29, 2015 at 1:35 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
> The 4.9 branch is the oldest one we still support, so should be very
> stable now, although C++11 support in 4.9 is still labelled
> experimental, and this only changes C++11 code. How important is it to
> fix?

Um... I'd say medium importance, for [a-z0-9-] not working in POSIX
syntax (while ECMAScript seems to be more popular, which works
correctly).

If 4.9 is `stable` and expected to be experimetal, which by my
definition should only include important changes, we may leave it as
is?
Jonathan Wakely July 29, 2015, 9:11 a.m. UTC | #5
On 29/07/15 01:56 -0700, Tim Shen wrote:
>If 4.9 is `stable` and expected to be experimetal, which by my
>definition should only include important changes, we may leave it as
>is?

Yes, let's not change the 4.9 version.
diff mbox

Patch

diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 4472116..0cb0c04 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -116,8 +116,10 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	void
 	_M_insert_bracket_matcher(bool __neg);
 
+      // Returns true if successfully matched one term and should continue.
+      // Returns false if the compiler should move on.
       template<bool __icase, bool __collate>
-	void
+	bool
 	_M_expression_term(pair<bool, _CharT>& __last_char,
 			   _BracketMatcher<_TraitsT, __icase, __collate>&
 			   __matcher);
@@ -389,8 +391,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
       }
 
-      void
-      _M_add_collating_element(const _StringT& __s)
+      _StringT
+      _M_add_collate_element(const _StringT& __s)
       {
 	auto __st = _M_traits.lookup_collatename(__s.data(),
 						 __s.data() + __s.size());
@@ -400,6 +402,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #ifdef _GLIBCXX_DEBUG
 	_M_is_ready = false;
 #endif
+	return __st;
       }
 
       void
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index 33d7118..9a62311 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -424,8 +424,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    __last_char.first = true;
 	    __last_char.second = _M_value[0];
 	  }
-      while (!_M_match_token(_ScannerT::_S_token_bracket_end))
-	_M_expression_term(__last_char, __matcher);
+      while (_M_expression_term(__last_char, __matcher));
       __matcher._M_ready();
       _M_stack.push(_StateSeqT(
 		      *_M_nfa,
@@ -434,21 +433,31 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template<typename _TraitsT>
   template<bool __icase, bool __collate>
-    void
+    bool
     _Compiler<_TraitsT>::
     _M_expression_term(pair<bool, _CharT>& __last_char,
 		       _BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
     {
+      if (_M_match_token(_ScannerT::_S_token_bracket_end))
+	return false;
+
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
-	__matcher._M_add_collating_element(_M_value);
+	{
+	  auto __symbol = __matcher._M_add_collate_element(_M_value);
+	  if (__symbol.size() == 1)
+	    {
+	      __last_char.first = true;
+	      __last_char.second = __symbol[0];
+	    }
+	}
       else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
 	__matcher._M_add_equivalence_class(_M_value);
       else if (_M_match_token(_ScannerT::_S_token_char_class_name))
 	__matcher._M_add_character_class(_M_value, false);
-      // POSIX doesn't permit '-' as a start-range char (say [a-z--0]),
-      // except when the '-' is the first character in the bracket expression
-      // ([--0]). ECMAScript treats all '-' after a range as a normal character.
-      // Also see above, where _M_expression_term gets called.
+      // POSIX doesn't allow '-' as a start-range char (say [a-z--0]),
+      // except when the '-' is the first or last character in the bracket
+      // expression ([--0]). ECMAScript treats all '-' after a range as a
+      // normal character. Also see above, where _M_expression_term gets called.
       //
       // As a result, POSIX rejects [-----], but ECMAScript doesn't.
       // Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax.
@@ -459,10 +468,14 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	{
 	  if (!__last_char.first)
 	    {
+	      __matcher._M_add_char(_M_value[0]);
 	      if (_M_value[0] == '-'
 		  && !(_M_flags & regex_constants::ECMAScript))
-		__throw_regex_error(regex_constants::error_range);
-	      __matcher._M_add_char(_M_value[0]);
+		{
+		  if (_M_match_token(_ScannerT::_S_token_bracket_end))
+		    return false;
+		  __throw_regex_error(regex_constants::error_range);
+		}
 	      __last_char.first = true;
 	      __last_char.second = _M_value[0];
 	    }
@@ -496,6 +509,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 						     _M_value[0]));
       else
 	__throw_regex_error(regex_constants::error_brack);
+
+      return true;
     }
 
   template<typename _TraitsT>
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc
index f7653c6..62131a0 100644
--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc
@@ -82,6 +82,22 @@  test02()
     VERIFY(e.code() == std::regex_constants::error_range);
   }
   std::regex re("[-----]", std::regex::ECMAScript);
+
+  VERIFY(!regex_match("b", regex("[-ac]", regex_constants::extended)));
+  VERIFY(!regex_match("b", regex("[ac-]", regex_constants::extended)));
+  VERIFY(regex_match("b", regex("[^-ac]", regex_constants::extended)));
+  VERIFY(regex_match("b", regex("[^ac-]", regex_constants::extended)));
+  VERIFY(regex_match("&", regex("[%--]", regex_constants::extended)));
+  VERIFY(regex_match(".", regex("[--@]", regex_constants::extended)));
+  try
+  {
+    regex("[a--@]", regex_constants::extended);
+    VERIFY(false);
+  }
+  catch (const std::regex_error& e)
+  {
+  }
+  VERIFY(regex_match("].", regex("[][.hyphen.]-0]*", regex_constants::extended)));
 }
 
 void
@@ -115,6 +131,44 @@  test04()
   VERIFY(regex_match_debug("w", re));
 }
 
+// libstdc++/67015
+void
+test05()
+{
+  bool test __attribute__((unused)) = true;
+
+  regex lanana_namespace("^[a-z0-9]+$", regex::extended);
+  regex lsb_namespace("^_?([a-z0-9_.]+-, regex::extended)+[a-z0-9]+$");
+  regex debian_dpkg_conffile_cruft("dpkg-(old|dist|new|tmp, regex::extended)$");
+  regex debian_cron_namespace("^[a-z0-9][a-z0-9-]*$", regex::extended);
+  VERIFY(regex_match("test", debian_cron_namespace));
+  VERIFY(!regex_match("-a", debian_cron_namespace));
+  VERIFY(regex_match("a-", debian_cron_namespace));
+  regex debian_cron_namespace_ok("^[a-z0-9][-a-z0-9]*$", regex::extended);
+  VERIFY(regex_match("test", debian_cron_namespace_ok));
+  VERIFY(!regex_match("-a", debian_cron_namespace_ok));
+  VERIFY(regex_match("a-", debian_cron_namespace_ok));
+}
+
+// libstdc++/67015
+void
+test06()
+{
+  bool test __attribute__((unused)) = true;
+
+  regex lanana_namespace("^[a-z0-9]+$");
+  regex lsb_namespace("^_?([a-z0-9_.]+-)+[a-z0-9]+$");
+  regex debian_dpkg_conffile_cruft("dpkg-(old|dist|new|tmp)$");
+  regex debian_cron_namespace("^[a-z0-9][a-z0-9-]*$");
+  VERIFY(regex_match("test", debian_cron_namespace));
+  VERIFY(!regex_match("-a", debian_cron_namespace));
+  VERIFY(regex_match("a-", debian_cron_namespace));
+  regex debian_cron_namespace_ok("^[a-z0-9][-a-z0-9]*$");
+  VERIFY(regex_match("test", debian_cron_namespace_ok));
+  VERIFY(!regex_match("-a", debian_cron_namespace_ok));
+  VERIFY(regex_match("a-", debian_cron_namespace_ok));
+}
+
 int
 main()
 {
@@ -122,5 +176,8 @@  main()
   test02();
   test03();
   test04();
+  test05();
+  test06();
+
   return 0;
 }