diff mbox

Reduce regex _M_dfs frame size

Message ID CAG4ZjNnH25cpSXPgFFb3GDHRDyBD2wDdRSgZTbKuVk5oze=9CA@mail.gmail.com
State New
Headers show

Commit Message

Tim Shen Aug. 20, 2016, 10:28 a.m. UTC
I merely split _M_dfs() into small functions to see how it goes. It
turns out to save half of the stack consumption in -O0 without
observable performance impact.

If we want, we can use __attribute__((always_inline)) and
__attribute__((noinline)) to make those handler functions back and
forth for reasons. We know that we are going to do better than the
inliner on complicated recursive functions, right? :)

Thanks!

Comments

Jonathan Wakely Aug. 22, 2016, 5:35 p.m. UTC | #1
On 20/08/16 03:28 -0700, Tim Shen wrote:
>I merely split _M_dfs() into small functions to see how it goes. It
>turns out to save half of the stack consumption in -O0 without
>observable performance impact.

That seems like a good improvement.

>            Split _M_dfs() into smaller functions. This seems don't affect
>            performance, but reduces -O0 stack consumption by half (on my
>            x86_64-linux-gnu). NFC.

I don't think we need the note about performance in the changelog,
It only needs to be a description of what was changed, not why.

>            * regex_executor.h: Add separate function declarations.
>            * regex_executor.tcc: Split _M_dfs() into multiple handler
>            functions.

Please name the new functions here, e.g.

         * regex_executor.h (_M_handle_repeat, _M_handle_subexpr_begin)
         (_M_handle_subexpr_end, _M_handle_line_begin_assertion)
         (_M_handle_line_end_assertion, _M_handle_word_boundary)
         etc. etc.


OK for trunk with the more detailed changelog entry. Thanks!
Tim Shen Aug. 22, 2016, 7:53 p.m. UTC | #2
On Mon, Aug 22, 2016 at 10:35 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
> On 20/08/16 03:28 -0700, Tim Shen wrote:
> OK for trunk with the more detailed changelog entry. Thanks!

Done. Tested on x86_64-linux-gnu (if I remember it correctly back to
several days ago) and committed as r239673.
diff mbox

Patch

diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h
index ef8aa91..33a68dd 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -109,6 +109,39 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
 
       void
+      _M_handle_repeat(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_subexpr_begin(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_subexpr_end(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_line_end_assertion(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_word_boundary(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_match(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_backref(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_accept(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_alternative(_Match_mode, _StateIdT);
+
+      void
       _M_dfs(_Match_mode __match_mode, _StateIdT __start);
 
       bool
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc
index 6bbcb1b..382909f 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -195,213 +195,295 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	}
     };
 
+  // _M_alt branch is "match once more", while _M_next is "get me out
+  // of this quantifier". Executing _M_next first or _M_alt first don't
+  // mean the same thing, and we need to choose the correct order under
+  // given greedy mode.
   template<typename _BiIter, typename _Alloc, typename _TraitsT,
 	   bool __dfs_mode>
     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
-    _M_dfs(_Match_mode __match_mode, _StateIdT __i)
+    _M_handle_repeat(_Match_mode __match_mode, _StateIdT __i)
     {
-      if (_M_states._M_visited(__i))
-	return;
-
       const auto& __state = _M_nfa[__i];
-      // Every change on _M_cur_results and _M_current will be rolled back after
-      // finishing the recursion step.
-      switch (__state._M_opcode())
+
+      // Greedy.
+      if (!__state._M_neg)
 	{
-	// _M_alt branch is "match once more", while _M_next is "get me out
-	// of this quantifier". Executing _M_next first or _M_alt first don't
-	// mean the same thing, and we need to choose the correct order under
-	// given greedy mode.
-	case _S_opcode_repeat:
-	  {
-	    // Greedy.
-	    if (!__state._M_neg)
-	      {
-		_M_rep_once_more(__match_mode, __i);
-		// If it's DFS executor and already accepted, we're done.
-		if (!__dfs_mode || !_M_has_sol)
-		  _M_dfs(__match_mode, __state._M_next);
-	      }
-	    else // Non-greedy mode
-	      {
-		if (__dfs_mode)
-		  {
-		    // vice-versa.
-		    _M_dfs(__match_mode, __state._M_next);
-		    if (!_M_has_sol)
-		      _M_rep_once_more(__match_mode, __i);
-		  }
-		else
-		  {
-		    // DON'T attempt anything, because there's already another
-		    // state with higher priority accepted. This state cannot
-		    // be better by attempting its next node.
-		    if (!_M_has_sol)
-		      {
-			_M_dfs(__match_mode, __state._M_next);
-			// DON'T attempt anything if it's already accepted. An
-			// accepted state *must* be better than a solution that
-			// matches a non-greedy quantifier one more time.
-			if (!_M_has_sol)
-			  _M_rep_once_more(__match_mode, __i);
-		      }
-		  }
-	      }
-	    }
-	  break;
-	case _S_opcode_subexpr_begin:
-	  {
-	    auto& __res = _M_cur_results[__state._M_subexpr];
-	    auto __back = __res.first;
-	    __res.first = _M_current;
-	    _M_dfs(__match_mode, __state._M_next);
-	    __res.first = __back;
-	  }
-	  break;
-	case _S_opcode_subexpr_end:
-	  {
-	    auto& __res = _M_cur_results[__state._M_subexpr];
-	    auto __back = __res;
-	    __res.second = _M_current;
-	    __res.matched = true;
-	    _M_dfs(__match_mode, __state._M_next);
-	    __res = __back;
-	  }
-	  break;
-	case _S_opcode_line_begin_assertion:
-	  if (_M_at_begin())
+	  _M_rep_once_more(__match_mode, __i);
+	  // If it's DFS executor and already accepted, we're done.
+	  if (!__dfs_mode || !_M_has_sol)
 	    _M_dfs(__match_mode, __state._M_next);
-	  break;
-	case _S_opcode_line_end_assertion:
-	  if (_M_at_end())
-	    _M_dfs(__match_mode, __state._M_next);
-	  break;
-	case _S_opcode_word_boundary:
-	  if (_M_word_boundary() == !__state._M_neg)
-	    _M_dfs(__match_mode, __state._M_next);
-	  break;
-	// Here __state._M_alt offers a single start node for a sub-NFA.
-	// We recursively invoke our algorithm to match the sub-NFA.
-	case _S_opcode_subexpr_lookahead:
-	  if (_M_lookahead(__state._M_alt) == !__state._M_neg)
-	    _M_dfs(__match_mode, __state._M_next);
-	  break;
-	case _S_opcode_match:
-	  if (_M_current == _M_end)
-	    break;
+	}
+      else // Non-greedy mode
+	{
 	  if (__dfs_mode)
 	    {
-	      if (__state._M_matches(*_M_current))
-		{
-		  ++_M_current;
-		  _M_dfs(__match_mode, __state._M_next);
-		  --_M_current;
-		}
+	      // vice-versa.
+	      _M_dfs(__match_mode, __state._M_next);
+	      if (!_M_has_sol)
+		_M_rep_once_more(__match_mode, __i);
 	    }
 	  else
-	    if (__state._M_matches(*_M_current))
-	      _M_states._M_queue(__state._M_next, _M_cur_results);
-	  break;
-	// First fetch the matched result from _M_cur_results as __submatch;
-	// then compare it with
-	// (_M_current, _M_current + (__submatch.second - __submatch.first)).
-	// If matched, keep going; else just return and try another state.
-	case _S_opcode_backref:
-	  {
-	    __glibcxx_assert(__dfs_mode);
-	    auto& __submatch = _M_cur_results[__state._M_backref_index];
-	    if (!__submatch.matched)
-	      break;
-	    auto __last = _M_current;
-	    for (auto __tmp = __submatch.first;
-		 __last != _M_end && __tmp != __submatch.second;
-		 ++__tmp)
-	      ++__last;
-	    if (_M_re._M_automaton->_M_traits.transform(__submatch.first,
-							__submatch.second)
-		== _M_re._M_automaton->_M_traits.transform(_M_current, __last))
-	      {
-		if (__last != _M_current)
-		  {
-		    auto __backup = _M_current;
-		    _M_current = __last;
-		    _M_dfs(__match_mode, __state._M_next);
-		    _M_current = __backup;
-		  }
-		else
-		  _M_dfs(__match_mode, __state._M_next);
-	      }
-	  }
-	  break;
-	case _S_opcode_accept:
-	  if (__dfs_mode)
 	    {
-	      __glibcxx_assert(!_M_has_sol);
-	      if (__match_mode == _Match_mode::_Exact)
-		_M_has_sol = _M_current == _M_end;
-	      else
-		_M_has_sol = true;
-	      if (_M_current == _M_begin
-		  && (_M_flags & regex_constants::match_not_null))
-		_M_has_sol = false;
-	      if (_M_has_sol)
+	      // DON'T attempt anything, because there's already another
+	      // state with higher priority accepted. This state cannot
+	      // be better by attempting its next node.
+	      if (!_M_has_sol)
 		{
-		  if (_M_nfa._M_flags & regex_constants::ECMAScript)
-		    _M_results = _M_cur_results;
-		  else // POSIX
-		    {
-		      __glibcxx_assert(_M_states._M_get_sol_pos());
-		      // Here's POSIX's logic: match the longest one. However
-		      // we never know which one (lhs or rhs of "|") is longer
-		      // unless we try both of them and compare the results.
-		      // The member variable _M_sol_pos records the end
-		      // position of the last successful match. It's better
-		      // to be larger, because POSIX regex is always greedy.
-		      // TODO: This could be slow.
-		      if (*_M_states._M_get_sol_pos() == _BiIter()
-			  || std::distance(_M_begin,
-					   *_M_states._M_get_sol_pos())
-			     < std::distance(_M_begin, _M_current))
-			{
-			  *_M_states._M_get_sol_pos() = _M_current;
-			  _M_results = _M_cur_results;
-			}
-		    }
+		  _M_dfs(__match_mode, __state._M_next);
+		  // DON'T attempt anything if it's already accepted. An
+		  // accepted state *must* be better than a solution that
+		  // matches a non-greedy quantifier one more time.
+		  if (!_M_has_sol)
+		    _M_rep_once_more(__match_mode, __i);
 		}
 	    }
-	  else
+	}
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+	   bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_subexpr_begin(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+
+      auto& __res = _M_cur_results[__state._M_subexpr];
+      auto __back = __res.first;
+      __res.first = _M_current;
+      _M_dfs(__match_mode, __state._M_next);
+      __res.first = __back;
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+	   bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_subexpr_end(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+
+      auto& __res = _M_cur_results[__state._M_subexpr];
+      auto __back = __res;
+      __res.second = _M_current;
+      __res.matched = true;
+      _M_dfs(__match_mode, __state._M_next);
+      __res = __back;
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+	   bool __dfs_mode>
+    inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_line_begin_assertion(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+      if (_M_at_begin())
+	_M_dfs(__match_mode, __state._M_next);
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+	   bool __dfs_mode>
+    inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_line_end_assertion(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+      if (_M_at_end())
+	_M_dfs(__match_mode, __state._M_next);
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+	   bool __dfs_mode>
+    inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_word_boundary(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+      if (_M_word_boundary() == !__state._M_neg)
+	_M_dfs(__match_mode, __state._M_next);
+    }
+
+  // Here __state._M_alt offers a single start node for a sub-NFA.
+  // We recursively invoke our algorithm to match the sub-NFA.
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+	   bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_subexpr_lookahead(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+      if (_M_lookahead(__state._M_alt) == !__state._M_neg)
+	_M_dfs(__match_mode, __state._M_next);
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+	   bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_match(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+
+      if (_M_current == _M_end)
+	return;
+      if (__dfs_mode)
+	{
+	  if (__state._M_matches(*_M_current))
 	    {
-	      if (_M_current == _M_begin
-		  && (_M_flags & regex_constants::match_not_null))
-		break;
-	      if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end)
-		if (!_M_has_sol)
-		  {
-		    _M_has_sol = true;
-		    _M_results = _M_cur_results;
-		  }
+	      ++_M_current;
+	      _M_dfs(__match_mode, __state._M_next);
+	      --_M_current;
 	    }
-	  break;
-	case _S_opcode_alternative:
-	  if (_M_nfa._M_flags & regex_constants::ECMAScript)
+	}
+      else
+	if (__state._M_matches(*_M_current))
+	  _M_states._M_queue(__state._M_next, _M_cur_results);
+    }
+
+  // First fetch the matched result from _M_cur_results as __submatch;
+  // then compare it with
+  // (_M_current, _M_current + (__submatch.second - __submatch.first)).
+  // If matched, keep going; else just return and try another state.
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+	   bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_backref(_Match_mode __match_mode, _StateIdT __i)
+    {
+      __glibcxx_assert(__dfs_mode);
+
+      const auto& __state = _M_nfa[__i];
+      auto& __submatch = _M_cur_results[__state._M_backref_index];
+      if (!__submatch.matched)
+	return;
+      auto __last = _M_current;
+      for (auto __tmp = __submatch.first;
+	   __last != _M_end && __tmp != __submatch.second;
+	   ++__tmp)
+	++__last;
+      if (_M_re._M_automaton->_M_traits.transform(__submatch.first,
+						  __submatch.second)
+	  == _M_re._M_automaton->_M_traits.transform(_M_current, __last))
+	{
+	  if (__last != _M_current)
 	    {
-	      // TODO: Fix BFS support. It is wrong.
-	      _M_dfs(__match_mode, __state._M_alt);
-	      // Pick lhs if it matches. Only try rhs if it doesn't.
-	      if (!_M_has_sol)
-		_M_dfs(__match_mode, __state._M_next);
+	      auto __backup = _M_current;
+	      _M_current = __last;
+	      _M_dfs(__match_mode, __state._M_next);
+	      _M_current = __backup;
 	    }
 	  else
+	    _M_dfs(__match_mode, __state._M_next);
+	}
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+	   bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_accept(_Match_mode __match_mode, _StateIdT __i)
+    {
+      if (__dfs_mode)
+	{
+	  __glibcxx_assert(!_M_has_sol);
+	  if (__match_mode == _Match_mode::_Exact)
+	    _M_has_sol = _M_current == _M_end;
+	  else
+	    _M_has_sol = true;
+	  if (_M_current == _M_begin
+	      && (_M_flags & regex_constants::match_not_null))
+	    _M_has_sol = false;
+	  if (_M_has_sol)
 	    {
-	      // Try both and compare the result.
-	      // See "case _S_opcode_accept:" handling above.
-	      _M_dfs(__match_mode, __state._M_alt);
-	      auto __has_sol = _M_has_sol;
-	      _M_has_sol = false;
-	      _M_dfs(__match_mode, __state._M_next);
-	      _M_has_sol |= __has_sol;
+	      if (_M_nfa._M_flags & regex_constants::ECMAScript)
+		_M_results = _M_cur_results;
+	      else // POSIX
+		{
+		  __glibcxx_assert(_M_states._M_get_sol_pos());
+		  // Here's POSIX's logic: match the longest one. However
+		  // we never know which one (lhs or rhs of "|") is longer
+		  // unless we try both of them and compare the results.
+		  // The member variable _M_sol_pos records the end
+		  // position of the last successful match. It's better
+		  // to be larger, because POSIX regex is always greedy.
+		  // TODO: This could be slow.
+		  if (*_M_states._M_get_sol_pos() == _BiIter()
+		      || std::distance(_M_begin,
+				       *_M_states._M_get_sol_pos())
+			 < std::distance(_M_begin, _M_current))
+		    {
+		      *_M_states._M_get_sol_pos() = _M_current;
+		      _M_results = _M_cur_results;
+		    }
+		}
 	    }
-	  break;
+	}
+      else
+	{
+	  if (_M_current == _M_begin
+	      && (_M_flags & regex_constants::match_not_null))
+	    return;
+	  if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end)
+	    if (!_M_has_sol)
+	      {
+		_M_has_sol = true;
+		_M_results = _M_cur_results;
+	      }
+	}
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+	   bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_handle_alternative(_Match_mode __match_mode, _StateIdT __i)
+    {
+      const auto& __state = _M_nfa[__i];
+
+      if (_M_nfa._M_flags & regex_constants::ECMAScript)
+	{
+	  // TODO: Fix BFS support. It is wrong.
+	  _M_dfs(__match_mode, __state._M_alt);
+	  // Pick lhs if it matches. Only try rhs if it doesn't.
+	  if (!_M_has_sol)
+	    _M_dfs(__match_mode, __state._M_next);
+	}
+      else
+	{
+	  // Try both and compare the result.
+	  // See "case _S_opcode_accept:" handling above.
+	  _M_dfs(__match_mode, __state._M_alt);
+	  auto __has_sol = _M_has_sol;
+	  _M_has_sol = false;
+	  _M_dfs(__match_mode, __state._M_next);
+	  _M_has_sol |= __has_sol;
+	}
+    }
+
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+	   bool __dfs_mode>
+    void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+    _M_dfs(_Match_mode __match_mode, _StateIdT __i)
+    {
+      if (_M_states._M_visited(__i))
+	return;
+
+      switch (_M_nfa[__i]._M_opcode())
+	{
+	case _S_opcode_repeat:
+	  _M_handle_repeat(__match_mode, __i); break;
+	case _S_opcode_subexpr_begin:
+	  _M_handle_subexpr_begin(__match_mode, __i); break;
+	case _S_opcode_subexpr_end:
+	  _M_handle_subexpr_end(__match_mode, __i); break;
+	case _S_opcode_line_begin_assertion:
+	  _M_handle_line_begin_assertion(__match_mode, __i); break;
+	case _S_opcode_line_end_assertion:
+	  _M_handle_line_end_assertion(__match_mode, __i); break;
+	case _S_opcode_word_boundary:
+	  _M_handle_word_boundary(__match_mode, __i); break;
+	case _S_opcode_subexpr_lookahead:
+	  _M_handle_subexpr_lookahead(__match_mode, __i); break;
+	case _S_opcode_match:
+	  _M_handle_match(__match_mode, __i); break;
+	case _S_opcode_backref:
+	  _M_handle_backref(__match_mode, __i); break;
+	case _S_opcode_accept:
+	  _M_handle_accept(__match_mode, __i); break;
+	case _S_opcode_alternative:
+	  _M_handle_alternative(__match_mode, __i); break;
 	default:
 	  __glibcxx_assert(false);
 	}