Patchwork [2/4] std::regex refactoring

login
register
mail settings
Submitter Jonathan Wakely
Date Nov. 8, 2013, 2:30 p.m.
Message ID <CAH6eHdRZveh3zzfk2NAqqY2=ZDSMtMeDjjXgK3PrhCdqq2E-vA@mail.gmail.com>
Download mbox | patch
Permalink /patch/289849/
State New
Headers show

Comments

Jonathan Wakely - Nov. 8, 2013, 2:30 p.m.
This removes the redundant _CharT template parameters, since the
character type can be obtained from the traits parameter as
_TraitsT::char_type.  It also adds the __compile_nfa function, which
creates a _Compiler (deducing its template arguments) and returns the
_NFA obtained from it.

2013-11-08  Jonathan Wakely  <jwakely.gcc@gmail.com>

        * include/bits/regex_automaton.h (__detail::_State, __detail::_NFA,
        __detail::_StateSeq): Remove redundant _CharT template parameters.
        * include/bits/regex_automaton.tcc: Likewise.
        * include/bits/regex_compiler.h (__detail::_Compiler): Likewise.
        (__compile_nfa): Add object generator for _Compiler.
        * include/bits/regex_compiler.tcc: Remove _CharT template parameters.
        * include/bits/regex_executor.h: Likewise.
        * include/bits/regex_executor.tcc: Likewise.
        * include/bits/regex.h (basic_regex): Assert char_type matches. Use
        __compile_nfa object generator. Remove _CharT template parameter.

Tested x86_64-linux, committed to trunk.
commit adf28b3b99b86f1cab7d2f03930eb0f7ede04ed8
Author: Jonathan Wakely <jwakely.gcc@gmail.com>
Date:   Fri Nov 8 12:11:04 2013 +0000

    	* include/bits/regex_automaton.h (__detail::_State, __detail::_NFA,
    	__detail::_StateSeq): Remove redundant _CharT template parameters.
    	* include/bits/regex_automaton.tcc: Likewise.
    	* include/bits/regex_compiler.h (__detail::_Compiler): Likewise.
    	(__compile_nfa): Add object generator for _Compiler.
    	* include/bits/regex_compiler.tcc: Remove _CharT template parameters.
    	* include/bits/regex_executor.h: Likewise.
    	* include/bits/regex_executor.tcc: Likewise.
    	* include/bits/regex.h (basic_regex): Assert char_type matches. Use
    	__compile_nfa object generator. Remove _CharT template parameter.

Patch

diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index b1bda46..84b8cf1 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -377,10 +377,13 @@  _GLIBCXX_END_NAMESPACE_VERSION
    * Storage for the regular expression is allocated and deallocated as
    * necessary by the member functions of this class.
    */
-  template<typename _Ch_type, typename _Rx_traits = regex_traits<_Ch_type> >
+  template<typename _Ch_type, typename _Rx_traits = regex_traits<_Ch_type>>
     class basic_regex
     {
     public:
+      static_assert(is_same<_Ch_type, typename _Rx_traits::char_type>::value,
+		    "regex traits class must have the same char_type");
+
       // types:
       typedef _Ch_type                            value_type;
       typedef _Rx_traits                          traits_type;
@@ -498,8 +501,8 @@  _GLIBCXX_END_NAMESPACE_VERSION
 	basic_regex(_FwdIter __first, _FwdIter __last,
 		    flag_type __f = ECMAScript)
 	: _M_flags(__f),
-	  _M_automaton(__detail::_Compiler<_FwdIter, _Ch_type, _Rx_traits>
-		       (__first, __last, _M_traits, _M_flags)._M_get_nfa())
+	  _M_automaton(__detail::__compile_nfa(__first, __last, _M_traits,
+					       _M_flags))
 	{ }
 
       /**
@@ -634,9 +637,8 @@  _GLIBCXX_END_NAMESPACE_VERSION
 	       flag_type __flags = ECMAScript)
 	{
 	  _M_flags = __flags;
-	  _M_automaton =
-	    __detail::_Compiler<decltype(__s.begin()), _Ch_type, _Rx_traits>
-	    (__s.begin(), __s.end(), _M_traits, _M_flags)._M_get_nfa();
+	  _M_automaton = __detail::__compile_nfa(__s.begin(), __s.end(),
+						 _M_traits, _M_flags);
 	  return *this;
 	}
 
@@ -730,8 +732,7 @@  _GLIBCXX_END_NAMESPACE_VERSION
 #endif
 
     protected:
-      typedef std::shared_ptr<__detail::_NFA<_Ch_type, _Rx_traits>>
-	_AutomatonPtr;
+      typedef std::shared_ptr<__detail::_NFA<_Rx_traits>> _AutomatonPtr;
 
       template<typename _Bp, typename _Ap, typename _Cp, typename _Rp,
 	__detail::_RegexExecutorPolicy, bool>
diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h
index ded3716..1be51221 100644
--- a/libstdc++-v3/include/bits/regex_automaton.h
+++ b/libstdc++-v3/include/bits/regex_automaton.h
@@ -103,10 +103,10 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
   };
 
-  template<typename _CharT, typename _TraitsT>
+  template<typename _TraitsT>
     struct _State : _State_base
     {
-      typedef _Matcher<_CharT>           _MatcherT;
+      typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
 
       _MatcherT      _M_matches;        // for _S_opcode_match
 
@@ -155,12 +155,12 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     bool                      _M_has_backref;
   };
 
-  template<typename _CharT, typename _TraitsT>
+  template<typename _TraitsT>
     struct _NFA
-    : _NFA_base, std::vector<_State<_CharT, _TraitsT>>
+    : _NFA_base, std::vector<_State<_TraitsT>>
     {
-      typedef _State<_CharT, _TraitsT>		_StateT;
-      typedef _Matcher<_CharT>			_MatcherT;
+      typedef _State<_TraitsT>				_StateT;
+      typedef _Matcher<typename _TraitsT::char_type>	_MatcherT;
 
       using _NFA_base::_NFA_base;
 
@@ -268,11 +268,11 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
   /// Describes a sequence of one or more %_State, its current start
   /// and end(s).  This structure contains fragments of an NFA during
   /// construction.
-  template<typename _CharT, typename _TraitsT>
+  template<typename _TraitsT>
     class _StateSeq
     {
     public:
-      typedef _NFA<_CharT, _TraitsT> _RegexT;
+      typedef _NFA<_TraitsT> _RegexT;
 
     public:
       _StateSeq(_RegexT& __nfa, _StateIdT __s)
diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc b/libstdc++-v3/include/bits/regex_automaton.tcc
index 0c25c63..b0734cf 100644
--- a/libstdc++-v3/include/bits/regex_automaton.tcc
+++ b/libstdc++-v3/include/bits/regex_automaton.tcc
@@ -129,9 +129,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     return __ostr;
   }
 
-  template<typename _CharT, typename _TraitsT>
+  template<typename _TraitsT>
     std::ostream&
-    _NFA<_CharT, _TraitsT>::_M_dot(std::ostream& __ostr) const
+    _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const
     {
       __ostr << "digraph _Nfa {\n"
 	        "  rankdir=LR;\n";
@@ -142,9 +142,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 #endif
 
-  template<typename _CharT, typename _TraitsT>
+  template<typename _TraitsT>
     _StateIdT
-    _NFA<_CharT, _TraitsT>::_M_insert_backref(size_t __index)
+    _NFA<_TraitsT>::_M_insert_backref(size_t __index)
     {
       // To figure out whether a backref is valid, a stack is used to store
       // unfinished sub-expressions. For example, when parsing
@@ -164,9 +164,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return _M_insert_state(std::move(__tmp));
     }
 
-  template<typename _CharT, typename _TraitsT>
+  template<typename _TraitsT>
     void
-    _NFA<_CharT, _TraitsT>::_M_eliminate_dummy()
+    _NFA<_TraitsT>::_M_eliminate_dummy()
     {
       for (auto& __it : *this)
 	{
@@ -182,9 +182,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   // Just apply DFS on the sequence and re-link their links.
-  template<typename _CharT, typename _TraitsT>
-    _StateSeq<_CharT, _TraitsT>
-    _StateSeq<_CharT, _TraitsT>::_M_clone()
+  template<typename _TraitsT>
+    _StateSeq<_TraitsT>
+    _StateSeq<_TraitsT>::_M_clone()
     {
       std::map<_StateIdT, _StateIdT> __m;
       std::stack<_StateIdT> __stack;
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 98141a7..fef8862 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -43,12 +43,12 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     struct _BracketMatcher;
 
   /// Builds an NFA from an input iterator interval.
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _TraitsT>
     class _Compiler
     {
     public:
       typedef typename _TraitsT::string_type      _StringT;
-      typedef _NFA<_CharT, _TraitsT>              _RegexT;
+      typedef _NFA<_TraitsT>              	  _RegexT;
       typedef regex_constants::syntax_option_type _FlagT;
 
       _Compiler(_FwdIter __b, _FwdIter __e,
@@ -59,9 +59,10 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       { return make_shared<_RegexT>(std::move(_M_nfa)); }
 
     private:
+      typedef typename _TraitsT::char_type		      _CharT;
       typedef _Scanner<_FwdIter>                              _ScannerT;
       typedef typename _ScannerT::_TokenT                     _TokenT;
-      typedef _StateSeq<_CharT, _TraitsT>                     _StateSeqT;
+      typedef _StateSeq<_TraitsT>                     	      _StateSeqT;
       typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
       typedef _BracketMatcher<_CharT, _TraitsT>               _BMatcherT;
       typedef std::ctype<_CharT>                              _CtypeT;
@@ -129,6 +130,15 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _StackT         _M_stack;
     };
 
+  template<typename _FwdIter, typename _TraitsT>
+    inline std::shared_ptr<_NFA<_TraitsT>>
+    __compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits,
+		  regex_constants::syntax_option_type __flags)
+    {
+      using _Cmplr = _Compiler<_FwdIter, _TraitsT>;
+      return _Cmplr(__first, __last, __traits, __flags)._M_get_nfa();
+    }
+
   template<typename _CharT, typename _TraitsT>
     struct _AnyMatcher
     {
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index 58ef0f0..49c32b8 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -59,8 +59,8 @@  namespace __detail
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
-    _Compiler<_FwdIter, _CharT, _TraitsT>::
+  template<typename _FwdIter, typename _TraitsT>
+    _Compiler<_FwdIter, _TraitsT>::
     _Compiler(_FwdIter __b, _FwdIter __e,
 	      const _TraitsT& __traits, _FlagT __flags)
     : _M_flags((__flags
@@ -73,7 +73,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	       ? __flags
 	       : __flags | regex_constants::ECMAScript),
     _M_traits(__traits),
-    _M_ctype(std::use_facet<std::ctype<_CharT>>(_M_traits.getloc())),
+    _M_ctype(std::use_facet<_CtypeT>(_M_traits.getloc())),
     _M_scanner(__b, __e, _M_flags, _M_traits.getloc()),
     _M_nfa(_M_flags)
     {
@@ -89,9 +89,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_nfa._M_eliminate_dummy();
     }
 
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _TraitsT>
     void
-    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _TraitsT>::
     _M_disjunction()
     {
       this->_M_alternative();
@@ -110,9 +110,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	}
     }
 
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _TraitsT>
     void
-    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _TraitsT>::
     _M_alternative()
     {
       if (this->_M_term())
@@ -126,9 +126,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
     }
 
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _TraitsT>
     bool
-    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _TraitsT>::
     _M_term()
     {
       if (this->_M_assertion())
@@ -141,9 +141,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return false;
     }
 
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _TraitsT>
     bool
-    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _TraitsT>::
     _M_assertion()
     {
       if (_M_match_token(_ScannerT::_S_token_line_begin))
@@ -172,9 +172,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return true;
     }
 
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _TraitsT>
     void
-    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _TraitsT>::
     _M_quantifier()
     {
       bool __neg = (_M_flags & regex_constants::ECMAScript);
@@ -278,9 +278,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	}
     }
 
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _TraitsT>
     bool
-    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _TraitsT>::
     _M_atom()
     {
       if (_M_match_token(_ScannerT::_S_token_anychar))
@@ -329,9 +329,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return true;
     }
 
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _TraitsT>
     bool
-    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _TraitsT>::
     _M_bracket_expression()
     {
       bool __neg =
@@ -346,9 +346,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return true;
     }
 
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _TraitsT>
     void
-    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _TraitsT>::
     _M_expression_term(_BMatcherT& __matcher)
     {
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
@@ -383,9 +383,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	__throw_regex_error(regex_constants::error_brack);
     }
 
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _TraitsT>
     bool
-    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _TraitsT>::
     _M_try_char()
     {
       bool __is_char = false;
@@ -404,9 +404,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return __is_char;
     }
 
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _TraitsT>
     bool
-    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _TraitsT>::
     _M_match_token(_TokenT token)
     {
       if (token == _M_scanner._M_get_token())
@@ -418,9 +418,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return false;
     }
 
-  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _TraitsT>
     int
-    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _TraitsT>::
     _M_cur_int_value(int __radix)
     {
       long __v = 0;
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h
index f08f292..ded7747 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -62,7 +62,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       typedef std::vector<sub_match<_BiIter>, _Alloc>       _ResultsVec;
       typedef regex_constants::match_flag_type              _FlagT;
       typedef typename _TraitsT::char_class_type            _ClassT;
-      typedef _NFA<_CharT, _TraitsT>                        _NFAT;
+      typedef _NFA<_TraitsT>                                _NFAT;
 
     public:
       _Executor(_BiIter         __begin,
@@ -138,10 +138,10 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
 
       bool
-      _M_word_boundary(_State<_CharT, _TraitsT> __state) const;
+      _M_word_boundary(_State<_TraitsT> __state) const;
 
       bool
-      _M_lookahead(_State<_CharT, _TraitsT> __state);
+      _M_lookahead(_State<_TraitsT> __state);
 
     public:
       _ResultsVec                                          _M_cur_results;
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc
index b310aba..22fd67c 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -143,8 +143,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _BiIter, typename _Alloc, typename _TraitsT,
     bool __dfs_mode>
     bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
-    _M_lookahead(_State<_Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
-		 _CharT, _TraitsT> __state)
+    _M_lookahead(_State<_TraitsT> __state)
     {
       _ResultsVec __what(_M_cur_results.size());
       auto __sub = std::unique_ptr<_Executor>(new _Executor(_M_current,
@@ -348,7 +347,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _BiIter, typename _Alloc, typename _TraitsT,
     bool __dfs_mode>
     bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
-    _M_word_boundary(_State<_CharT, _TraitsT> __state) const
+    _M_word_boundary(_State<_TraitsT> __state) const
     {
       // By definition.
       bool __ans = false;