diff mbox

[libstdc++] Add specific error message into exceptions

Message ID CAG4ZjNmMzR=V2gWskZjB1LGJWcF9iSztfoz8kLtWfVZfBspsnQ@mail.gmail.com
State New
Headers show

Commit Message

Tim Shen Aug. 28, 2015, 5:18 a.m. UTC
Bootstrapped and tested.

Thanks!

Comments

Jonathan Wakely Aug. 28, 2015, 3:59 p.m. UTC | #1
On 27/08/15 22:18 -0700, Tim Shen wrote:
>Bootstrapped and tested.
>
>Thanks!
>
>
>-- 
>Regards,
>Tim Shen

>commit 53c1caff442e97a18652ec8b1d984341168fd98d
>Author: Tim Shen <timshen@google.com>
>Date:   Thu Aug 27 21:42:40 2015 -0700
>
>    	PR libstdc++/67361
>    	* include/bits/regex_error.h: Add __throw_regex_error that
>    	supports string.
>    	* include/bits/regex_automaton.h: Add more specific exception
>    	messages.
>    	* include/bits/regex_automaton.tcc: Likewise.
>    	* include/bits/regex_compiler.h: Likewise.
>    	* include/bits/regex_compiler.tcc: Likewise.
>    	* include/bits/regex_scanner.h: Likewise.
>    	* include/bits/regex_scanner.tcc: Likewise.

Nice, thanks for doing this!

>@@ -158,10 +159,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>       // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
>       // time, "\\2" is valid, but "\\1" and "\\3" are not.
>       if (__index >= _M_subexpr_count)
>-	__throw_regex_error(regex_constants::error_backref);
>+	__throw_regex_error(
>+	  regex_constants::error_backref,
>+	  "Back-reference index exceeds current sub-expression count.");
>       for (auto __it : this->_M_paren_stack)
> 	if (__index == __it)
>-	  __throw_regex_error(regex_constants::error_backref);
>+	  __throw_regex_error(
>+	    regex_constants::error_backref,
>+	    "Back-reference refered to an opened sub-expression.");

Should be "referred".

And one of the other strings in another throw says "befoer".


>diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
>index 0cb0c04..12ffabe 100644
>--- a/libstdc++-v3/include/bits/regex_compiler.h
>+++ b/libstdc++-v3/include/bits/regex_compiler.h
>@@ -397,7 +397,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> 	auto __st = _M_traits.lookup_collatename(__s.data(),
> 						 __s.data() + __s.size());
> 	if (__st.empty())
>-	  __throw_regex_error(regex_constants::error_collate);
>+	  __throw_regex_error(regex_constants::error_collate,
>+			      string("Invalid collate element: "));
> 	_M_char_set.push_back(_M_translator._M_translate(__st[0]));
> #ifdef _GLIBCXX_DEBUG
> 	_M_is_ready = false;

There seems to be no need to construct a std::string here, just pass a
const char* (see below).

Also, this string ends in a colon, whereas most end in a period. Any
reason for the difference?


>@@ -411,7 +412,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> 	auto __st = _M_traits.lookup_collatename(__s.data(),
> 						 __s.data() + __s.size());
> 	if (__st.empty())
>-	  __throw_regex_error(regex_constants::error_collate);
>+	  __throw_regex_error(regex_constants::error_collate,
>+			      string("Invalid equivalence class."));
> 	__st = _M_traits.transform_primary(__st.data(),
> 					   __st.data() + __st.size());
> 	_M_equiv_set.push_back(__st);

Just pass const char*.

>@@ -428,7 +430,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> 						 __s.data() + __s.size(),
> 						 __icase);
> 	if (__mask == 0)
>-	  __throw_regex_error(regex_constants::error_ctype);
>+	  __throw_regex_error(regex_constants::error_collate,
>+			      string("Invalid character class."));
> 	if (!__neg)
> 	  _M_class_set |= __mask;
> 	else

Ditto.

>@@ -442,7 +445,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>       _M_make_range(_CharT __l, _CharT __r)
>       {
> 	if (__l > __r)
>-	  __throw_regex_error(regex_constants::error_range);
>+	  __throw_regex_error(regex_constants::error_range,
>+			      string("Invalid range in bracket expression."));
> 	_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
> 					 _M_translator._M_transform(__r)));
> #ifdef _GLIBCXX_DEBUG

Ditto.

>diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
>index 9a62311..019ca42 100644
>--- a/libstdc++-v3/include/bits/regex_compiler.tcc
>+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
>@@ -77,16 +77,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>       _M_traits(_M_nfa->_M_traits),
>       _M_ctype(std::use_facet<_CtypeT>(__loc))
>     {
>-      _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
>-      __r._M_append(_M_nfa->_M_insert_subexpr_begin());
>-      this->_M_disjunction();
>-      if (!_M_match_token(_ScannerT::_S_token_eof))
>-	__throw_regex_error(regex_constants::error_paren);
>-      __r._M_append(_M_pop());
>-      _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
>-      __r._M_append(_M_nfa->_M_insert_subexpr_end());
>-      __r._M_append(_M_nfa->_M_insert_accept());
>-      _M_nfa->_M_eliminate_dummy();
>+      __try
>+	{
>+	  _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
>+	  __r._M_append(_M_nfa->_M_insert_subexpr_begin());
>+	  this->_M_disjunction();
>+	  if (!_M_match_token(_ScannerT::_S_token_eof))
>+	    __throw_regex_error(regex_constants::error_paren,
>+				"Unexpected end of regex.");
>+	  __r._M_append(_M_pop());
>+	  _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
>+	  __r._M_append(_M_nfa->_M_insert_subexpr_end());
>+	  __r._M_append(_M_nfa->_M_insert_accept());
>+	  _M_nfa->_M_eliminate_dummy();
>+	}
>+      __catch(std::regex_error __e)
>+	{
>+	  __throw_regex_error(__e.code(),
>+			      string(__e.what()) + " Location: \""
>+			      + _M_scanner._M_get_location_string() + "\"");
>+	}
>     }
>
>   template<typename _TraitsT>

I wonder if we want to make this more efficient by adding a private
member to regex_error that would allow information to be appended to
the string, rather than creating a new regex_error with a new string.

>diff --git a/libstdc++-v3/include/bits/regex_error.h b/libstdc++-v3/include/bits/regex_error.h
>index 778edd5..0dd1fdf 100644
>--- a/libstdc++-v3/include/bits/regex_error.h
>+++ b/libstdc++-v3/include/bits/regex_error.h
>@@ -155,6 +155,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>     regex_constants::error_type
>     code() const
>     { return _M_code; }
>+
>+  private:
>+    regex_error(regex_constants::error_type __ecode, const string& __what)
>+    : std::runtime_error(__what), _M_code(__ecode) { }
>+
>+    friend void __throw_regex_error(regex_constants::error_type, const string&);
>   };
>
>   //@} // group regex
>@@ -162,5 +168,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   void
>   __throw_regex_error(regex_constants::error_type __ecode);
>
>+  inline void
>+  __throw_regex_error(regex_constants::error_type __ecode, const string& __what)
>+  { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); }
>+
> _GLIBCXX_END_NAMESPACE_VERSION
> } // namespace std

I suggest adding another overload that takes a const char* rather than
std::string. The reason is that when using the new ABI this function
will take a std::__cxx11::string, so calling it will allocate memory
for the string data, then that string is passed to the regex_error
constructor which has to convert it internally to an old std::string,
which has to allocate a second time.

If there is an overload taking a const char* then that can be passed
to the regex_error constructor and only one allocation will be done.

(I have considered making it possible for exceptions to move the
memory from a new string into their old string member, but that
isn't done currently.)


>diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h
>index b47103e..7795dd2 100644
>--- a/libstdc++-v3/include/bits/regex_scanner.h
>+++ b/libstdc++-v3/include/bits/regex_scanner.h
>@@ -220,6 +220,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>       _M_get_value() const
>       { return _M_value; }
>
>+      string
>+      _M_get_location_string() const
>+      {
>+	auto __left = std::max(_M_begin, _M_current - 2);
>+	auto __right = std::min(_M_end, _M_current + 3);
>+	static constexpr char __here[] = ">>><<<";

I don't think there's any advantage to using a static here, it doesn't
need to be a global symbol, and with optimisation enabled we get the
same code from just const char __here[] = ">>><<<";


>@@ -247,6 +263,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>       void
>       _M_eat_class(char);
>
>+      _IterT                        _M_begin;
>       _IterT                        _M_current;
>       _IterT                        _M_end;
>       _CtypeT&                      _M_ctype;

This looks like an ABI change, as the size of the type changes.

If I understand correctly this is only needed for the location info,
we could still have nice human readable text in the exceptions without
this, right?
Tim Shen Aug. 28, 2015, 6:23 p.m. UTC | #2
On Fri, Aug 28, 2015 at 8:59 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
> There seems to be no need to construct a std::string here, just pass a
> const char* (see below).

To be honest, I wasn't considering performance for a bit, since
exceptions are already considered slow by me :P. But yes, we can do
fewer allocations.

> I wonder if we want to make this more efficient by adding a private
> member to regex_error that would allow information to be appended to
> the string, rather than creating a new regex_error with a new string.

I can add a helper function to _Scanner to construct the exception
object only once. For functions that can't access this helper, I can
use a return value for error handling.

> I suggest adding another overload that takes a const char* rather than
> std::string. The reason is that when using the new ABI this function
> will take a std::__cxx11::string, so calling it will allocate memory
> for the string data, then that string is passed to the regex_error
> constructor which has to convert it internally to an old std::string,
> which has to allocate a second time.

First, to make it clear: due to _M_get_location_string(), we need
dynamic allocation.

So is it good to have an owned raw pointer stored in runtime_error,
pointing to a heap allocated char chunk, which will be deallocated in
regex_error's dtor?
Jonathan Wakely Sept. 7, 2015, 11:06 a.m. UTC | #3
On 28/08/15 11:23 -0700, Tim Shen wrote:
>On Fri, Aug 28, 2015 at 8:59 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
>> There seems to be no need to construct a std::string here, just pass a
>> const char* (see below).
>
>To be honest, I wasn't considering performance for a bit, since
>exceptions are already considered slow by me :P. But yes, we can do
>fewer allocations.
>
>> I wonder if we want to make this more efficient by adding a private
>> member to regex_error that would allow information to be appended to
>> the string, rather than creating a new regex_error with a new string.

In case it wasn't clear, I was suggesting to add a private member
*function* not data member.

>I can add a helper function to _Scanner to construct the exception
>object only once. For functions that can't access this helper, I can
>use a return value for error handling.
>
>> I suggest adding another overload that takes a const char* rather than
>> std::string. The reason is that when using the new ABI this function
>> will take a std::__cxx11::string, so calling it will allocate memory
>> for the string data, then that string is passed to the regex_error
>> constructor which has to convert it internally to an old std::string,
>> which has to allocate a second time.
>
>First, to make it clear: due to _M_get_location_string(), we need
>dynamic allocation.
>
>So is it good to have an owned raw pointer stored in runtime_error,
>pointing to a heap allocated char chunk, which will be deallocated in
>regex_error's dtor?

No, adding that pointer is an ABI change.

If you can't do it without an ABI change then you will have to lose
the _M_get_location_string() functionality. It seems non-essential
anyway.
Jonathan Wakely Sept. 16, 2015, 5:38 p.m. UTC | #4
On 12/09/15 01:57 +0000, Tim Shen wrote:
>Ok then, let's not append a dynamic location string, but only throw a
>string literal pointer.

This looks great, and a *huge* improvement on the current errors even
without more precise location info.

OK for trunk, thanks very much for doing this.




>commit fc3343a2c719049620447f6dc20191e2af4895f6
>Author: Tim Shen <timshen@google.com>
>Date:   Thu Aug 27 21:42:40 2015 -0700
>
>    	PR libstdc++/67361
>    	* include/bits/regex_error.h: Add __throw_regex_error that
>    	supports string.
>    	* include/bits/regex_automaton.h: Add more specific exception
>    	messages.
>    	* include/bits/regex_automaton.tcc: Likewise.
>    	* include/bits/regex_compiler.h: Likewise.
>    	* include/bits/regex_compiler.tcc: Likewise.
>    	* include/bits/regex_scanner.h: Likewise.
>    	* include/bits/regex_scanner.tcc: Likewise.
>
Tim Shen Sept. 19, 2015, 9 p.m. UTC | #5
On Wed, Sep 16, 2015 at 10:38 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
> On 12/09/15 01:57 +0000, Tim Shen wrote:
>>
>> Ok then, let's not append a dynamic location string, but only throw a
>> string literal pointer.
>
>
> This looks great, and a *huge* improvement on the current errors even
> without more precise location info.

I'm glad to hear this :).

> OK for trunk, thanks very much for doing this.

Tested & Committed as r227936.
diff mbox

Patch

diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h
index b6ab307..1f672ee 100644
--- a/libstdc++-v3/include/bits/regex_automaton.h
+++ b/libstdc++-v3/include/bits/regex_automaton.h
@@ -327,7 +327,11 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
 	this->push_back(std::move(__s));
 	if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT)
-	  __throw_regex_error(regex_constants::error_space);
+	  __throw_regex_error(
+	    regex_constants::error_space,
+	    "Number of NFA states exceeds limit. Please use shorter regex "
+	    "string, or use smaller brace expression, or make "
+	    "_GLIBCXX_REGEX_STATE_LIMIT larger.");
 	return this->size()-1;
       }
 
diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc b/libstdc++-v3/include/bits/regex_automaton.tcc
index cecc407..4c541bc 100644
--- a/libstdc++-v3/include/bits/regex_automaton.tcc
+++ b/libstdc++-v3/include/bits/regex_automaton.tcc
@@ -149,7 +149,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _NFA<_TraitsT>::_M_insert_backref(size_t __index)
     {
       if (this->_M_flags & regex_constants::__polynomial)
-	__throw_regex_error(regex_constants::error_complexity);
+	__throw_regex_error(regex_constants::error_complexity,
+			    "Unexpected back-reference in polynomial mode.");
       // To figure out whether a backref is valid, a stack is used to store
       // unfinished sub-expressions. For example, when parsing
       // "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3
@@ -158,10 +159,14 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
       // time, "\\2" is valid, but "\\1" and "\\3" are not.
       if (__index >= _M_subexpr_count)
-	__throw_regex_error(regex_constants::error_backref);
+	__throw_regex_error(
+	  regex_constants::error_backref,
+	  "Back-reference index exceeds current sub-expression count.");
       for (auto __it : this->_M_paren_stack)
 	if (__index == __it)
-	  __throw_regex_error(regex_constants::error_backref);
+	  __throw_regex_error(
+	    regex_constants::error_backref,
+	    "Back-reference refered to an opened sub-expression.");
       this->_M_has_backref = true;
       _StateT __tmp(_S_opcode_backref);
       __tmp._M_backref_index = __index;
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 0cb0c04..12ffabe 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -397,7 +397,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	auto __st = _M_traits.lookup_collatename(__s.data(),
 						 __s.data() + __s.size());
 	if (__st.empty())
-	  __throw_regex_error(regex_constants::error_collate);
+	  __throw_regex_error(regex_constants::error_collate,
+			      string("Invalid collate element: "));
 	_M_char_set.push_back(_M_translator._M_translate(__st[0]));
 #ifdef _GLIBCXX_DEBUG
 	_M_is_ready = false;
@@ -411,7 +412,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	auto __st = _M_traits.lookup_collatename(__s.data(),
 						 __s.data() + __s.size());
 	if (__st.empty())
-	  __throw_regex_error(regex_constants::error_collate);
+	  __throw_regex_error(regex_constants::error_collate,
+			      string("Invalid equivalence class."));
 	__st = _M_traits.transform_primary(__st.data(),
 					   __st.data() + __st.size());
 	_M_equiv_set.push_back(__st);
@@ -428,7 +430,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 						 __s.data() + __s.size(),
 						 __icase);
 	if (__mask == 0)
-	  __throw_regex_error(regex_constants::error_ctype);
+	  __throw_regex_error(regex_constants::error_collate,
+			      string("Invalid character class."));
 	if (!__neg)
 	  _M_class_set |= __mask;
 	else
@@ -442,7 +445,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_make_range(_CharT __l, _CharT __r)
       {
 	if (__l > __r)
-	  __throw_regex_error(regex_constants::error_range);
+	  __throw_regex_error(regex_constants::error_range, 
+			      string("Invalid range in bracket expression."));
 	_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
 					 _M_translator._M_transform(__r)));
 #ifdef _GLIBCXX_DEBUG
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index 9a62311..019ca42 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -77,16 +77,26 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_traits(_M_nfa->_M_traits),
       _M_ctype(std::use_facet<_CtypeT>(__loc))
     {
-      _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
-      __r._M_append(_M_nfa->_M_insert_subexpr_begin());
-      this->_M_disjunction();
-      if (!_M_match_token(_ScannerT::_S_token_eof))
-	__throw_regex_error(regex_constants::error_paren);
-      __r._M_append(_M_pop());
-      _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
-      __r._M_append(_M_nfa->_M_insert_subexpr_end());
-      __r._M_append(_M_nfa->_M_insert_accept());
-      _M_nfa->_M_eliminate_dummy();
+      __try
+	{
+	  _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
+	  __r._M_append(_M_nfa->_M_insert_subexpr_begin());
+	  this->_M_disjunction();
+	  if (!_M_match_token(_ScannerT::_S_token_eof))
+	    __throw_regex_error(regex_constants::error_paren,
+				"Unexpected end of regex.");
+	  __r._M_append(_M_pop());
+	  _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
+	  __r._M_append(_M_nfa->_M_insert_subexpr_end());
+	  __r._M_append(_M_nfa->_M_insert_accept());
+	  _M_nfa->_M_eliminate_dummy();
+	}
+      __catch(std::regex_error __e)
+	{
+	  __throw_regex_error(__e.code(),
+			      string(__e.what()) + " Location: \""
+			      + _M_scanner._M_get_location_string() + "\"");
+	}
     }
 
   template<typename _TraitsT>
@@ -162,7 +172,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  auto __neg = _M_value[0] == 'n';
 	  this->_M_disjunction();
 	  if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
-	    __throw_regex_error(regex_constants::error_paren);
+	    __throw_regex_error(regex_constants::error_paren,
+				"Parenthesis is not closed.");
 	  auto __tmp = _M_pop();
 	  __tmp._M_append(_M_nfa->_M_insert_accept());
 	  _M_stack.push(
@@ -184,7 +195,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       auto __init = [this, &__neg]()
 	{
 	  if (_M_stack.empty())
-	    __throw_regex_error(regex_constants::error_badrepeat);
+	    __throw_regex_error(regex_constants::error_badrepeat,
+				"Nothing to repeat befoer a quantifier.");
 	  __neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
 	};
       if (_M_match_token(_ScannerT::_S_token_closure0))
@@ -220,9 +232,11 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       else if (_M_match_token(_ScannerT::_S_token_interval_begin))
 	{
 	  if (_M_stack.empty())
-	    __throw_regex_error(regex_constants::error_badrepeat);
+	    __throw_regex_error(regex_constants::error_badrepeat,
+				"Nothing to repeat befoer a quantifier.");
 	  if (!_M_match_token(_ScannerT::_S_token_dup_count))
-	    __throw_regex_error(regex_constants::error_badbrace);
+	    __throw_regex_error(regex_constants::error_badbrace,
+				"Unexpected token in brace expression.");
 	  _StateSeqT __r(_M_pop());
 	  _StateSeqT __e(*_M_nfa, _M_nfa->_M_insert_dummy());
 	  long __min_rep = _M_cur_int_value(10);
@@ -238,7 +252,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  else
 	    __n = 0;
 	  if (!_M_match_token(_ScannerT::_S_token_interval_end))
-	    __throw_regex_error(regex_constants::error_brace);
+	    __throw_regex_error(regex_constants::error_brace,
+				"Unexpected end of brace expression.");
 
 	  __neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
 
@@ -257,7 +272,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  else
 	    {
 	      if (__n < 0)
-		__throw_regex_error(regex_constants::error_badbrace);
+		__throw_regex_error(regex_constants::error_badbrace,
+				    "Invalid range in brace expression.");
 	      auto __end = _M_nfa->_M_insert_dummy();
 	      // _M_alt is the "match more" branch, and _M_next is the
 	      // "match less" one. Switch _M_alt and _M_next of all created
@@ -324,7 +340,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  _StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_dummy());
 	  this->_M_disjunction();
 	  if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
-	    __throw_regex_error(regex_constants::error_paren);
+	    __throw_regex_error(regex_constants::error_paren,
+				"Parenthesis is not closed.");
 	  __r._M_append(_M_pop());
 	  _M_stack.push(__r);
 	}
@@ -333,7 +350,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  _StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_subexpr_begin());
 	  this->_M_disjunction();
 	  if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
-	    __throw_regex_error(regex_constants::error_paren);
+	    __throw_regex_error(regex_constants::error_paren,
+				"Parenthesis is not closed.");
 	  __r._M_append(_M_pop());
 	  __r._M_append(_M_nfa->_M_insert_subexpr_end());
 	  _M_stack.push(__r);
@@ -474,7 +492,11 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		{
 		  if (_M_match_token(_ScannerT::_S_token_bracket_end))
 		    return false;
-		  __throw_regex_error(regex_constants::error_range);
+		  __throw_regex_error(
+		    regex_constants::error_range,
+		    "Unexpected dash in bracket expression. For POSIX syntax, "
+		    "a dash is not treated literally only when it is at "
+		    "beginning or end.");
 		}
 	      __last_char.first = true;
 	      __last_char.second = _M_value[0];
@@ -492,7 +514,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		    {
 		      if (_M_scanner._M_get_token()
 			  != _ScannerT::_S_token_bracket_end)
-			__throw_regex_error(regex_constants::error_range);
+			__throw_regex_error(
+			  regex_constants::error_range,
+			  "Unexpected end of bracket expression.");
 		      __matcher._M_add_char(_M_value[0]);
 		    }
 		}
@@ -508,7 +532,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 					 _M_ctype.is(_CtypeT::upper,
 						     _M_value[0]));
       else
-	__throw_regex_error(regex_constants::error_brack);
+	__throw_regex_error(regex_constants::error_brack,
+			    "Unexpected character in bracket expression.");
 
       return true;
     }
diff --git a/libstdc++-v3/include/bits/regex_error.h b/libstdc++-v3/include/bits/regex_error.h
index 778edd5..0dd1fdf 100644
--- a/libstdc++-v3/include/bits/regex_error.h
+++ b/libstdc++-v3/include/bits/regex_error.h
@@ -155,6 +155,12 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     regex_constants::error_type
     code() const
     { return _M_code; }
+
+  private:
+    regex_error(regex_constants::error_type __ecode, const string& __what)
+    : std::runtime_error(__what), _M_code(__ecode) { }
+
+    friend void __throw_regex_error(regex_constants::error_type, const string&);
   };
 
   //@} // group regex
@@ -162,5 +168,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
   void
   __throw_regex_error(regex_constants::error_type __ecode);
 
+  inline void
+  __throw_regex_error(regex_constants::error_type __ecode, const string& __what)
+  { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); }
+
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace std
diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h
index b47103e..7795dd2 100644
--- a/libstdc++-v3/include/bits/regex_scanner.h
+++ b/libstdc++-v3/include/bits/regex_scanner.h
@@ -220,6 +220,22 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_get_value() const
       { return _M_value; }
 
+      string
+      _M_get_location_string() const
+      {
+	auto __left = std::max(_M_begin, _M_current - 2);
+	auto __right = std::min(_M_end, _M_current + 3);
+	static constexpr char __here[] = ">>><<<";
+	string __s;
+	__s.reserve(__right - __left + ::strlen(__here));
+	while (__left < _M_current)
+	  __s += _M_ctype.narrow(*__left++, '?');
+	__s += __here;
+	while (__left < __right)
+	  __s += _M_ctype.narrow(*__left++, '?');
+	return __s;
+      }
+
 #ifdef _GLIBCXX_DEBUG
       std::ostream&
       _M_print(std::ostream&);
@@ -247,6 +263,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_eat_class(char);
 
+      _IterT                        _M_begin;
       _IterT                        _M_current;
       _IterT                        _M_end;
       _CtypeT&                      _M_ctype;
diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc
index 1555669..d6d667a 100644
--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -58,7 +58,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	     typename _Scanner::_IterT __end,
 	     _FlagT __flags, std::locale __loc)
     : _ScannerBase(__flags),
-      _M_current(__begin), _M_end(__end),
+      _M_begin(__begin), _M_current(__begin), _M_end(__end),
       _M_ctype(std::use_facet<_CtypeT>(__loc)),
       _M_eat_escape(_M_is_ecma()
 		    ? &_Scanner::_M_eat_escape_ecma
@@ -108,7 +108,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (__c == '\\')
 	{
 	  if (_M_current == _M_end)
-	    __throw_regex_error(regex_constants::error_escape);
+	    __throw_regex_error(
+	      regex_constants::error_escape,
+	      "Unexpected end of regex when escaping.");
 
 	  if (!_M_is_basic()
 	      || (*_M_current != '('
@@ -125,7 +127,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  if (_M_is_ecma() && *_M_current == '?')
 	    {
 	      if (++_M_current == _M_end)
-		__throw_regex_error(regex_constants::error_paren);
+		__throw_regex_error(
+		  regex_constants::error_paren,
+		  "Unexpected end of regex when in an open parenthesis.");
 
 	      if (*_M_current == ':')
 		{
@@ -145,7 +149,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		  _M_value.assign(1, 'n');
 		}
 	      else
-		__throw_regex_error(regex_constants::error_paren);
+		__throw_regex_error(
+		  regex_constants::error_paren,
+		  "Invalid special open parenthesis.");
 	    }
 	  else if (_M_flags & regex_constants::nosubs)
 	    _M_token = _S_token_subexpr_no_group_begin;
@@ -204,14 +210,17 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _M_scan_in_bracket()
     {
       if (_M_current == _M_end)
-	__throw_regex_error(regex_constants::error_brack);
+	__throw_regex_error(
+	  regex_constants::error_brack,
+	  "Unexpected end of regex when in bracket expression.");
 
       auto __c = *_M_current++;
 
       if (__c == '[')
 	{
 	  if (_M_current == _M_end)
-	    __throw_regex_error(regex_constants::error_brack);
+	    __throw_regex_error(regex_constants::error_brack,
+				"Unexpected character class open bracket.");
 
 	  if (*_M_current == '.')
 	    {
@@ -261,7 +270,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _M_scan_in_brace()
     {
       if (_M_current == _M_end)
-	__throw_regex_error(regex_constants::error_brace);
+	__throw_regex_error(
+	  regex_constants::error_brace,
+	  "Unexpected end of regex when in brace expression.");
 
       auto __c = *_M_current++;
 
@@ -285,7 +296,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	      ++_M_current;
 	    }
 	  else
-	    __throw_regex_error(regex_constants::error_badbrace);
+	    __throw_regex_error(regex_constants::error_badbrace,
+				"Unexpected character in brace expression.");
 	}
       else if (__c == '}')
 	{
@@ -293,7 +305,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  _M_token = _S_token_interval_end;
 	}
       else
-	__throw_regex_error(regex_constants::error_badbrace);
+	__throw_regex_error(regex_constants::error_badbrace,
+			    "Unexpected character in brace expression.");
     }
 
   template<typename _CharT>
@@ -302,7 +315,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _M_eat_escape_ecma()
     {
       if (_M_current == _M_end)
-	__throw_regex_error(regex_constants::error_escape);
+	__throw_regex_error(regex_constants::error_escape,
+			    "Unexpected end of regex when escaping.");
 
       auto __c = *_M_current++;
       auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
@@ -336,7 +350,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       else if (__c == 'c')
 	{
 	  if (_M_current == _M_end)
-	    __throw_regex_error(regex_constants::error_escape);
+	    __throw_regex_error(
+	      regex_constants::error_escape,
+	      "Unexpected end of regex when reading control code.");
 	  _M_token = _S_token_ord_char;
 	  _M_value.assign(1, *_M_current++);
 	}
@@ -347,7 +363,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    {
 	      if (_M_current == _M_end
 		  || !_M_ctype.is(_CtypeT::xdigit, *_M_current))
-		__throw_regex_error(regex_constants::error_escape);
+		__throw_regex_error(
+		  regex_constants::error_escape,
+		  "Unexpected end of regex when ascii character.");
 	      _M_value += *_M_current++;
 	    }
 	  _M_token = _S_token_hex_num;
@@ -376,7 +394,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _M_eat_escape_posix()
     {
       if (_M_current == _M_end)
-	__throw_regex_error(regex_constants::error_escape);
+	__throw_regex_error(regex_constants::error_escape,
+			    "Unexpected end of regex when escaping.");
 
       auto __c = *_M_current;
       auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
@@ -401,7 +420,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	{
 #ifdef __STRICT_ANSI__
 	  // POSIX says it is undefined to escape ordinary characters
-	  __throw_regex_error(regex_constants::error_escape);
+	  __throw_regex_error(regex_constants::error_escape,
+			      "Unexpected escape character.");
 #else
 	  _M_token = _S_token_ord_char;
 	  _M_value.assign(1, __c);
@@ -441,7 +461,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  return;
 	}
       else
-	__throw_regex_error(regex_constants::error_escape);
+	__throw_regex_error(regex_constants::error_escape,
+			    "Unexpected escape character.");
     }
 
   // Eats a character class or throws an exception.
@@ -460,9 +481,11 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  || *_M_current++ != ']') // skip ']'
 	{
 	  if (__ch == ':')
-	    __throw_regex_error(regex_constants::error_ctype);
+	    __throw_regex_error(regex_constants::error_ctype,
+				"Unexpected end of character class.");
 	  else
-	    __throw_regex_error(regex_constants::error_collate);
+	    __throw_regex_error(regex_constants::error_collate,
+				"Unexpected end of character class.");
 	}
     }