diff mbox series

[committed] libstdc++: Define std::string::resize_and_overwrite for C++11 and COW string

Message ID 20230817203052.1131293-1-jwakely@redhat.com
State New
Headers show
Series [committed] libstdc++: Define std::string::resize_and_overwrite for C++11 and COW string | expand

Commit Message

Jonathan Wakely Aug. 17, 2023, 8:30 p.m. UTC
Tested x86_64-linux. Pushed to trunk.

-- >8 --

There are several places in the library where we can improve performance
using resize_and_overwrite so it's inconvenient only being able to use
it in C++23 mode, and only for cxx11 strings. This adds it for COW
strings, and also adds __resize_and_overwrite as an extension for C++11
mode.

The new __resize_and_overwrite is available for C++11 and later, so
within the library we can use that consistently even in C++23.  In order
to avoid making a copy (which might not be possible for non-copyable,
non-movable types) the callable is passed to resize_and_overwrite as an
lvalue reference.  Unlike wrapping it in std::ref(op) this ensures that
invoking it as std::move(op)(n, p) will use the correct value category.
It also avoids any overhead that would be added by wrapping it in a
lambda like [&op](auto p, auto n) { return std::move(op)(p, n); }.

Adjust std::format to use the new __resize_and_overwrite, which we can
assume exists because we only use std::basic_string<char> and
std::basic_string<wchar_t>, so no program-defined specializations.

The uses in <experimental/internet> cannot be replaced, because those
are type-dependent on an Allocator template parameter, which could mean
they use program-defined specializations of std::basic_string that don't
have the __resize_and_overwrite extension.

libstdc++-v3/ChangeLog:

	* include/bits/basic_string.h (__resize_and_overwrite): New
	function.
	* include/bits/basic_string.tcc (__resize_and_overwrite): New
	function.
	(resize_and_overwrite): Simplify by using reserve instead of
	growing the string manually. Adjust for C++11 compatibility.
	* include/bits/cow_string.h (resize_and_overwrite): New
	function.
	(__resize_and_overwrite): New function.
	* include/bits/version.def (__cpp_lib_string_resize_and_overwrite):
	Do not depend on cxx11abi.
	* include/bits/version.h: Regenerate.
	* include/std/format (__formatter_fp::_S_resize_and_overwrite):
	Remove.
	(__formatter_fp::format, __formatter_fp::_M_localize): Use
	__resize_and_overwrite instead of _S_resize_and_overwrite.
	* testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc:
	Adjust for C++11 compatibility when included by ...
	* testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite_ext.cc:
	New test.
---
 libstdc++-v3/include/bits/basic_string.h      |   7 ++
 libstdc++-v3/include/bits/basic_string.tcc    |  54 ++++++----
 libstdc++-v3/include/bits/cow_string.h        |  90 ++++++++++++++++
 libstdc++-v3/include/bits/version.def         |   1 -
 libstdc++-v3/include/bits/version.h           |   2 +-
 libstdc++-v3/include/std/format               |  18 +---
 .../capacity/char/resize_and_overwrite.cc     | 101 ++++++++++++++----
 .../capacity/char/resize_and_overwrite_ext.cc |   6 ++
 8 files changed, 217 insertions(+), 62 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite_ext.cc
diff mbox series

Patch

diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h
index c68e6171aba..e6f94640150 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -1151,6 +1151,13 @@  _GLIBCXX_BEGIN_NAMESPACE_CXX11
 	resize_and_overwrite(size_type __n, _Operation __op);
 #endif
 
+#if __cplusplus >= 201103L
+      /// Non-standard version of resize_and_overwrite for C++11 and above.
+      template<typename _Operation>
+	_GLIBCXX20_CONSTEXPR void
+	__resize_and_overwrite(size_type __n, _Operation __op);
+#endif
+
       /**
        *  Returns the total number of characters that the %string can hold
        *  before needing to allocate more memory.
diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc
index c759c2f9525..104a517f794 100644
--- a/libstdc++-v3/include/bits/basic_string.tcc
+++ b/libstdc++-v3/include/bits/basic_string.tcc
@@ -561,44 +561,52 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return __n;
     }
 
-#if __cplusplus > 202002L
+#ifdef __cpp_lib_string_resize_and_overwrite // C++ >= 23
   template<typename _CharT, typename _Traits, typename _Alloc>
   template<typename _Operation>
+    [[__gnu__::__always_inline__]]
     constexpr void
     basic_string<_CharT, _Traits, _Alloc>::
-    resize_and_overwrite(const size_type __n, _Operation __op)
-    {
-      const size_type __capacity = capacity();
-      _CharT* __p;
-      if (__n > __capacity)
-	{
-	  auto __new_capacity = __n; // Must not allow _M_create to modify __n.
-	  __p = _M_create(__new_capacity, __capacity);
-	  this->_S_copy(__p, _M_data(), length()); // exclude trailing null
-#if __cpp_lib_is_constant_evaluated
-	  if (std::is_constant_evaluated())
-	    traits_type::assign(__p + length(), __n - length(), _CharT());
+    __resize_and_overwrite(const size_type __n, _Operation __op)
+    { resize_and_overwrite<_Operation&>(__n, __op); }
+#endif
+
+#if __cplusplus >= 201103L
+  template<typename _CharT, typename _Traits, typename _Alloc>
+  template<typename _Operation>
+    _GLIBCXX20_CONSTEXPR void
+    basic_string<_CharT, _Traits, _Alloc>::
+#ifdef __cpp_lib_string_resize_and_overwrite // C++ >= 23
+    resize_and_overwrite(const size_type __n, _Operation __op)
+#else
+    __resize_and_overwrite(const size_type __n, _Operation __op)
+#endif
+    {
+      reserve(__n);
+      _CharT* const __p = _M_data();
+#if __cpp_lib_is_constant_evaluated
+      if (std::__is_constant_evaluated() && __n > size())
+	traits_type::assign(__p + size(), __n - size(), _CharT());
 #endif
-	  _M_dispose();
-	  _M_data(__p);
-	  _M_capacity(__new_capacity);
-	}
-      else
-	__p = _M_data();
       struct _Terminator {
-	constexpr ~_Terminator() { _M_this->_M_set_length(_M_r); }
+	_GLIBCXX20_CONSTEXPR ~_Terminator() { _M_this->_M_set_length(_M_r); }
 	basic_string* _M_this;
 	size_type _M_r;
       };
-      _Terminator __term{this};
-      auto __r = std::move(__op)(auto(__p), auto(__n));
+      _Terminator __term{this, 0};
+      auto __r = std::move(__op)(__p + 0, __n + 0);
+#ifdef __cpp_lib_concepts
       static_assert(ranges::__detail::__is_integer_like<decltype(__r)>);
+#else
+      static_assert(__gnu_cxx::__is_integer_nonstrict<decltype(__r)>::__value,
+		    "resize_and_overwrite operation must return an integer");
+#endif
       _GLIBCXX_DEBUG_ASSERT(__r >= 0 && __r <= __n);
       __term._M_r = size_type(__r);
       if (__term._M_r > __n)
 	__builtin_unreachable();
     }
-#endif // C++23
+#endif // C++11
 
 #endif  // _GLIBCXX_USE_CXX11_ABI
    
diff --git a/libstdc++-v3/include/bits/cow_string.h b/libstdc++-v3/include/bits/cow_string.h
index bf676ed40c5..5411dfe32a9 100644
--- a/libstdc++-v3/include/bits/cow_string.h
+++ b/libstdc++-v3/include/bits/cow_string.h
@@ -965,6 +965,48 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #pragma GCC diagnostic pop
 #endif
 
+#ifdef __cpp_lib_string_resize_and_overwrite // C++ >= 23
+      /** Resize the string and call a function to fill it.
+       *
+       * @param __n   The maximum size requested.
+       * @param __op  A callable object that writes characters to the string.
+       *
+       * This is a low-level function that is easy to misuse, be careful.
+       *
+       * Calling `str.resize_and_overwrite(n, op)` will reserve at least `n`
+       * characters in `str`, evaluate `n2 = std::move(op)(str.data(), n)`,
+       * and finally set the string length to `n2` (adding a null terminator
+       * at the end). The function object `op` is allowed to write to the
+       * extra capacity added by the initial reserve operation, which is not
+       * allowed if you just call `str.reserve(n)` yourself.
+       *
+       * This can be used to efficiently fill a `string` buffer without the
+       * overhead of zero-initializing characters that will be overwritten
+       * anyway.
+       *
+       * The callable `op` must not access the string directly (only through
+       * the pointer passed as its first argument), must not write more than
+       * `n` characters to the string, must return a value no greater than `n`,
+       * and must ensure that all characters up to the returned length are
+       * valid after it returns (i.e. there must be no uninitialized values
+       * left in the string after the call, because accessing them would
+       * have undefined behaviour). If `op` exits by throwing an exception
+       * the behaviour is undefined.
+       *
+       * @since C++23
+       */
+      template<typename _Operation>
+	void
+	resize_and_overwrite(size_type __n, _Operation __op);
+#endif // __cpp_lib_string_resize_and_overwrite
+
+#if __cplusplus >= 201103L
+      /// Non-standard version of resize_and_overwrite for C++11 and above.
+      template<typename _Operation>
+	void
+	__resize_and_overwrite(size_type __n, _Operation __op);
+#endif
+
       /**
        *  Returns the total number of characters that the %string can hold
        *  before needing to allocate more memory.
@@ -3711,6 +3753,54 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       // 21.3.5.7 par 3: do not append null.  (good.)
       return __n;
     }
+
+#ifdef __cpp_lib_string_resize_and_overwrite // C++ >= 23
+  template<typename _CharT, typename _Traits, typename _Alloc>
+  template<typename _Operation>
+    [[__gnu__::__always_inline__]]
+    void
+    basic_string<_CharT, _Traits, _Alloc>::
+    __resize_and_overwrite(const size_type __n, _Operation __op)
+    { resize_and_overwrite<_Operation&>(__n, __op); }
+#endif
+
+#if __cplusplus >= 201103L
+  template<typename _CharT, typename _Traits, typename _Alloc>
+  template<typename _Operation>
+    void
+    basic_string<_CharT, _Traits, _Alloc>::
+#ifdef __cpp_lib_string_resize_and_overwrite // C++ >= 23
+    resize_and_overwrite(const size_type __n, _Operation __op)
+#else
+    __resize_and_overwrite(const size_type __n, _Operation __op)
+#endif
+    {
+      const size_type __capacity = capacity();
+      _CharT* __p;
+      if (__n > __capacity || _M_rep()->_M_is_shared())
+	this->reserve(__n);
+      __p = _M_data();
+      struct _Terminator {
+	~_Terminator() { _M_this->_M_rep()->_M_set_length_and_sharable(_M_r); }
+	basic_string* _M_this;
+	size_type _M_r;
+      };
+      _Terminator __term{this, 0};
+      auto __r = std::move(__op)(__p + 0, __n + 0);
+#ifdef __cpp_lib_concepts
+      static_assert(ranges::__detail::__is_integer_like<decltype(__r)>);
+#else
+      static_assert(__gnu_cxx::__is_integer_nonstrict<decltype(__r)>::__value,
+		    "resize_and_overwrite operation must return an integer");
+#endif
+      _GLIBCXX_DEBUG_ASSERT(__r >= 0 && __r <= __n);
+      __term._M_r = size_type(__r);
+      if (__term._M_r > __n)
+	__builtin_unreachable();
+    }
+#endif // C++11
+
+
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace std
 #endif  // ! _GLIBCXX_USE_CXX11_ABI
diff --git a/libstdc++-v3/include/bits/version.def b/libstdc++-v3/include/bits/version.def
index 1383708b2d7..e20dd3e3c0b 100644
--- a/libstdc++-v3/include/bits/version.def
+++ b/libstdc++-v3/include/bits/version.def
@@ -1579,7 +1579,6 @@  ftms = {
     v = 202110;
     cxxmin = 23;
     hosted = yes;
-    cxx11abi = yes;
   };
 };
 
diff --git a/libstdc++-v3/include/bits/version.h b/libstdc++-v3/include/bits/version.h
index e87f0884c9c..7a276de70ef 100644
--- a/libstdc++-v3/include/bits/version.h
+++ b/libstdc++-v3/include/bits/version.h
@@ -1930,7 +1930,7 @@ 
 
 // from version.def line 1577
 #if !defined(__cpp_lib_string_resize_and_overwrite)
-# if (__cplusplus >= 202302L) && _GLIBCXX_USE_CXX11_ABI && _GLIBCXX_HOSTED
+# if (__cplusplus >= 202302L) && _GLIBCXX_HOSTED
 #  define __glibcxx_string_resize_and_overwrite 202110L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_string_resize_and_overwrite)
 #   define __cpp_lib_string_resize_and_overwrite 202110L
diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 40c7d6128f6..0d7d3d16420 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -1519,8 +1519,8 @@  namespace __format
 		    return __res.ec == errc{} ? __res.ptr - __p : 0;
 		  };
 
-		  _S_resize_and_overwrite(__dynbuf, __dynbuf.capacity() * 2,
-					  __overwrite);
+		  __dynbuf.__resize_and_overwrite(__dynbuf.capacity() * 2,
+						  __overwrite);
 		  __start = __dynbuf.data() + 1; // reserve space for sign
 		  __end = __dynbuf.data() + __dynbuf.size();
 		}
@@ -1727,22 +1727,10 @@  namespace __format
 	    }
 	  return (__end - __p);
 	};
-	_S_resize_and_overwrite(__lstr, __e * 2 + __r, __overwrite);
+	__lstr.__resize_and_overwrite(__e * 2 + __r, __overwrite);
 	return __lstr;
       }
 
-      template<typename _Ch, typename _Func>
-	static void
-	_S_resize_and_overwrite(basic_string<_Ch>& __str, size_t __n, _Func __f)
-	{
-#if __cpp_lib_string_resize_and_overwrite
-	  __str.resize_and_overwrite(__n, __f);
-#else
-	  __str.resize(__n);
-	  __str.resize(__f(__str.data(), __n));
-#endif
-	}
-
       _Spec<_CharT> _M_spec{};
     };
 
diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc
index 0ea5e2b10ef..3265ebc3486 100644
--- a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc
+++ b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc
@@ -1,13 +1,15 @@ 
 // { dg-options "-std=gnu++23" }
-// { dg-do run { target { c++23 && cxx11_abi } } }
+// { dg-do run { target c++23 } }
 
 #include <string>
 
+#if __cplusplus >= 202302L
 #ifndef __cpp_lib_string_resize_and_overwrite
 #error "Feature test macro for resize_and_overwrite is missing in <string>"
 #elif __cpp_lib_string_resize_and_overwrite != 202110L
 # error "Feature test macro for resize_and_overwrite has wrong value in <string>"
 #endif
+#endif
 
 
 #include <cstring>
@@ -34,8 +36,8 @@  test01()
 
   s.resize_and_overwrite(50, [](char* p, int n) -> unsigned {
     VERIFY( n == 50 );
-    VERIFY( !std::strncmp(p, "monkey", 3) );
-    std::strcpy(p, "Partridge among the pidgeons");
+    VERIFY( !std::strncmp(p, "monkey", 6) );
+    std::strcpy(p, "Partridge among the pigeons");
     return 9;
   });
   VERIFY( s.data() == str ); // No reallocation
@@ -48,21 +50,21 @@  test02()
 {
   std::string s;
   auto p = s.data();
-  s.resize_and_overwrite(0, [](auto...) { return 0; });
+  s.resize_and_overwrite(0, [](char*, int) { return 0; });
   VERIFY( s.empty() );
   VERIFY( s[0] == '\0' );
   VERIFY( s.data() == p );
 
   s = "short string";
   p = s.data();
-  s.resize_and_overwrite(0, [](auto...) { return 0; });
+  s.resize_and_overwrite(0, [](char*, int) { return 0; });
   VERIFY( s.empty() );
   VERIFY( s[0] == '\0' );
   VERIFY( s.data() == p );
 
   s = "a string that is long enough to not be a short string";
   p = s.data();
-  s.resize_and_overwrite(0, [](auto...) { return 0; });
+  s.resize_and_overwrite(0, [](char*, int) { return 0; });
   VERIFY( s.empty() );
   VERIFY( s[0] == '\0' );
   VERIFY( s.data() == p );
@@ -71,26 +73,43 @@  test02()
 void
 test03()
 {
+  using size_type = std::string::size_type;
+
+  // Op must be invoked as std::move(op)(p, m) to use &&-qualified overload.
+  // The value category and cv-qualification of the arguments is unspecified.
   struct Op
   {
-    int operator()(char*, int) & = delete;
-    int operator()(char*, int) const & = delete;
-    int operator()(char* p, int n) && { std::memset(p, 'a', n+1); return n; }
-    int operator()(char*, int) const && = delete;
+    int operator()(char* p, size_type n) &&
+    {
+      // N.B. this intentionally writes to p[n], see below, and test06().
+      std::memset(p, 'a', n+1);
+      return n;
+    }
+
+    // Wrong ref-quals:
+    int operator()(char*, size_type) & = delete;
+    int operator()(char*, size_type) const & = delete;
+    int operator()(char*, size_type) const && = delete;
+
+    // Wrong types:
+    int operator()(char* p, int n) && = delete;
+    int operator()(char* p, long n) && = delete;
+    int operator()(char* p, long long n) && = delete;
+
+#ifdef _GLIBCXX_RELEASE
+    // This overload was used prior to the resolution of LWG 3645:
+    // resize_and_overwrite is overspecified to call its callback with lvalues.
+    // A conforming implementation might still pass lvalues to the op, but the
+    // libstdc++ implementation passes prvalues and so this overload should
+    // not be selected now.
+    int operator()(char*&, std::string::size_type&) && = delete; // (*)
+#endif
   };
   std::string s;
   s.resize_and_overwrite(42, Op{});
   VERIFY( s.size() == 42 );
   VERIFY( s == std::string(42, 'a') );
-  VERIFY( s[42] == '\0' );
-
-  s.resize_and_overwrite(0, [](auto p, auto n) {
-    // N.B. these requirements were relaxed by LWG 3645:
-    // resize_and_overwrite is overspecified to call its callback with lvalues
-    static_assert( std::is_same_v<decltype(p), char*> );
-    static_assert( std::is_same_v<decltype(n), std::string::size_type> );
-    return 0;
-  });
+  VERIFY( s[42] == '\0' ); // Callback wrote to p[n] but it's null now.
 }
 
 void
@@ -99,7 +118,7 @@  test04()
   std::string s = "this tests how the library copes with undefined behaviour";
 
   try {
-    s.resize_and_overwrite(13, [](auto...) -> int { throw "undefined"; });
+    s.resize_and_overwrite(13, [](...) -> int { throw "undefined"; });
   } catch (...) {
     // The standard doesn't require this, but we leave the string empty:
     VERIFY( s.size() == 0 );
@@ -110,6 +129,7 @@  test04()
 constexpr bool
 test05()
 {
+#if __cpp_lib_constexpr_string >= 201907
   std::string s;
   s.resize_and_overwrite(20, [](char* p, auto n) {
     *p = '!'; // direct assignment should be OK
@@ -117,6 +137,7 @@  test05()
     return 9;
   });
   VERIFY( s == "constexpr" );
+#endif
   return true;
 }
 
@@ -125,19 +146,54 @@  test06()
 {
   std::string s = "0123456789";
   s.resize_and_overwrite(16, [](char* p, int n) {
+    // Even though s.capacity() == 20 this callback still gets n==16:
     VERIFY( n == 16 );
-    std::char_traits<char>::copy(p + 10, "0123456798", 6);
+    // Standard requires [p, p+n] to be a valid range, so this sets p[16]='6':
+    std::char_traits<char>::copy(p + 10, "0123456798", 7);
     return n;
   });
   VERIFY( s.size() == 16 );
   VERIFY( s == "0123456789012345" );
+  // Although p[16] was written to by the callback, it must be set to '\0'
+  // to maintain the invariant that the string is null-terminated:
+  VERIFY( s[16] == '\0' );
 
   s.resize_and_overwrite(4, [](char* p, int n) {
     VERIFY( n == 4 );
-    std::char_traits<char>::copy(p, "abcd", 4);
+    std::char_traits<char>::copy(p, "abcde", 5); // Writes to p[n].
     return n;
   });
   VERIFY( s.size() == 4 );
+  VERIFY( s[4] == '\0' );
+
+  std::string short_string;
+  // For the SSO string this won't need to allocate anything,
+  // but all the same checks should pass.
+  short_string.resize_and_overwrite(4, [](char* p, int n) {
+    VERIFY( n == 4 );
+    std::char_traits<char>::copy(p, "abcde", 5); // Writes to p[n].
+    return n;
+  });
+  VERIFY( short_string.size() == 4 );
+  VERIFY( short_string[4] == '\0' );
+
+}
+
+void
+test07()
+{
+#if __cpp_guaranteed_copy_elision
+  // Non-copyable, non-movable type can be used as the callable.
+  struct Op
+  {
+    Op() = default;
+    Op(Op&&) = delete;
+    Op& operator=(Op&&) = delete;
+    int operator()(char* p, int n) && { return 0; }
+  };
+  std::string s;
+  s.resize_and_overwrite(0, Op());
+#endif
 }
 
 int main()
@@ -148,4 +204,5 @@  int main()
   test04();
   static_assert( test05() );
   test06();
+  test07();
 }
diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite_ext.cc b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite_ext.cc
new file mode 100644
index 00000000000..ebc2afbe1a6
--- /dev/null
+++ b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite_ext.cc
@@ -0,0 +1,6 @@ 
+// { dg-do run { target c++11 } }
+
+#include <string>
+// Run ./resize_and_overwrite.cc tests using __resize_and_overwrite instead.
+#define resize_and_overwrite __resize_and_overwrite
+#include "resize_and_overwrite.cc"