diff mbox series

[2/3] libstdc++: Optimize __uninitialized_default using memset

Message ID 20240627105217.116315-2-jwakely@redhat.com
State New
Headers show
Series [1/3] libstdc++: Use RAII in <bits/stl_uninitialized.h> | expand

Commit Message

Jonathan Wakely June 27, 2024, 10:39 a.m. UTC
For trivial types std::__uninitialized_default (which is used by
std::uninitialized_value_construct) value-initializes the first element
then copies that to the rest of the range using std::fill.

Tamar is working on improved vectorization for std::fill, but for this
value-initialized case where we just want to fill with zeros it seems
sensible to just ... fill with zeros. We can use memset to do that.

Tested x86_64-linux.

-- >8 --

The current optimized path for __uninitialized_default and
__uninitialized_default_n will use std::fill for trivial types, but we
can just use memset to fill them with zeros instead.

Because these functions are not defined for C++98 at all, we can use
if-constexpr to simplify them and remove the dispatching to members of
class template specializations.

libstdc++-v3/ChangeLog:

	* include/bits/stl_uninitialized.h (__uninitialized_default_1)
	(__uninitialized_default_n_1): Remove.
	(__uninitialized_default, __uninitialized_default_n): Use memset
	for contiguous ranges of trivial types.
	* testsuite/20_util/specialized_algorithms/uninitialized_value_construct_n/sizes.cc:
	Check negative size.
---
 libstdc++-v3/include/bits/stl_uninitialized.h | 159 ++++++++----------
 .../uninitialized_value_construct_n/sizes.cc  |  13 ++
 2 files changed, 87 insertions(+), 85 deletions(-)

Comments

Jonathan Wakely June 27, 2024, 11:46 a.m. UTC | #1
On Thu, 27 Jun 2024 at 11:53, Jonathan Wakely <jwakely@redhat.com> wrote:
>
> For trivial types std::__uninitialized_default (which is used by
> std::uninitialized_value_construct) value-initializes the first element
> then copies that to the rest of the range using std::fill.
>
> Tamar is working on improved vectorization for std::fill, but for this
> value-initialized case where we just want to fill with zeros it seems
> sensible to just ... fill with zeros. We can use memset to do that.

Oops, I misremembered. Tamar is working on std::find not std::fill, so
that isn't relevant to this change to use memset for all trivial
types.

The current optimization to use std::fill in
std::uninitialized_value_construct only helps for trivial types of
size 1, because otherwise std::fill isn't optimized to memset. So an
alternative solution for std::uninitialized_value_construct of trivial
types would be to convert the contiguous iterators to char* (via
reinterpret_cast) and then use std::fill(f, l, '\0') which would then
use memset. Rather than relying on that detail of std::fill, I think
it makes sense just to do the memset directly where we know it's valid
for trivial types of any size. And that can be generalized for
non-common ranges, so can be used for
ranges::uninitialized_value_construct, which isn't the case if we
defer to std::fill.



>
> Tested x86_64-linux.
>
> -- >8 --
>
> The current optimized path for __uninitialized_default and
> __uninitialized_default_n will use std::fill for trivial types, but we
> can just use memset to fill them with zeros instead.
>
> Because these functions are not defined for C++98 at all, we can use
> if-constexpr to simplify them and remove the dispatching to members of
> class template specializations.
>
> libstdc++-v3/ChangeLog:
>
>         * include/bits/stl_uninitialized.h (__uninitialized_default_1)
>         (__uninitialized_default_n_1): Remove.
>         (__uninitialized_default, __uninitialized_default_n): Use memset
>         for contiguous ranges of trivial types.
>         * testsuite/20_util/specialized_algorithms/uninitialized_value_construct_n/sizes.cc:
>         Check negative size.
> ---
>  libstdc++-v3/include/bits/stl_uninitialized.h | 159 ++++++++----------
>  .../uninitialized_value_construct_n/sizes.cc  |  13 ++
>  2 files changed, 87 insertions(+), 85 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/stl_uninitialized.h b/libstdc++-v3/include/bits/stl_uninitialized.h
> index a9965f26269..1216b319f66 100644
> --- a/libstdc++-v3/include/bits/stl_uninitialized.h
> +++ b/libstdc++-v3/include/bits/stl_uninitialized.h
> @@ -61,6 +61,7 @@
>  #endif
>
>  #include <bits/stl_algobase.h>    // copy
> +#include <bits/ptr_traits.h>      // __to_address
>  #include <ext/alloc_traits.h>     // __alloc_traits
>
>  #if __cplusplus >= 201703L
> @@ -590,89 +591,72 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    // Extensions: __uninitialized_default, __uninitialized_default_n,
>    // __uninitialized_default_a, __uninitialized_default_n_a.
>
> -  template<bool _TrivialValueType>
> -    struct __uninitialized_default_1
> -    {
> -      template<typename _ForwardIterator>
> -        static void
> -        __uninit_default(_ForwardIterator __first, _ForwardIterator __last)
> -        {
> -         _UninitDestroyGuard<_ForwardIterator> __guard(__first);
> -         for (; __first != __last; ++__first)
> -           std::_Construct(std::__addressof(*__first));
> -         __guard.release();
> -       }
> -    };
> +#pragma GCC diagnostic push
> +#pragma GCC diagnostic ignored "-Wc++17-extensions"
>
> -  template<>
> -    struct __uninitialized_default_1<true>
> +  // If we can value-initialize *__first using memset then return
> +  // std::to_address(__first), otherwise return nullptr.
> +  template<typename _ForwardIterator>
> +    _GLIBCXX20_CONSTEXPR
> +    inline void*
> +    __ptr_for_trivial_zero_init(_ForwardIterator __first)
>      {
> -      template<typename _ForwardIterator>
> -        static void
> -        __uninit_default(_ForwardIterator __first, _ForwardIterator __last)
> -        {
> -         if (__first == __last)
> -           return;
> +#ifdef __cpp_lib_is_constant_evaluated
> +      if (std::is_constant_evaluated())
> +       return nullptr; // Cannot memset during constant evaluation.
> +#endif
>
> -         typename iterator_traits<_ForwardIterator>::value_type* __val
> -           = std::__addressof(*__first);
> -         std::_Construct(__val);
> -         if (++__first != __last)
> -           std::fill(__first, __last, *__val);
> -       }
> -    };
> -
> -  template<bool _TrivialValueType>
> -    struct __uninitialized_default_n_1
> -    {
> -      template<typename _ForwardIterator, typename _Size>
> -       _GLIBCXX20_CONSTEXPR
> -        static _ForwardIterator
> -        __uninit_default_n(_ForwardIterator __first, _Size __n)
> -        {
> -         _UninitDestroyGuard<_ForwardIterator> __guard(__first);
> -         for (; __n > 0; --__n, (void) ++__first)
> -           std::_Construct(std::__addressof(*__first));
> -         __guard.release();
> -         return __first;
> -       }
> -    };
> -
> -  template<>
> -    struct __uninitialized_default_n_1<true>
> -    {
> -      template<typename _ForwardIterator, typename _Size>
> -       _GLIBCXX20_CONSTEXPR
> -        static _ForwardIterator
> -        __uninit_default_n(_ForwardIterator __first, _Size __n)
> -        {
> -         if (__n > 0)
> +#if __cpp_lib_concepts
> +      if constexpr (!contiguous_iterator<_ForwardIterator>)
> +       return nullptr; // Need a raw pointer for memset.
> +#else
> +      if constexpr (!is_pointer<_ForwardIterator>::value)
> +       return nullptr;
> +#endif
> +      else
> +       {
> +         using _ValueType
> +           = typename iterator_traits<_ForwardIterator>::value_type;
> +         // Need value-init to be equivalent to zero-init.
> +         using __value_init_is_zero_init
> +           = __and_<is_trivial<_ValueType>,
> +                    is_trivially_constructible<_ValueType>>;
> +         if constexpr (__value_init_is_zero_init::value)
>             {
> -             typename iterator_traits<_ForwardIterator>::value_type* __val
> -               = std::__addressof(*__first);
> -             std::_Construct(__val);
> -             ++__first;
> -             __first = std::fill_n(__first, __n - 1, *__val);
> +             using _Ptr = decltype(std::__to_address(__first));
> +             // Cannot use memset if _Ptr is cv-qualified.
> +             if constexpr (is_convertible<_Ptr, void*>::value)
> +               return std::__to_address(__first);
>             }
> -         return __first;
>         }
> -    };
> +      return nullptr;
> +    }
>
>    // __uninitialized_default
>    // Fills [first, last) with value-initialized value_types.
>    template<typename _ForwardIterator>
>      inline void
> -    __uninitialized_default(_ForwardIterator __first,
> -                           _ForwardIterator __last)
> +    __uninitialized_default(_ForwardIterator __first, _ForwardIterator __last)
>      {
> -      typedef typename iterator_traits<_ForwardIterator>::value_type
> -       _ValueType;
> -      // trivial types can have deleted assignment
> -      const bool __assignable = is_copy_assignable<_ValueType>::value;
> +      if constexpr (__is_random_access_iter<_ForwardIterator>::__value)
> +       if (void* __ptr = std::__ptr_for_trivial_zero_init(__first))
> +         {
> +           using _ValueType
> +             = typename iterator_traits<_ForwardIterator>::value_type;
> +           if (auto __dist = __last - __first)
> +             {
> +               __glibcxx_assert(__dist > 0);
> +               const size_t __n = __dist;
> +               __glibcxx_assert(__n < __SIZE_MAX__ / sizeof(_ValueType));
> +               __builtin_memset(__ptr, 0, __n * sizeof(_ValueType));
> +             }
> +           return;
> +         }
>
> -      std::__uninitialized_default_1<__is_trivial(_ValueType)
> -                                    && __assignable>::
> -       __uninit_default(__first, __last);
> +      _UninitDestroyGuard<_ForwardIterator> __guard(__first);
> +      for (; __first != __last; ++__first)
> +       std::_Construct(std::__addressof(*__first));
> +      __guard.release();
>      }
>
>    // __uninitialized_default_n
> @@ -682,23 +666,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      inline _ForwardIterator
>      __uninitialized_default_n(_ForwardIterator __first, _Size __n)
>      {
> -#ifdef __cpp_lib_is_constant_evaluated
> -      if (std::is_constant_evaluated())
> -       return __uninitialized_default_n_1<false>::
> -                __uninit_default_n(__first, __n);
> -#endif
> +      if constexpr (is_integral<_Size>::value)
> +       if constexpr (__is_random_access_iter<_ForwardIterator>::__value)
> +         if (void* __ptr = std::__ptr_for_trivial_zero_init(__first))
> +           {
> +             using _ValueType
> +               = typename iterator_traits<_ForwardIterator>::value_type;
> +             if (__n <= 0)
> +               return __first;
> +             else if (size_t(__n) < __SIZE_MAX__ / sizeof(_ValueType))
> +               {
> +                 __builtin_memset(__ptr, 0, __n * sizeof(_ValueType));
> +                 return __first + __n;
> +               }
> +           }
>
> -      typedef typename iterator_traits<_ForwardIterator>::value_type
> -       _ValueType;
> -      // See uninitialized_fill_n for the conditions for using std::fill_n.
> -      constexpr bool __can_fill
> -       = __and_<is_integral<_Size>, is_copy_assignable<_ValueType>>::value;
> -
> -      return __uninitialized_default_n_1<__is_trivial(_ValueType)
> -                                        && __can_fill>::
> -       __uninit_default_n(__first, __n);
> +      _UninitDestroyGuard<_ForwardIterator> __guard(__first);
> +      for (; __n > 0; --__n, (void) ++__first)
> +       std::_Construct(std::__addressof(*__first));
> +      __guard.release();
> +      return __first;
>      }
> -
> +#pragma GCC diagnostic pop
>
>    // __uninitialized_default_a
>    // Fills [first, last) with value_types constructed by the allocator
> diff --git a/libstdc++-v3/testsuite/20_util/specialized_algorithms/uninitialized_value_construct_n/sizes.cc b/libstdc++-v3/testsuite/20_util/specialized_algorithms/uninitialized_value_construct_n/sizes.cc
> index 7705c6813e3..9c4198c1a98 100644
> --- a/libstdc++-v3/testsuite/20_util/specialized_algorithms/uninitialized_value_construct_n/sizes.cc
> +++ b/libstdc++-v3/testsuite/20_util/specialized_algorithms/uninitialized_value_construct_n/sizes.cc
> @@ -52,9 +52,22 @@ test02()
>    VERIFY( i[4] == 5 );
>  }
>
> +void
> +test03()
> +{
> +  int i[3] = { 1, 2, 3 };
> +  // The standard defines it in terms of a loop which only runs for positive n.
> +  auto j = std::uninitialized_value_construct_n(i+1, -5);
> +  VERIFY( j == i+1 );
> +  VERIFY( i[0] == 1 );
> +  VERIFY( i[1] == 2 );
> +  VERIFY( i[2] == 3 );
> +}
> +
>  int
>  main()
>  {
>    test01();
>    test02();
> +  test03();
>  }
> --
> 2.45.2
>
diff mbox series

Patch

diff --git a/libstdc++-v3/include/bits/stl_uninitialized.h b/libstdc++-v3/include/bits/stl_uninitialized.h
index a9965f26269..1216b319f66 100644
--- a/libstdc++-v3/include/bits/stl_uninitialized.h
+++ b/libstdc++-v3/include/bits/stl_uninitialized.h
@@ -61,6 +61,7 @@ 
 #endif
 
 #include <bits/stl_algobase.h>    // copy
+#include <bits/ptr_traits.h>      // __to_address
 #include <ext/alloc_traits.h>     // __alloc_traits
 
 #if __cplusplus >= 201703L
@@ -590,89 +591,72 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Extensions: __uninitialized_default, __uninitialized_default_n,
   // __uninitialized_default_a, __uninitialized_default_n_a.
 
-  template<bool _TrivialValueType>
-    struct __uninitialized_default_1
-    {
-      template<typename _ForwardIterator>
-        static void
-        __uninit_default(_ForwardIterator __first, _ForwardIterator __last)
-        {
-	  _UninitDestroyGuard<_ForwardIterator> __guard(__first);
-	  for (; __first != __last; ++__first)
-	    std::_Construct(std::__addressof(*__first));
-	  __guard.release();
-	}
-    };
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wc++17-extensions"
 
-  template<>
-    struct __uninitialized_default_1<true>
+  // If we can value-initialize *__first using memset then return
+  // std::to_address(__first), otherwise return nullptr.
+  template<typename _ForwardIterator>
+    _GLIBCXX20_CONSTEXPR
+    inline void*
+    __ptr_for_trivial_zero_init(_ForwardIterator __first)
     {
-      template<typename _ForwardIterator>
-        static void
-        __uninit_default(_ForwardIterator __first, _ForwardIterator __last)
-        {
-	  if (__first == __last)
-	    return;
+#ifdef __cpp_lib_is_constant_evaluated
+      if (std::is_constant_evaluated())
+	return nullptr; // Cannot memset during constant evaluation.
+#endif
 
-	  typename iterator_traits<_ForwardIterator>::value_type* __val
-	    = std::__addressof(*__first);
-	  std::_Construct(__val);
-	  if (++__first != __last)
-	    std::fill(__first, __last, *__val);
-	}
-    };
-
-  template<bool _TrivialValueType>
-    struct __uninitialized_default_n_1
-    {
-      template<typename _ForwardIterator, typename _Size>
-	_GLIBCXX20_CONSTEXPR
-        static _ForwardIterator
-        __uninit_default_n(_ForwardIterator __first, _Size __n)
-        {
-	  _UninitDestroyGuard<_ForwardIterator> __guard(__first);
-	  for (; __n > 0; --__n, (void) ++__first)
-	    std::_Construct(std::__addressof(*__first));
-	  __guard.release();
-	  return __first;
-	}
-    };
-
-  template<>
-    struct __uninitialized_default_n_1<true>
-    {
-      template<typename _ForwardIterator, typename _Size>
-	_GLIBCXX20_CONSTEXPR
-        static _ForwardIterator
-        __uninit_default_n(_ForwardIterator __first, _Size __n)
-        {
-	  if (__n > 0)
+#if __cpp_lib_concepts
+      if constexpr (!contiguous_iterator<_ForwardIterator>)
+	return nullptr; // Need a raw pointer for memset.
+#else
+      if constexpr (!is_pointer<_ForwardIterator>::value)
+	return nullptr;
+#endif
+      else
+	{
+	  using _ValueType
+	    = typename iterator_traits<_ForwardIterator>::value_type;
+	  // Need value-init to be equivalent to zero-init.
+	  using __value_init_is_zero_init
+	    = __and_<is_trivial<_ValueType>,
+		     is_trivially_constructible<_ValueType>>;
+	  if constexpr (__value_init_is_zero_init::value)
 	    {
-	      typename iterator_traits<_ForwardIterator>::value_type* __val
-		= std::__addressof(*__first);
-	      std::_Construct(__val);
-	      ++__first;
-	      __first = std::fill_n(__first, __n - 1, *__val);
+	      using _Ptr = decltype(std::__to_address(__first));
+	      // Cannot use memset if _Ptr is cv-qualified.
+	      if constexpr (is_convertible<_Ptr, void*>::value)
+		return std::__to_address(__first);
 	    }
-	  return __first;
 	}
-    };
+      return nullptr;
+    }
 
   // __uninitialized_default
   // Fills [first, last) with value-initialized value_types.
   template<typename _ForwardIterator>
     inline void
-    __uninitialized_default(_ForwardIterator __first,
-			    _ForwardIterator __last)
+    __uninitialized_default(_ForwardIterator __first, _ForwardIterator __last)
     {
-      typedef typename iterator_traits<_ForwardIterator>::value_type
-	_ValueType;
-      // trivial types can have deleted assignment
-      const bool __assignable = is_copy_assignable<_ValueType>::value;
+      if constexpr (__is_random_access_iter<_ForwardIterator>::__value)
+	if (void* __ptr = std::__ptr_for_trivial_zero_init(__first))
+	  {
+	    using _ValueType
+	      = typename iterator_traits<_ForwardIterator>::value_type;
+	    if (auto __dist = __last - __first)
+	      {
+		__glibcxx_assert(__dist > 0);
+		const size_t __n = __dist;
+		__glibcxx_assert(__n < __SIZE_MAX__ / sizeof(_ValueType));
+		__builtin_memset(__ptr, 0, __n * sizeof(_ValueType));
+	      }
+	    return;
+	  }
 
-      std::__uninitialized_default_1<__is_trivial(_ValueType)
-				     && __assignable>::
-	__uninit_default(__first, __last);
+      _UninitDestroyGuard<_ForwardIterator> __guard(__first);
+      for (; __first != __last; ++__first)
+	std::_Construct(std::__addressof(*__first));
+      __guard.release();
     }
 
   // __uninitialized_default_n
@@ -682,23 +666,28 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     inline _ForwardIterator
     __uninitialized_default_n(_ForwardIterator __first, _Size __n)
     {
-#ifdef __cpp_lib_is_constant_evaluated
-      if (std::is_constant_evaluated())
-	return __uninitialized_default_n_1<false>::
-		 __uninit_default_n(__first, __n);
-#endif
+      if constexpr (is_integral<_Size>::value)
+	if constexpr (__is_random_access_iter<_ForwardIterator>::__value)
+	  if (void* __ptr = std::__ptr_for_trivial_zero_init(__first))
+	    {
+	      using _ValueType
+		= typename iterator_traits<_ForwardIterator>::value_type;
+	      if (__n <= 0)
+		return __first;
+	      else if (size_t(__n) < __SIZE_MAX__ / sizeof(_ValueType))
+		{
+		  __builtin_memset(__ptr, 0, __n * sizeof(_ValueType));
+		  return __first + __n;
+		}
+	    }
 
-      typedef typename iterator_traits<_ForwardIterator>::value_type
-	_ValueType;
-      // See uninitialized_fill_n for the conditions for using std::fill_n.
-      constexpr bool __can_fill
-	= __and_<is_integral<_Size>, is_copy_assignable<_ValueType>>::value;
-
-      return __uninitialized_default_n_1<__is_trivial(_ValueType)
-					 && __can_fill>::
-	__uninit_default_n(__first, __n);
+      _UninitDestroyGuard<_ForwardIterator> __guard(__first);
+      for (; __n > 0; --__n, (void) ++__first)
+	std::_Construct(std::__addressof(*__first));
+      __guard.release();
+      return __first;
     }
-
+#pragma GCC diagnostic pop
 
   // __uninitialized_default_a
   // Fills [first, last) with value_types constructed by the allocator
diff --git a/libstdc++-v3/testsuite/20_util/specialized_algorithms/uninitialized_value_construct_n/sizes.cc b/libstdc++-v3/testsuite/20_util/specialized_algorithms/uninitialized_value_construct_n/sizes.cc
index 7705c6813e3..9c4198c1a98 100644
--- a/libstdc++-v3/testsuite/20_util/specialized_algorithms/uninitialized_value_construct_n/sizes.cc
+++ b/libstdc++-v3/testsuite/20_util/specialized_algorithms/uninitialized_value_construct_n/sizes.cc
@@ -52,9 +52,22 @@  test02()
   VERIFY( i[4] == 5 );
 }
 
+void
+test03()
+{
+  int i[3] = { 1, 2, 3 };
+  // The standard defines it in terms of a loop which only runs for positive n.
+  auto j = std::uninitialized_value_construct_n(i+1, -5);
+  VERIFY( j == i+1 );
+  VERIFY( i[0] == 1 );
+  VERIFY( i[1] == 2 );
+  VERIFY( i[2] == 3 );
+}
+
 int
 main()
 {
   test01();
   test02();
+  test03();
 }