diff mbox series

[4/5,_Hashtable] Generalize the small size optimization

Message ID 6472c80c-7ea1-427c-9046-d46fd1121f34@gmail.com
State New
Headers show
Series Optimize insertions | expand

Commit Message

François Dumont Nov. 23, 2023, 9:58 p.m. UTC
libstdc++: [_Hashtable] Extend the small size optimization

     A number of methods were still not using the small size 
optimization which
     is to prefer an O(N) research to a hash computation as long as N is 
small.

     libstdc++-v3/ChangeLog:

             * include/bits/hashtable.h: Move comment about all 
equivalent values
             being next to each other in the class documentation header.
             (_M_reinsert_node, _M_merge_unique): Implement small size 
optimization.
             (_M_find_tr, _M_count_tr, _M_equal_range_tr): Likewise.

Tested under Linux x64

Ok to commit ?

François

Comments

Jonathan Wakely Dec. 21, 2023, 10:23 p.m. UTC | #1
On Thu, 23 Nov 2023 at 22:00, François Dumont <frs.dumont@gmail.com> wrote:
>
>      libstdc++: [_Hashtable] Extend the small size optimization
>
>      A number of methods were still not using the small size
> optimization which
>      is to prefer an O(N) research to a hash computation as long as N is
> small.
>
>      libstdc++-v3/ChangeLog:
>
>              * include/bits/hashtable.h: Move comment about all
> equivalent values
>              being next to each other in the class documentation header.
>              (_M_reinsert_node, _M_merge_unique): Implement small size
> optimization.
>              (_M_find_tr, _M_count_tr, _M_equal_range_tr): Likewise.
>
> Tested under Linux x64
>
> Ok to commit ?

Yes, this one seems safe to commit now. Thanks.
diff mbox series

Patch

diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index 771ed9968f7..aec77c34a58 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -152,6 +152,10 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
    *  _M_before_begin, if any, is updated to point to its new before
    *  begin node.
    *
+   *  Note that all equivalent values, if any, are next to each other, if
+   *  we find a non-equivalent value after an equivalent one it means that
+   *  we won't find any new equivalent value.
+   *
    *  On erase, the simple iterator design requires using the hash
    *  functor to get the index of the bucket to update. For this
    *  reason, when __cache_hash_code is set to false the hash functor must
@@ -1054,10 +1058,27 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  {
 	    __glibcxx_assert(get_allocator() == __nh.get_allocator());
 
+	    __node_ptr __n = nullptr;
 	    const key_type& __k = __nh._M_key();
-	    __hash_code __code = this->_M_hash_code(__k);
-	    size_type __bkt = _M_bucket_index(__code);
-	    if (__node_ptr __n = _M_find_node(__bkt, __k, __code))
+	    const size_type __size = size();
+	    if (__size <= __small_size_threshold())
+	      {
+		for (__n = _M_begin(); __n; __n = __n->_M_next())
+		  if (this->_M_key_equals(__k, *__n))
+		    break;
+	      }
+
+	    __hash_code __code;
+	    size_type __bkt;
+	    if (!__n)
+	      {
+		__code = this->_M_hash_code(__k);
+		__bkt = _M_bucket_index(__code);
+		if (__size > __small_size_threshold())
+		  __n = _M_find_node(__bkt, __k, __code);
+	      }
+
+	    if (__n)
 	      {
 		__ret.node = std::move(__nh);
 		__ret.position = iterator(__n);
@@ -1161,11 +1182,31 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;)
 	    {
 	      auto __pos = __i++;
+	      const size_type __size = size();
 	      const key_type& __k = _ExtractKey{}(*__pos);
+	      if (__size <= __small_size_threshold())
+		{
+		  bool __found = false;
+		  for (auto __n = _M_begin(); __n; __n = __n->_M_next())
+		    if (this->_M_key_equals(__k, *__n))
+		      {
+			__found = true;
+			break;
+		      }
+
+		  if (__found)
+		    {
+		      if (__n_elt != 1)
+			--__n_elt;
+		      continue;
+		    }
+		}
+
 	      __hash_code __code
 		= _M_src_hash_code(__src.hash_function(), __k, *__pos._M_cur);
 	      size_type __bkt = _M_bucket_index(__code);
-	      if (_M_find_node(__bkt, __k, __code) == nullptr)
+	      if (__size <= __small_size_threshold()
+		  || _M_find_node(__bkt, __k, __code) == nullptr)
 		{
 		  auto __nh = __src.extract(__pos);
 		  _M_insert_unique_node(__bkt, __code, __nh._M_ptr, __n_elt);
@@ -1743,6 +1784,14 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_find_tr(const _Kt& __k)
       -> iterator
       {
+	if (size() <= __small_size_threshold())
+	  {
+	    for (auto __n = _M_begin(); __n; __n = __n->_M_next())
+	      if (this->_M_key_equals_tr(__k, *__n))
+		return iterator(__n);
+	    return end();
+	  }
+
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	std::size_t __bkt = _M_bucket_index(__code);
 	return iterator(_M_find_node_tr(__bkt, __k, __code));
@@ -1759,6 +1808,14 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_find_tr(const _Kt& __k) const
       -> const_iterator
       {
+	if (size() <= __small_size_threshold())
+	  {
+	    for (auto __n = _M_begin(); __n; __n = __n->_M_next())
+	      if (this->_M_key_equals_tr(__k, *__n))
+		return const_iterator(__n);
+	    return end();
+	  }
+
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	std::size_t __bkt = _M_bucket_index(__code);
 	return const_iterator(_M_find_node_tr(__bkt, __k, __code));
@@ -1782,9 +1839,6 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (__unique_keys::value)
 	return 1;
 
-      // All equivalent values are next to each other, if we find a
-      // non-equivalent value after an equivalent one it means that we won't
-      // find any new equivalent value.
       size_type __result = 1;
       for (auto __ref = __it++;
 	   __it._M_cur && this->_M_node_equals(*__ref._M_cur, *__it._M_cur);
@@ -1806,15 +1860,30 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_count_tr(const _Kt& __k) const
       -> size_type
       {
+	if (size() <= __small_size_threshold())
+	  {
+	    size_type __result = 0;
+	    for (auto __n = _M_begin(); __n; __n = __n->_M_next())
+	      {
+		if (this->_M_key_equals_tr(__k, *__n))
+		  {
+		    ++__result;
+		    continue;
+		  }
+
+		if (__result)
+		  break;
+	      }
+
+	    return __result;
+	  }
+
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	std::size_t __bkt = _M_bucket_index(__code);
 	auto __n = _M_find_node_tr(__bkt, __k, __code);
 	if (!__n)
 	  return 0;
 
-	// All equivalent values are next to each other, if we find a
-	// non-equivalent value after an equivalent one it means that we won't
-	// find any new equivalent value.
 	iterator __it(__n);
 	size_type __result = 1;
 	for (++__it;
@@ -1844,9 +1913,6 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (__unique_keys::value)
 	return { __beg, __ite };
 
-      // All equivalent values are next to each other, if we find a
-      // non-equivalent value after an equivalent one it means that we won't
-      // find any new equivalent value.
       while (__ite._M_cur && this->_M_node_equals(*__beg._M_cur, *__ite._M_cur))
 	++__ite;
 
@@ -1871,9 +1937,6 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (__unique_keys::value)
 	return { __beg, __ite };
 
-      // All equivalent values are next to each other, if we find a
-      // non-equivalent value after an equivalent one it means that we won't
-      // find any new equivalent value.
       while (__ite._M_cur && this->_M_node_equals(*__beg._M_cur, *__ite._M_cur))
 	++__ite;
 
@@ -1892,6 +1955,25 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_equal_range_tr(const _Kt& __k)
       -> pair<iterator, iterator>
       {
+	if (size() <= __small_size_threshold())
+	  {
+	    __node_ptr __n, __beg = nullptr;
+	    for (__n = _M_begin(); __n; __n = __n->_M_next())
+	      {
+		if (this->_M_key_equals_tr(__k, *__n))
+		  {
+		    if (!__beg)
+		      __beg = __n;
+		    continue;
+		  }
+
+		if (__beg)
+		  break;
+	      }
+
+	    return { iterator(__beg), iterator(__n) };
+	  }
+
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	std::size_t __bkt = _M_bucket_index(__code);
 	auto __n = _M_find_node_tr(__bkt, __k, __code);
@@ -1899,9 +1981,6 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	if (!__n)
 	  return { __ite, __ite };
 
-	// All equivalent values are next to each other, if we find a
-	// non-equivalent value after an equivalent one it means that we won't
-	// find any new equivalent value.
 	auto __beg = __ite++;
 	while (__ite._M_cur && this->_M_equals_tr(__k, __code, *__ite._M_cur))
 	  ++__ite;
@@ -1920,6 +1999,25 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_equal_range_tr(const _Kt& __k) const
       -> pair<const_iterator, const_iterator>
       {
+	if (size() <= __small_size_threshold())
+	  {
+	    __node_ptr __n, __beg = nullptr;
+	    for (__n = _M_begin(); __n; __n = __n->_M_next())
+	      {
+		if (this->_M_key_equals_tr(__k, *__n))
+		  {
+		    if (!__beg)
+		      __beg = __n;
+		    continue;
+		  }
+
+		if (__beg)
+		  break;
+	      }
+
+	    return { const_iterator(__beg), const_iterator(__n) };
+	  }
+
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	std::size_t __bkt = _M_bucket_index(__code);
 	auto __n = _M_find_node_tr(__bkt, __k, __code);
@@ -1927,9 +2025,6 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	if (!__n)
 	  return { __ite, __ite };
 
-	// All equivalent values are next to each other, if we find a
-	// non-equivalent value after an equivalent one it means that we won't
-	// find any new equivalent value.
 	auto __beg = __ite++;
 	while (__ite._M_cur && this->_M_equals_tr(__k, __code, *__ite._M_cur))
 	  ++__ite;
@@ -2058,7 +2153,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	// First build the node to get access to the hash code
 	_Scoped_node __node { this, std::forward<_Args>(__args)...  };
 	const key_type& __k = _ExtractKey{}(__node._M_node->_M_v());
-	if (size() <= __small_size_threshold())
+	const size_type __size = size();
+	if (__size <= __small_size_threshold())
 	  {
 	    for (auto __it = _M_begin(); __it; __it = __it->_M_next())
 	      if (this->_M_key_equals(__k, *__it))
@@ -2068,7 +2164,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 	__hash_code __code = this->_M_hash_code(__k);
 	size_type __bkt = _M_bucket_index(__code);
-	if (size() > __small_size_threshold())
+	if (__size > __small_size_threshold())
 	  if (__node_ptr __p = _M_find_node(__bkt, __k, __code))
 	    // There is already an equivalent node, no insertion
 	    return { iterator(__p), false };
@@ -2231,7 +2327,8 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		       _NodeGenerator& __node_gen)
       -> pair<iterator, bool>
       {
-	if (size() <= __small_size_threshold())
+	const size_type __size = size();
+	if (__size <= __small_size_threshold())
 	  for (auto __it = _M_begin(); __it; __it = __it->_M_next())
 	    if (this->_M_key_equals_tr(__k, *__it))
 	      return { iterator(__it), false };
@@ -2239,7 +2336,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	size_type __bkt = _M_bucket_index(__code);
 
-	if (size() > __small_size_threshold())
+	if (__size > __small_size_threshold())
 	  if (__node_ptr __node = _M_find_node_tr(__bkt, __k, __code))
 	    return { iterator(__node), false };