diff mbox

hashtable optimization

Message ID 54E45D4C.4010109@gmail.com
State New
Headers show

Commit Message

François Dumont Feb. 18, 2015, 9:37 a.m. UTC
With patch.

On 18/02/2015 10:35, François Dumont wrote:
> Hello
>
>     I am still studying hashtable performances and especially how to 
> reduce overhead compared to tr1 implementation. Most of the overhead 
> is coming from the additional modulo operations required with the new 
> data model. Having a closer look at PR 57885 bench I realized that we 
> can quite easily avoid an important modulo operation in 
> _M_insert_bucket_begin thanks to an additional std::size_t in the 
> container.
>
>     The patch is quite straightforward, it optimizes insertion of a 
> node in an empty bucket which is the most important kind of insertion 
> as long as hash functor is doing a good job as per default we should 
> have at most 1 element per buckets:
>
> Without patch:
> Container:std::unordered_map<int,int>  Key:int
> Insertion: 1106 671 634 634 635  min=634 max=1106
>
> Container:std::tr1::unordered_map<int,int>  Key:int
> Insertion: 885 491 487 490 511  min=487 max=885
>
> With patch:
> Container:std::unordered_map<int,int>  Key:int
> Insertion: 1099 581 583 584 592  min=581 max=1099
>
> Container:std::tr1::unordered_map<int,int>  Key:int
> Insertion: 889 491 519 492 493  min=491 max=889
>
>     I prefer to propose it now because it impacts ABI.
>
> 2015-02-19  François Dumont  <fdumont@gcc.gnu.org>
>
>     * include/bits/hashtable.h (_Hashtable<>::_M_bbegin_bkt): New, bucket
>     pointing to _M_before_begin.
>     (_Hashtable<>): Maintain and use later.
>
> Tested under Linux x86_64.
>
> François
>
diff mbox

Patch

Index: include/bits/hashtable.h
===================================================================
--- include/bits/hashtable.h	(revision 220780)
+++ include/bits/hashtable.h	(working copy)
@@ -324,6 +324,9 @@ 
       // numerous checks in the code to avoid 0 modulus.
       __bucket_type		_M_single_bucket	= nullptr;
 
+      // Cache index of the bucket pointing to _M_before_begin
+      size_type			_M_bbegin_bkt;
+
       bool
       _M_uses_single_bucket(__bucket_type* __bkts) const
       { return __builtin_expect(__bkts == &_M_single_bucket, false); }
@@ -965,7 +968,8 @@ 
 	    __node_type* __this_n = __node_gen(__ht_n);
 	    this->_M_copy_code(__this_n, __ht_n);
 	    _M_before_begin._M_nxt = __this_n;
-	    _M_buckets[_M_bucket_index(__this_n)] = &_M_before_begin;
+	    _M_bbegin_bkt = _M_bucket_index(__this_n);
+	    _M_buckets[_M_bbegin_bkt] = &_M_before_begin;
 
 	    // Then deal with other nodes.
 	    __node_base* __prev_n = __this_n;
@@ -1029,12 +1033,13 @@ 
       _M_bucket_count = __ht._M_bucket_count;
       _M_before_begin._M_nxt = __ht._M_before_begin._M_nxt;
       _M_element_count = __ht._M_element_count;
+      _M_bbegin_bkt = __ht._M_bbegin_bkt;
       std::__alloc_on_move(this->_M_node_allocator(), __ht._M_node_allocator());
 
       // Fix buckets containing the _M_before_begin pointers that can't be
       // moved.
       if (_M_begin())
-	_M_buckets[_M_bucket_index(_M_begin())] = &_M_before_begin;
+	_M_buckets[_M_bbegin_bkt] = &_M_before_begin;
       __ht._M_reset();
     }
 
@@ -1131,7 +1136,8 @@ 
       _M_bucket_count(__ht._M_bucket_count),
       _M_before_begin(__ht._M_before_begin._M_nxt),
       _M_element_count(__ht._M_element_count),
-      _M_rehash_policy(__ht._M_rehash_policy)
+      _M_rehash_policy(__ht._M_rehash_policy),
+      _M_bbegin_bkt(__ht._M_bbegin_bkt)
     {
       // Update, if necessary, buckets if __ht is using its single bucket.
       if (__ht._M_uses_single_bucket())
@@ -1143,7 +1149,7 @@ 
       // Update, if necessary, bucket pointing to before begin that hasn't
       // moved.
       if (_M_begin())
-	_M_buckets[_M_bucket_index(_M_begin())] = &_M_before_begin;
+	_M_buckets[_M_bbegin_bkt] = &_M_before_begin;
 
       __ht._M_reset();
     }
@@ -1183,7 +1189,8 @@ 
       _M_buckets(nullptr),
       _M_bucket_count(__ht._M_bucket_count),
       _M_element_count(__ht._M_element_count),
-      _M_rehash_policy(__ht._M_rehash_policy)
+      _M_rehash_policy(__ht._M_rehash_policy),
+      _M_bbegin_bkt(__ht._M_bbegin_bkt)
     {
       if (__ht._M_node_allocator() == this->_M_node_allocator())
 	{
@@ -1199,7 +1206,7 @@ 
 	  // Update, if necessary, bucket pointing to before begin that hasn't
 	  // moved.
 	  if (_M_begin())
-	    _M_buckets[_M_bucket_index(_M_begin())] = &_M_before_begin;
+	    _M_buckets[_M_bbegin_bkt] = &_M_before_begin;
 	  __ht._M_reset();
 	}
       else
@@ -1265,15 +1272,15 @@ 
       std::swap(_M_before_begin._M_nxt, __x._M_before_begin._M_nxt);
       std::swap(_M_element_count, __x._M_element_count);
       std::swap(_M_single_bucket, __x._M_single_bucket);
+      std::swap(_M_bbegin_bkt, __x._M_bbegin_bkt);
 
       // Fix buckets containing the _M_before_begin pointers that can't be
       // swapped.
       if (_M_begin())
-	_M_buckets[_M_bucket_index(_M_begin())] = &_M_before_begin;
+	_M_buckets[_M_bbegin_bkt] = &_M_before_begin;
 
       if (__x._M_begin())
-	__x._M_buckets[__x._M_bucket_index(__x._M_begin())]
-	  = &__x._M_before_begin;
+	__x._M_buckets[__x._M_bbegin_bkt] = &__x._M_before_begin;
     }
 
   template<typename _Key, typename _Value,
@@ -1466,7 +1473,8 @@ 
 	  if (__node->_M_nxt)
 	    // We must update former begin bucket that is pointing to
 	    // _M_before_begin.
-	    _M_buckets[_M_bucket_index(__node->_M_next())] = __node;
+	    _M_buckets[_M_bbegin_bkt] = __node;
+	  _M_bbegin_bkt = __bkt;
 	  _M_buckets[__bkt] = &_M_before_begin;
 	}
     }
@@ -1974,7 +1982,7 @@ 
       __bucket_type* __new_buckets = _M_allocate_buckets(__n);
       __node_type* __p = _M_begin();
       _M_before_begin._M_nxt = nullptr;
-      std::size_t __bbegin_bkt = 0;
+
       while (__p)
 	{
 	  __node_type* __next = __p->_M_next();
@@ -1985,8 +1993,8 @@ 
 	      _M_before_begin._M_nxt = __p;
 	      __new_buckets[__bkt] = &_M_before_begin;
 	      if (__p->_M_nxt)
-		__new_buckets[__bbegin_bkt] = __p;
-	      __bbegin_bkt = __bkt;
+		__new_buckets[_M_bbegin_bkt] = __p;
+	      _M_bbegin_bkt = __bkt;
 	    }
 	  else
 	    {
@@ -2016,7 +2024,6 @@ 
 
       __node_type* __p = _M_begin();
       _M_before_begin._M_nxt = nullptr;
-      std::size_t __bbegin_bkt = 0;
       std::size_t __prev_bkt = 0;
       __node_type* __prev_p = nullptr;
       bool __check_bucket = false;
@@ -2064,8 +2071,8 @@ 
 		  _M_before_begin._M_nxt = __p;
 		  __new_buckets[__bkt] = &_M_before_begin;
 		  if (__p->_M_nxt)
-		    __new_buckets[__bbegin_bkt] = __p;
-		  __bbegin_bkt = __bkt;
+		    __new_buckets[_M_bbegin_bkt] = __p;
+		  _M_bbegin_bkt = __bkt;
 		}
 	      else
 		{