From patchwork Thu Sep 10 20:03:26 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?Fran=C3=A7ois_Dumont?= X-Patchwork-Id: 516418 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 6403814030C for ; Fri, 11 Sep 2015 06:03:44 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b=wDGLlJUC; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:to :from:subject:message-id:date:mime-version:content-type; q=dns; s=default; b=M7+51Urx2JL4nbeBtvRHTvlaqmKwi+qczbP1o1jB9bRAxIximW JAecmIKXTK5SZcResZjH+OYIfxIz7YY9IsBOSRaQJrSNjKfCpcUiHNLlgHPCbsrN 2Mz/ZDzhRMpmVpiChpo7PHAcZvisCKDB6TPRngjcNsEaUlNhVoX2ufFW8= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:to :from:subject:message-id:date:mime-version:content-type; s= default; bh=rtBd9ZUKc4AFZbpeqAZnGgSrUoA=; b=wDGLlJUC0UoUILgpLQ28 NPWM9Xx+p2R6mcYkajVLM5F+KbrHvopvx6I4vJNmp3n7hZapQcBkTGb/n+4HbsxW uZMFAqkSfPQV56Zb3aNzZcdusrQ8mvjZtisG9TjTwjfLPWM+AK9CPBUiQsUjrfqg 9Ks1aqa5YBYVmqyxa4JLvLw= Received: (qmail 63722 invoked by alias); 10 Sep 2015 20:03:35 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 63704 invoked by uid 89); 10 Sep 2015 20:03:34 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.6 required=5.0 tests=AWL, BAYES_40, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.2 X-Spam-User: qpsmtpd, 2 recipients X-HELO: mail-wi0-f176.google.com Received: from mail-wi0-f176.google.com (HELO mail-wi0-f176.google.com) (209.85.212.176) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Thu, 10 Sep 2015 20:03:32 +0000 Received: by wicfx3 with SMTP id fx3so35633633wic.0; Thu, 10 Sep 2015 13:03:29 -0700 (PDT) X-Received: by 10.194.235.42 with SMTP id uj10mr55822239wjc.106.1441915408948; Thu, 10 Sep 2015 13:03:28 -0700 (PDT) Received: from [192.168.0.23] (arf62-1-82-237-250-248.fbx.proxad.net. [82.237.250.248]) by smtp.googlemail.com with ESMTPSA id o9sm17317489wja.29.2015.09.10.13.03.27 (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Thu, 10 Sep 2015 13:03:28 -0700 (PDT) To: "libstdc++@gcc.gnu.org" , gcc-patches From: =?UTF-8?Q?Fran=c3=a7ois_Dumont?= Subject: New power of 2 hash policy X-Enigmail-Draft-Status: N1110 Message-ID: <55F1E20E.7080305@gmail.com> Date: Thu, 10 Sep 2015 22:03:26 +0200 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.2.0 MIME-Version: 1.0 Hi Here is a patch to offer an alternative hash policy. This one is using power of 2 number of buckets allowing a faster modulo operation. This is obvious when running the performance test that I have adapted to use this alternative policy. Something between current implementation and the tr1 one, the old std one. Of course with this hash policy the lower bits of the hash code are more important. For pointers it would require to change the std::hash implementation to remove the lower 0 bits like in the patch I proposed some weeks ago. What do you think ? François diff --git a/libstdc++-v3/config/abi/pre/gnu.ver b/libstdc++-v3/config/abi/pre/gnu.ver index d42cd37..97913a6 100644 --- a/libstdc++-v3/config/abi/pre/gnu.ver +++ b/libstdc++-v3/config/abi/pre/gnu.ver @@ -1870,6 +1870,11 @@ GLIBCXX_3.4.22 { # std::uncaught_exceptions() _ZSt19uncaught_exceptionsv; + extern "C++" + { + std::__detail::_Power2_rehash_policy::*; + }; + } GLIBCXX_3.4.21; # Symbols in the support library (libsupc++) have their own tag. diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h index a9ad7dd..e774044 100644 --- a/libstdc++-v3/include/bits/hashtable_policy.h +++ b/libstdc++-v3/include/bits/hashtable_policy.h @@ -457,6 +457,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /// smallest prime that keeps the load factor small enough. struct _Prime_rehash_policy { + using __has_load_factor = std::true_type; + _Prime_rehash_policy(float __z = 1.0) noexcept : _M_max_load_factor(__z), _M_next_resize(0) { } @@ -501,6 +503,69 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION mutable std::size_t _M_next_resize; }; + /// Range hashing function considering that second args is a power of 2. + struct _Mask_range_hashing + { + typedef std::size_t first_argument_type; + typedef std::size_t second_argument_type; + typedef std::size_t result_type; + + result_type + operator()(first_argument_type __num, + second_argument_type __den) const noexcept + { return __num & (__den - 1); } + }; + + /// Rehash policy providing power of 2 bucket numbers. Ease modulo + /// operations. + struct _Power2_rehash_policy + { + using __has_load_factor = std::true_type; + + _Power2_rehash_policy(float __z = 1.0) noexcept + : _M_max_load_factor(__z), _M_next_resize(0) { } + + float + max_load_factor() const noexcept + { return _M_max_load_factor; } + + // Return a bucket size no smaller than n. + std::size_t + _M_next_bkt(std::size_t __n) const; + + // Return a bucket count appropriate for n elements + std::size_t + _M_bkt_for_elements(std::size_t __n) const + { return __builtin_ceil(__n / (long double)_M_max_load_factor); } + + // __n_bkt is current bucket count, __n_elt is current element count, + // and __n_ins is number of elements to be inserted. Do we need to + // increase bucket count? If so, return make_pair(true, n), where n + // is the new bucket count. If not, return make_pair(false, 0). + std::pair + _M_need_rehash(std::size_t __n_bkt, std::size_t __n_elt, + std::size_t __n_ins) const; + + typedef std::size_t _State; + + _State + _M_state() const + { return _M_next_resize; } + + void + _M_reset() noexcept + { _M_next_resize = 0; } + + void + _M_reset(_State __state) + { _M_next_resize = __state; } + + static const std::size_t _S_growth_factor = 2; + + float _M_max_load_factor; + mutable std::size_t _M_next_resize; + }; + // Base classes for std::_Hashtable. We define these base classes // because in some cases we want to do different things depending on // the value of a policy class. In some cases the policy class @@ -775,8 +840,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typename _ExtractKey, typename _Equal, typename _H1, typename _H2, typename _Hash, typename _RehashPolicy, typename _Traits, - bool _Constant_iterators = _Traits::__constant_iterators::value, - bool _Unique_keys = _Traits::__unique_keys::value> + bool _Constant_iterators = _Traits::__constant_iterators::value> struct _Insert; /// Specialization. @@ -785,65 +849,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typename _H1, typename _H2, typename _Hash, typename _RehashPolicy, typename _Traits> struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, - _RehashPolicy, _Traits, true, true> + _RehashPolicy, _Traits, true> : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits> { using __base_type = _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>; - using value_type = typename __base_type::value_type; - using iterator = typename __base_type::iterator; - using const_iterator = typename __base_type::const_iterator; - - using __unique_keys = typename __base_type::__unique_keys; - using __hashtable = typename __base_type::__hashtable; - using __node_gen_type = typename __base_type::__node_gen_type; - - using __base_type::insert; - std::pair - insert(value_type&& __v) - { - __hashtable& __h = this->_M_conjure_hashtable(); - __node_gen_type __node_gen(__h); - return __h._M_insert(std::move(__v), __node_gen, __unique_keys()); - } - - iterator - insert(const_iterator __hint, value_type&& __v) - { - __hashtable& __h = this->_M_conjure_hashtable(); - __node_gen_type __node_gen(__h); - return __h._M_insert(__hint, std::move(__v), __node_gen, - __unique_keys()); - } - }; + using __hashtable_base = _Hashtable_base<_Key, _Value, _ExtractKey, + _Equal, _H1, _H2, _Hash, + _Traits>; - /// Specialization. - template - struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, - _RehashPolicy, _Traits, true, false> - : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, - _H1, _H2, _Hash, _RehashPolicy, _Traits> - { - using __base_type = _Insert_base<_Key, _Value, _Alloc, _ExtractKey, - _Equal, _H1, _H2, _Hash, - _RehashPolicy, _Traits>; using value_type = typename __base_type::value_type; using iterator = typename __base_type::iterator; using const_iterator = typename __base_type::const_iterator; using __unique_keys = typename __base_type::__unique_keys; + using __ireturn_type = typename __hashtable_base::__ireturn_type; using __hashtable = typename __base_type::__hashtable; using __node_gen_type = typename __base_type::__node_gen_type; using __base_type::insert; - iterator + __ireturn_type insert(value_type&& __v) { __hashtable& __h = this->_M_conjure_hashtable(); @@ -865,9 +894,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template + typename _RehashPolicy, typename _Traits> struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, - _RehashPolicy, _Traits, false, _Unique_keys> + _RehashPolicy, _Traits, false> : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits> { @@ -911,28 +940,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } }; + template + using __has_load_factor = typename _Policy::__has_load_factor; + /** * Primary class template _Rehash_base. * * Give hashtable the max_load_factor functions and reserve iff the - * rehash policy is _Prime_rehash_policy. + * rehash policy supports it. */ template + typename _RehashPolicy, typename _Traits, + typename = + __detected_or_t> struct _Rehash_base; - /// Specialization. + /// Specialization when rehash policy doesn't provide load factor management. template + typename _H1, typename _H2, typename _Hash, + typename _RehashPolicy, typename _Traits> + struct _Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, + _H1, _H2, _Hash, _RehashPolicy, _Traits, + std::false_type> + { + }; + + /// Specialization when rehash policy provide load factor management. + template struct _Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, - _H1, _H2, _Hash, _Prime_rehash_policy, _Traits> + _H1, _H2, _Hash, _RehashPolicy, _Traits, + std::true_type> { using __hashtable = _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, - _Prime_rehash_policy, _Traits>; + _RehashPolicy, _Traits>; float max_load_factor() const noexcept @@ -945,7 +992,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION max_load_factor(float __z) { __hashtable* __this = static_cast<__hashtable*>(this); - __this->__rehash_policy(_Prime_rehash_policy(__z)); + __this->__rehash_policy(_RehashPolicy(__z)); } void diff --git a/libstdc++-v3/src/c++11/hashtable_c++0x.cc b/libstdc++-v3/src/c++11/hashtable_c++0x.cc index 69f999f..7b2fd76 100644 --- a/libstdc++-v3/src/c++11/hashtable_c++0x.cc +++ b/libstdc++-v3/src/c++11/hashtable_c++0x.cc @@ -32,6 +32,81 @@ #include #include +namespace +{ + const unsigned long __power2_list[] = // 32 or 64 + { + 1ul << 0, + 1ul << 1, + 1ul << 2, + 1ul << 3, + 1ul << 4, + 1ul << 5, + 1ul << 6, + 1ul << 7, + 1ul << 8, + 1ul << 9, + 1ul << 10, + 1ul << 11, + 1ul << 12, + 1ul << 13, + 1ul << 14, + 1ul << 15, + 1ul << 16, + 1ul << 17, + 1ul << 18, + 1ul << 19, + 1ul << 20, + 1ul << 21, + 1ul << 22, + 1ul << 23, + 1ul << 24, + 1ul << 25, + 1ul << 26, + 1ul << 27, + 1ul << 28, + 1ul << 29, + 1ul << 30, + 1ul << 31, +#if __SIZEOF_LONG__ != 8 + 1ul << 32 +#else + 1ul << 32, + 1ul << 33, + 1ul << 34, + 1ul << 35, + 1ul << 36, + 1ul << 37, + 1ul << 38, + 1ul << 39, + 1ul << 40, + 1ul << 41, + 1ul << 42, + 1ul << 43, + 1ul << 44, + 1ul << 45, + 1ul << 46, + 1ul << 47, + 1ul << 48, + 1ul << 49, + 1ul << 50, + 1ul << 51, + 1ul << 52, + 1ul << 53, + 1ul << 54, + 1ul << 55, + 1ul << 56, + 1ul << 57, + 1ul << 58, + 1ul << 59, + 1ul << 60, + 1ul << 61, + 1ul << 62, + 1ul << 63 +#endif + }; +} + namespace std _GLIBCXX_VISIBILITY(default) { #include "../shared/hashtable-aux.cc" @@ -96,6 +171,62 @@ namespace __detail return std::make_pair(false, 0); } + // Return a prime no smaller than n. + std::size_t + _Power2_rehash_policy::_M_next_bkt(std::size_t __n) const + { + // Optimize lookups involving the first elements of __prime_list. + // (useful to speed-up, eg, constructors) + static const unsigned char __fast_bkt[12] + = { 2, 2, 2, 4, 8, 8, 8, 8, 16, 16, 16, 16 }; + + if (__n <= 11) + { + _M_next_resize = + __builtin_ceil(__fast_bkt[__n] * (long double)_M_max_load_factor); + return __fast_bkt[__n]; + } + + constexpr auto __n_power2 + = sizeof(__power2_list) / sizeof(unsigned long) - 1; + const unsigned long* __next_bkt = + std::lower_bound(__power2_list + 4, __power2_list + __n_power2, __n); + _M_next_resize = + __builtin_ceil(*__next_bkt * (long double)_M_max_load_factor); + return *__next_bkt; + } + + // Finds the smallest prime p such that alpha p > __n_elt + __n_ins. + // If p > __n_bkt, return make_pair(true, p); otherwise return + // make_pair(false, 0). In principle this isn't very different from + // _M_bkt_for_elements. + + // The only tricky part is that we're caching the element count at + // which we need to rehash, so we don't have to do a floating-point + // multiply for every insertion. + + std::pair + _Power2_rehash_policy:: + _M_need_rehash(std::size_t __n_bkt, std::size_t __n_elt, + std::size_t __n_ins) const + { + if (__n_elt + __n_ins >= _M_next_resize) + { + long double __min_bkts = (__n_elt + __n_ins) + / (long double)_M_max_load_factor; + if (__min_bkts >= __n_bkt) + return std::make_pair(true, + _M_next_bkt(std::max(__builtin_floor(__min_bkts) + 1, + __n_bkt * _S_growth_factor))); + + _M_next_resize + = __builtin_floor(__n_bkt * (long double)_M_max_load_factor); + return std::make_pair(false, 0); + } + else + return std::make_pair(false, 0); + } + _GLIBCXX_END_NAMESPACE_VERSION } // namespace __detail } // namespace std diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc b/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc index ef956a0..a07d552 100644 --- a/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc +++ b/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc @@ -127,7 +127,27 @@ template using __umset = std::__umset_hashtable, std::allocator, - std::__uset_traits>; + std::__umset_traits>; + +template + using __uset2 = + std::_Hashtable, + std::__detail::_Identity, + std::equal_to, HashFunction, + std::__detail::_Mask_range_hashing, + std::__detail::_Default_ranged_hash, + std::__detail::_Power2_rehash_policy, + std::__uset_traits>; + +template + using __umset2 = + std::_Hashtable, + std::__detail::_Identity, + std::equal_to, HashFunction, + std::__detail::_Mask_range_hashing, + std::__detail::_Default_ranged_hash, + std::__detail::_Power2_rehash_policy, + std::__umset_traits>; int main() { @@ -181,6 +201,19 @@ int main() stop_counters(time, resource); report_performance(__FILE__, "std benches", time, resource); + start_counters(time, resource); + bench<__uset2>( + "std::unordered_set2 without hash code cached ", foos); + bench<__uset2>( + "std::unordered_set2 with hash code cached ", foos); + bench<__umset2>( + "std::unordered_multiset2 without hash code cached ", foos); + bench<__umset2>( + "std::unordered_multiset2 with hash code cached ", foos); + + stop_counters(time, resource); + report_performance(__FILE__, "std2 benches", time, resource); + bench>( "std::unordered_set default cache ", foos); bench>( diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc index 9846104..4b29fde 100644 --- a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc +++ b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc @@ -177,6 +177,16 @@ template cache>; template + using __uset2 = + std::_Hashtable, + std::__detail::_Identity, + std::equal_to, std::hash, + std::__detail::_Mask_range_hashing, + std::__detail::_Default_ranged_hash, + std::__detail::_Power2_rehash_policy, + std::__uset_traits>; + +template using __str_uset = std::__uset_hashtable, std::equal_to, @@ -190,6 +200,16 @@ template std::allocator, cache>; +template + using __str_uset2 = + std::_Hashtable, + std::__detail::_Identity, + std::equal_to, std::hash, + std::__detail::_Mask_range_hashing, + std::__detail::_Default_ranged_hash, + std::__detail::_Power2_rehash_policy, + std::__uset_traits>; + int main() { bench<__tr1_uset>( @@ -202,6 +222,10 @@ int main() "std::unordered_set with hash code cached"); bench>( "std::unordered_set default cache"); + bench<__uset2>( + "std::unordered_set2 without hash code cached"); + bench<__uset2>( + "std::unordered_set2 with hash code cached"); bench_str<__tr1_str_uset>( "std::tr1::unordered_set without hash code cached"); bench_str<__tr1_str_uset>( @@ -210,7 +234,11 @@ int main() "std::unordered_set without hash code cached"); bench_str<__str_uset>( "std::unordered_set with hash code cached"); - bench_str>( + bench_str>( "std::unordered_set default cache"); + bench_str<__str_uset2>( + "std::unordered_set2 without hash code cached"); + bench_str<__str_uset2>( + "std::unordered_set2 with hash code cached"); return 0; }