page_cache: use multiplicative hash for page position calculation

Submitted by Peter Lieven on March 1, 2013, 11:53 a.m.

Details

Message ID 513096A1.8020008@dlhnet.de
State New
Headers show

Commit Message

Peter Lieven March 1, 2013, 11:53 a.m.
instead of a linear mapping we use a multiplicative hash
with the golden ratio to derive the cache bucket from the
address. this helps to reduce collisions if memory positions
are multiple of the cache size and it avoids a division
in the position calculation.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
  page_cache.c |    5 ++++-
  1 file changed, 4 insertions(+), 1 deletion(-)

Comments

Laszlo Ersek March 1, 2013, 12:50 p.m.
On 03/01/13 12:53, Peter Lieven wrote:
> instead of a linear mapping we use a multiplicative hash
> with the golden ratio to derive the cache bucket from the
> address. this helps to reduce collisions if memory positions
> are multiple of the cache size and it avoids a division
> in the position calculation.
> 
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>  page_cache.c |    5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/page_cache.c b/page_cache.c
> index 376f1db..45d769a 100644
> --- a/page_cache.c
> +++ b/page_cache.c
> @@ -24,6 +24,7 @@
>  #include <strings.h>
> 
>  #include "qemu-common.h"
> +#include "qemu/host-utils.h"
>  #include "migration/page_cache.h"
> 
>  #ifdef DEBUG_CACHE
> @@ -48,6 +49,7 @@ struct PageCache {
>      int64_t max_num_items;
>      uint64_t max_item_age;
>      int64_t num_items;
> +    uint64_t hash_shift_bits;
>  };
> 
>  PageCache *cache_init(int64_t num_pages, unsigned int page_size)
> @@ -72,6 +74,7 @@ PageCache *cache_init(int64_t num_pages, unsigned int
> page_size)
>      cache->num_items = 0;
>      cache->max_item_age = 0;
>      cache->max_num_items = num_pages;
> +    cache->hash_shift_bits = clz64(num_pages-1);
> 
>      DPRINTF("Setting cache buckets to %" PRId64 "\n",
> cache->max_num_items);
> 
> @@ -108,7 +111,7 @@ static size_t cache_get_cache_pos(const PageCache
> *cache,
>      size_t pos;
> 
>      g_assert(cache->max_num_items);
> -    pos = (address / cache->page_size) & (cache->max_num_items - 1);
> +    pos = (address * 0x9e3779b97f4a7c13) >> cache->hash_shift_bits;
>      return pos;
>  }
> 

According to <http://www.brpreiss.com/books/opus4/html/page214.html>,
the multiplier "is chosen as the integer that is relatively prime to"
2^64 "which is closest to" (sqrt(5)-1)/2 * 2^64.

(sqrt(5)-1)/2 * 2^64 ~= 11400714819323198485.86699842797038469120

hence the constant would be a=0x9e3779b97f4a7c15. Any reason why a-2 is
used in the patch?

(Note: this is not a review or any suggestion to change the patch; I'm
just curious.)

A google-fight between "a" and "a-2" is inconclusive. So is stackoverflow:

http://stackoverflow.com/questions/4113278/64-bit-multiplicative-hashing
http://stackoverflow.com/questions/8513911/how-to-create-a-good-hash-combine-with-64-bit-output-inspired-by-boosthash-co

Thanks
Laszlo
Peter Lieven March 1, 2013, 1:22 p.m.
On 01.03.2013 13:50, Laszlo Ersek wrote:
> On 03/01/13 12:53, Peter Lieven wrote:
>> instead of a linear mapping we use a multiplicative hash
>> with the golden ratio to derive the cache bucket from the
>> address. this helps to reduce collisions if memory positions
>> are multiple of the cache size and it avoids a division
>> in the position calculation.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>   page_cache.c |    5 ++++-
>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/page_cache.c b/page_cache.c
>> index 376f1db..45d769a 100644
>> --- a/page_cache.c
>> +++ b/page_cache.c
>> @@ -24,6 +24,7 @@
>>   #include <strings.h>
>>
>>   #include "qemu-common.h"
>> +#include "qemu/host-utils.h"
>>   #include "migration/page_cache.h"
>>
>>   #ifdef DEBUG_CACHE
>> @@ -48,6 +49,7 @@ struct PageCache {
>>       int64_t max_num_items;
>>       uint64_t max_item_age;
>>       int64_t num_items;
>> +    uint64_t hash_shift_bits;
>>   };
>>
>>   PageCache *cache_init(int64_t num_pages, unsigned int page_size)
>> @@ -72,6 +74,7 @@ PageCache *cache_init(int64_t num_pages, unsigned int
>> page_size)
>>       cache->num_items = 0;
>>       cache->max_item_age = 0;
>>       cache->max_num_items = num_pages;
>> +    cache->hash_shift_bits = clz64(num_pages-1);
>>
>>       DPRINTF("Setting cache buckets to %" PRId64 "\n",
>> cache->max_num_items);
>>
>> @@ -108,7 +111,7 @@ static size_t cache_get_cache_pos(const PageCache
>> *cache,
>>       size_t pos;
>>
>>       g_assert(cache->max_num_items);
>> -    pos = (address / cache->page_size) & (cache->max_num_items - 1);
>> +    pos = (address * 0x9e3779b97f4a7c13) >> cache->hash_shift_bits;
>>       return pos;
>>   }
>>
>
> According to <http://www.brpreiss.com/books/opus4/html/page214.html>,
> the multiplier "is chosen as the integer that is relatively prime to"
> 2^64 "which is closest to" (sqrt(5)-1)/2 * 2^64.
>
> (sqrt(5)-1)/2 * 2^64 ~= 11400714819323198485.86699842797038469120
>
> hence the constant would be a=0x9e3779b97f4a7c15. Any reason why a-2 is
> used in the patch?

no, actually I only googled this value and did not calculate it myself.

Peter

Patch hide | download patch | download mbox

diff --git a/page_cache.c b/page_cache.c
index 376f1db..45d769a 100644
--- a/page_cache.c
+++ b/page_cache.c
@@ -24,6 +24,7 @@ 
  #include <strings.h>

  #include "qemu-common.h"
+#include "qemu/host-utils.h"
  #include "migration/page_cache.h"

  #ifdef DEBUG_CACHE
@@ -48,6 +49,7 @@  struct PageCache {
      int64_t max_num_items;
      uint64_t max_item_age;
      int64_t num_items;
+    uint64_t hash_shift_bits;
  };

  PageCache *cache_init(int64_t num_pages, unsigned int page_size)
@@ -72,6 +74,7 @@  PageCache *cache_init(int64_t num_pages, unsigned int page_size)
      cache->num_items = 0;
      cache->max_item_age = 0;
      cache->max_num_items = num_pages;
+    cache->hash_shift_bits = clz64(num_pages-1);

      DPRINTF("Setting cache buckets to %" PRId64 "\n", cache->max_num_items);

@@ -108,7 +111,7 @@  static size_t cache_get_cache_pos(const PageCache *cache,
      size_t pos;

      g_assert(cache->max_num_items);
-    pos = (address / cache->page_size) & (cache->max_num_items - 1);
+    pos = (address * 0x9e3779b97f4a7c13) >> cache->hash_shift_bits;
      return pos;
  }