
[1/6] hash: Introduce ptr_hash_mix routine

Message ID 501FD11B.6000006@parallels.com
State Changes Requested, archived
Delegated to: David Miller

Commit Message

Pavel Emelyanov Aug. 6, 2012, 2:13 p.m. UTC
This one is used to make a salt out of a pointer, to be mixed into some
hash function later. The idea and implementation were proposed by Eric
Dumazet.
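
For illustration, a minimal sketch of how such a salt might be consumed
(some_hashfn() and the 8-bit table here are hypothetical; hash_32() and
net_hash_mix() are the existing helpers):

	/* Hypothetical consumer: fold the per-netns salt into the key
	 * before hashing, so equal keys from different namespaces land
	 * in different buckets. */
	static inline u32 some_hashfn(u32 key, struct net *net)
	{
		return hash_32(key ^ net_hash_mix(net), 8);
	}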

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
---
 include/linux/hash.h     |   10 ++++++++++
 include/net/netns/hash.h |    9 ++-------
 2 files changed, 12 insertions(+), 7 deletions(-)

Comments

David Miller Aug. 6, 2012, 8:44 p.m. UTC | #1
From: Pavel Emelyanov <xemul@parallels.com>
Date: Mon, 06 Aug 2012 18:13:47 +0400

> @@ -67,4 +68,13 @@ static inline unsigned long hash_ptr(const void *ptr, unsigned int bits)
>  {
>  	return hash_long((unsigned long)ptr, bits);
>  }
> +
> +static inline u32 ptr_hash_mix(const void *ptr)
> +{
> +#if BITS_PER_LONG == 32
> +	return (u32)(unsigned long)ptr;
> +#else
> +	return (u32)((unsigned long)ptr >> L1_CACHE_SHIFT);
> +#endif
> +}
>  #endif /* _LINUX_HASH_H */

This doesn't make much sense to me.

If the whole 32 bits of the pointer are useful for entropy on 32-bit,
why aren't the whole 64 bits useful on 64-bit?

I would, instead, expect something like:

	ptr ^ (ptr >> 32)

for the 64-bit case.

Also, that L1_CACHE_SHIFT is something callers can decide to do.

Only they know the size of their structure, the alignment used to
allocate such objects, and thus what bits are "less relevant" and
therefore profitable to elide from the bottom of the value.
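
A caller-side sketch of that elision (illustrative only; "obj" stands
for some cache-line-aligned object the caller allocated):

	/* Only the caller knows its objects are at least L1-cache
	 * aligned, i.e. that the low L1_CACHE_SHIFT bits are always
	 * zero and carry no entropy. */
	u32 salt = (u32)((unsigned long)obj >> L1_CACHE_SHIFT);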
Pavel Emelyanov Aug. 7, 2012, 9:11 a.m. UTC | #2
On 08/07/2012 12:44 AM, David Miller wrote:
> From: Pavel Emelyanov <xemul@parallels.com>
> Date: Mon, 06 Aug 2012 18:13:47 +0400
> 
>> @@ -67,4 +68,13 @@ static inline unsigned long hash_ptr(const void *ptr, unsigned int bits)
>>  {
>>  	return hash_long((unsigned long)ptr, bits);
>>  }
>> +
>> +static inline u32 ptr_hash_mix(const void *ptr)
>> +{
>> +#if BITS_PER_LONG == 32
>> +	return (u32)(unsigned long)ptr;
>> +#else
>> +	return (u32)((unsigned long)ptr >> L1_CACHE_SHIFT);
>> +#endif
>> +}
>>  #endif /* _LINUX_HASH_H */
> 
> This doesn't make much sense to me.
> 
> If the whole 32 bits of the pointer are useful for entropy on 32-bit,
> why aren't the whole 64 bits useful on 64-bit?
> 
> I would, instead, expect something like:
> 
> 	ptr ^ (ptr >> 32)
> 
> for the 64-bit case.
> 
> Also, that L1_CACHE_SHIFT is something callers can decide to do.
> 
> Only they know the size of their structure, the alignment used to
> allocate such objects, and thus what bits are "less relevant" and
> therefore profitable to elide from the bottom of the value.

Maybe it would be better to change the way neigh_table->hash works more
significantly then? Currently it is used like

	hash = tbl->hash(key, dev, tbl->rnd);
	hash >>= (32 - tbl->hash_shift);

i.e. the caller asks for a u32 hash value and then trims some lower bits.
It can be changed like

	hash = tbl->hash(key, dev, tbl->rnd, tbl->hash_shift);

making the hash fn trim the bits itself. This will allow us to use the
existing (and proven effective) hash_ptr() routine for hashing the
net_device pointer (it requires the number of bits to use).

E.g. the arp hash might look like

static u32 arp_hashfn(u32 key, struct net_device *dev, u32 hash_rnd,
		unsigned int bits)
{
	return hash_ptr(dev, bits) ^ hash_32(key * hash_rnd, bits);
}

and the ndisc one like

static u32 ndisc_hashfn(u32 *pkey, struct net_device *dev, u32 *hash_rnd,
		unsigned int bits)
{
	return hash_ptr(dev, bits) ^
		hash_32(pkey[0] * hash_rnd[0], bits) ^
		hash_32(pkey[1] * hash_rnd[1], bits) ^
		hash_32(pkey[2] * hash_rnd[2], bits) ^
		hash_32(pkey[3] * hash_rnd[3], bits);
}

What do you think?

Thanks,
Pavel
Eric Dumazet Aug. 7, 2012, 9:28 a.m. UTC | #3
On Tue, 2012-08-07 at 13:11 +0400, Pavel Emelyanov wrote:
> On 08/07/2012 12:44 AM, David Miller wrote:
> > From: Pavel Emelyanov <xemul@parallels.com>
> > Date: Mon, 06 Aug 2012 18:13:47 +0400
> > 
> >> @@ -67,4 +68,13 @@ static inline unsigned long hash_ptr(const void *ptr, unsigned int bits)
> >>  {
> >>  	return hash_long((unsigned long)ptr, bits);
> >>  }
> >> +
> >> +static inline u32 ptr_hash_mix(const void *ptr)
> >> +{
> >> +#if BITS_PER_LONG == 32
> >> +	return (u32)(unsigned long)ptr;
> >> +#else
> >> +	return (u32)((unsigned long)ptr >> L1_CACHE_SHIFT);
> >> +#endif
> >> +}
> >>  #endif /* _LINUX_HASH_H */
> > 
> > This doesn't make much sense to me.
> > 
> > If the whole 32 bits of the pointer are useful for entropy on 32-bit,
> > why aren't the whole 64 bits useful on 64-bit?
> > 
> > I would, instead, expect something like:
> > 
> > 	ptr ^ (ptr >> 32)
> > 
> > for the 64-bit case.
> > 
> > Also, that L1_CACHE_SHIFT is something callers can decide to do.
> > 
> > Only they know the size of their structure, the alignment used to
> > allocate such objects, and thus what bits are "less relevant" and
> > therefore profitable to elide from the bottom of the value.
> 
> Maybe it would be better to change the way neigh_table->hash works more
> significantly then? Currently it is used like
> 
> 	hash = tbl->hash(key, dev, tbl->rnd);
> 	hash >>= (32 - tbl->hash_shift);
> 
> i.e. the caller asks for a u32 hash value and then trims some lower bits.
> It can be changed like
> 
> 	hash = tbl->hash(key, dev, tbl->rnd, tbl->hash_shift);
> 
> making the hash fn trim the bits itself. This will allow us to use the
> existing (and proven effective) hash_ptr() routine for hashing the
> net_device pointer (it requires the number of bits to use).
> 
> E.g. the arp hash might look like
> 
> static u32 arp_hashfn(u32 key, struct net_device *dev, u32 hash_rnd,
> 		unsigned int bits)
> {
> 	return hash_ptr(dev, bits) ^ hash_32(key * hash_rnd, bits);
> }
> 
> and the ndisc one like
> 
> static u32 ndisc_hashfn(u32 *pkey, struct net_device *dev, u32 *hash_rnd,
> 		unsigned int bits)
> {
> 	return hash_ptr(dev, bits) ^
> 		hash_32(pkey[0] * hash_rnd[0], bits) ^
> 		hash_32(pkey[1] * hash_rnd[1], bits) ^
> 		hash_32(pkey[2] * hash_rnd[2], bits) ^
> 		hash_32(pkey[3] * hash_rnd[3], bits);
> }
> 
> What do you think?

I think we should avoid hash_ptr() because it's quite expensive.

David suggested not using the L1_CACHE_SHIFT and instead doing a plain:

static inline u32 ptr_hash_mix(const void *ptr)
{
	unsigned long val = (unsigned long)ptr;

#if BITS_PER_LONG == 64
	val ^= (val >> 32);
#endif
	return (u32)val;
}

By the way, we could name this hash32_ptr() instead of ptr_hash_mix().
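
With that rename, the net_hash_mix() call site from this patch would
read (sketch only):

	return hash32_ptr(net);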

David Miller Aug. 7, 2012, 9:39 p.m. UTC | #4
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 07 Aug 2012 13:11:41 +0400

> Maybe it would be better to change the way neigh_table->hash works more
> significantly then? Currently it is used like
> 
> 	hash = tbl->hash(key, dev, tbl->rnd);
> 	hash >>= (32 - tbl->hash_shift);
> 
> i.e. the caller asks for a u32 hash value and then trims some lower bits.

We do this because the hash function we use in the neigh
implementations causes the top bits to have the most entropy.

Please look at the commits that made the code this way; it's
very much intentional.
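
For reference, a worked sketch of that convention (hash_shift is
illustrative; GOLDEN_RATIO_PRIME_32 is the multiplier from
include/linux/hash.h):

	/* A multiplicative hash pushes entropy toward the high bits,
	 * so the bucket index is the top hash_shift bits, not a mask
	 * of the low ones. */
	u32 hash = key * GOLDEN_RATIO_PRIME_32;
	u32 idx  = hash >> (32 - hash_shift);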

Patch

diff --git a/include/linux/hash.h b/include/linux/hash.h
index b80506b..1bd0ab1 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -14,6 +14,7 @@ 
  * machines where multiplications are slow.
  */
 
+#include <linux/cache.h>
 #include <asm/types.h>
 
 /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
@@ -67,4 +68,13 @@  static inline unsigned long hash_ptr(const void *ptr, unsigned int bits)
 {
 	return hash_long((unsigned long)ptr, bits);
 }
+
+static inline u32 ptr_hash_mix(const void *ptr)
+{
+#if BITS_PER_LONG == 32
+	return (u32)(unsigned long)ptr;
+#else
+	return (u32)((unsigned long)ptr >> L1_CACHE_SHIFT);
+#endif
+}
 #endif /* _LINUX_HASH_H */
diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h
index c06ac58..bcdabe0 100644
--- a/include/net/netns/hash.h
+++ b/include/net/netns/hash.h
@@ -1,19 +1,14 @@ 
 #ifndef __NET_NS_HASH_H__
 #define __NET_NS_HASH_H__
 
-#include <asm/cache.h>
+#include <linux/hash.h>
 
 struct net;
 
 static inline unsigned int net_hash_mix(struct net *net)
 {
 #ifdef CONFIG_NET_NS
-	/*
-	 * shift this right to eliminate bits, that are
-	 * always zeroed
-	 */
-
-	return (unsigned)(((unsigned long)net) >> L1_CACHE_SHIFT);
+	return ptr_hash_mix(net);
 #else
 	return 0;
 #endif