diff mbox series

[RFC,13/48] xxhash: add qemu_xxhash8

Message ID 20181025172057.20414-14-cota@braap.org
State New
Headers show
Series Plugin support | expand

Commit Message

Emilio Cota Oct. 25, 2018, 5:20 p.m. UTC
It will be used for TB hashing soon.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 include/qemu/xxhash.h | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

Comments

Alex Bennée Nov. 22, 2018, 5:15 p.m. UTC | #1
Emilio G. Cota <cota@braap.org> writes:

> It will be used for TB hashing soon.
>
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  include/qemu/xxhash.h | 40 +++++++++++++++++++++++++++-------------
>  1 file changed, 27 insertions(+), 13 deletions(-)
>
> diff --git a/include/qemu/xxhash.h b/include/qemu/xxhash.h
> index fe35dde328..450427eeaa 100644
> --- a/include/qemu/xxhash.h
> +++ b/include/qemu/xxhash.h
> @@ -49,7 +49,8 @@
>   * contiguous in memory.
>   */
>  static inline uint32_t
> -qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
> +qemu_xxhash8(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g,
> +             uint32_t h)

As we've expanded to bigger and bigger hashes why are everything after
cd passed as 32 bit values? Isn't this just generating extra register
pressure or is the compiler smart enough to figure it out?

>  {
>      uint32_t v1 = QEMU_XXHASH_SEED + PRIME32_1 + PRIME32_2;
>      uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2;
> @@ -77,17 +78,24 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
>      v4 = rol32(v4, 13);
>      v4 *= PRIME32_1;
>
> -    h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
> -    h32 += 28;
> +    v1 += e * PRIME32_2;
> +    v1 = rol32(v1, 13);
> +    v1 *= PRIME32_1;
>
> -    h32 += e * PRIME32_3;
> -    h32  = rol32(h32, 17) * PRIME32_4;
> +    v2 += f * PRIME32_2;
> +    v2 = rol32(v2, 13);
> +    v2 *= PRIME32_1;
> +
> +    v3 += g * PRIME32_2;
> +    v3 = rol32(v3, 13);
> +    v3 *= PRIME32_1;
>
> -    h32 += f * PRIME32_3;
> -    h32  = rol32(h32, 17) * PRIME32_4;
> +    v4 += h * PRIME32_2;
> +    v4 = rol32(v4, 13);
> +    v4 *= PRIME32_1;
>
> -    h32 += g * PRIME32_3;
> -    h32  = rol32(h32, 17) * PRIME32_4;
> +    h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
> +    h32 += 32;

How do we validate we haven't broken the distribution of the original
xxhash as we've extended it?

>
>      h32 ^= h32 >> 15;
>      h32 *= PRIME32_2;
> @@ -100,23 +108,29 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
>
>  static inline uint32_t qemu_xxhash2(uint64_t ab)
>  {
> -    return qemu_xxhash7(ab, 0, 0, 0, 0);
> +    return qemu_xxhash8(ab, 0, 0, 0, 0, 0);
>  }
>
>  static inline uint32_t qemu_xxhash4(uint64_t ab, uint64_t cd)
>  {
> -    return qemu_xxhash7(ab, cd, 0, 0, 0);
> +    return qemu_xxhash8(ab, cd, 0, 0, 0, 0);
>  }
>
>  static inline uint32_t qemu_xxhash5(uint64_t ab, uint64_t cd, uint32_t e)
>  {
> -    return qemu_xxhash7(ab, cd, e, 0, 0);
> +    return qemu_xxhash8(ab, cd, e, 0, 0, 0);
>  }
>
>  static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e,
>                                      uint32_t f)
>  {
> -    return qemu_xxhash7(ab, cd, e, f, 0);
> +    return qemu_xxhash8(ab, cd, e, f, 0, 0);
> +}
> +
> +static inline uint32_t qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e,
> +                                    uint32_t f, uint32_t g)
> +{
> +    return qemu_xxhash8(ab, cd, e, f, g, 0);
>  }
>
>  #endif /* QEMU_XXHASH_H */


--
Alex Bennée
Emilio Cota Nov. 23, 2018, 10:34 p.m. UTC | #2
On Thu, Nov 22, 2018 at 17:15:20 +0000, Alex Bennée wrote:
> 
> Emilio G. Cota <cota@braap.org> writes:
> 
> > It will be used for TB hashing soon.
> >
> > Signed-off-by: Emilio G. Cota <cota@braap.org>
> > ---
> >  include/qemu/xxhash.h | 40 +++++++++++++++++++++++++++-------------
> >  1 file changed, 27 insertions(+), 13 deletions(-)
> >
> > diff --git a/include/qemu/xxhash.h b/include/qemu/xxhash.h
> > index fe35dde328..450427eeaa 100644
> > --- a/include/qemu/xxhash.h
> > +++ b/include/qemu/xxhash.h
> > @@ -49,7 +49,8 @@
> >   * contiguous in memory.
> >   */
> >  static inline uint32_t
> > -qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
> > +qemu_xxhash8(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g,
> > +             uint32_t h)
> 
> As we've expanded to bigger and bigger hashes why are everything after
> cd passed as 32 bit values? Isn't this just generating extra register
> pressure or is the compiler smart enough to figure it out?

The latter -- the compiler does a good job with constant propagation.

> >  {
> >      uint32_t v1 = QEMU_XXHASH_SEED + PRIME32_1 + PRIME32_2;
> >      uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2;
> > @@ -77,17 +78,24 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
> >      v4 = rol32(v4, 13);
> >      v4 *= PRIME32_1;
> >
> > -    h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
> > -    h32 += 28;
> > +    v1 += e * PRIME32_2;
> > +    v1 = rol32(v1, 13);
> > +    v1 *= PRIME32_1;
(snip)
> How do we validate we haven't broken the distribution of the original
> xxhash as we've extended it?

We weren't testing for that, so this is a valid concern.

I just added a test to my xxhash repo to compare our version against
the original:
  https://github.com/cota/xxhash/tree/qemu

Turns out that to exactly match the original we just have to swap our
a/b and c/d pairs. I don't see how this could cause a loss of randomness,
but just to be safe I'll send a for-4.0 patch so that our output matches
the original one.

Thanks,

		Emilio
diff mbox series

Patch

diff --git a/include/qemu/xxhash.h b/include/qemu/xxhash.h
index fe35dde328..450427eeaa 100644
--- a/include/qemu/xxhash.h
+++ b/include/qemu/xxhash.h
@@ -49,7 +49,8 @@ 
  * contiguous in memory.
  */
 static inline uint32_t
-qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
+qemu_xxhash8(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g,
+             uint32_t h)
 {
     uint32_t v1 = QEMU_XXHASH_SEED + PRIME32_1 + PRIME32_2;
     uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2;
@@ -77,17 +78,24 @@  qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
     v4 = rol32(v4, 13);
     v4 *= PRIME32_1;
 
-    h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
-    h32 += 28;
+    v1 += e * PRIME32_2;
+    v1 = rol32(v1, 13);
+    v1 *= PRIME32_1;
 
-    h32 += e * PRIME32_3;
-    h32  = rol32(h32, 17) * PRIME32_4;
+    v2 += f * PRIME32_2;
+    v2 = rol32(v2, 13);
+    v2 *= PRIME32_1;
+
+    v3 += g * PRIME32_2;
+    v3 = rol32(v3, 13);
+    v3 *= PRIME32_1;
 
-    h32 += f * PRIME32_3;
-    h32  = rol32(h32, 17) * PRIME32_4;
+    v4 += h * PRIME32_2;
+    v4 = rol32(v4, 13);
+    v4 *= PRIME32_1;
 
-    h32 += g * PRIME32_3;
-    h32  = rol32(h32, 17) * PRIME32_4;
+    h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
+    h32 += 32;
 
     h32 ^= h32 >> 15;
     h32 *= PRIME32_2;
@@ -100,23 +108,29 @@  qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
 
 static inline uint32_t qemu_xxhash2(uint64_t ab)
 {
-    return qemu_xxhash7(ab, 0, 0, 0, 0);
+    return qemu_xxhash8(ab, 0, 0, 0, 0, 0);
 }
 
 static inline uint32_t qemu_xxhash4(uint64_t ab, uint64_t cd)
 {
-    return qemu_xxhash7(ab, cd, 0, 0, 0);
+    return qemu_xxhash8(ab, cd, 0, 0, 0, 0);
 }
 
 static inline uint32_t qemu_xxhash5(uint64_t ab, uint64_t cd, uint32_t e)
 {
-    return qemu_xxhash7(ab, cd, e, 0, 0);
+    return qemu_xxhash8(ab, cd, e, 0, 0, 0);
 }
 
 static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e,
                                     uint32_t f)
 {
-    return qemu_xxhash7(ab, cd, e, f, 0);
+    return qemu_xxhash8(ab, cd, e, f, 0, 0);
+}
+
+static inline uint32_t qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e,
+                                    uint32_t f, uint32_t g)
+{
+    return qemu_xxhash8(ab, cd, e, f, g, 0);
 }
 
 #endif /* QEMU_XXHASH_H */