diff mbox series

[ovs-dev,3/3] hash: Avoid 64bit crc intrinsics on 32bit aligned data

Message ID 20230111053337.3056138-3-mkp@redhat.com
State Superseded
Headers show
Series [ovs-dev,1/3] netdev-dummy: Allocate dummy_packet_stream on cacheline boundary | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/intel-ovs-compilation fail test: fail

Commit Message

Mike Pattrick Jan. 11, 2023, 5:33 a.m. UTC
UB Sanitizer report:

lib/hash.h:219:17: runtime error: load of misaligned address
0x7ffc164a88b4 for type 'const uint64_t', which requires 8 byte
alignment

    #0 in hash_words_inline lib/hash.h:219
    #1 in hash_words lib/hash.h:297
    [...]

Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
 lib/hash.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 66 insertions(+), 3 deletions(-)

Comments

Ilya Maximets Jan. 11, 2023, 11:24 a.m. UTC | #1
On 1/11/23 06:33, Mike Pattrick wrote:
> UB Sanitizer report:
> 
> lib/hash.h:219:17: runtime error: load of misaligned address
> 0x7ffc164a88b4 for type 'const uint64_t', which requires 8 byte
> alignment
> 
>     #0 in hash_words_inline lib/hash.h:219
>     #1 in hash_words lib/hash.h:297
>     [...]
> 
> Signed-off-by: Mike Pattrick <mkp@redhat.com>
> ---
>  lib/hash.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 66 insertions(+), 3 deletions(-)
> 

Hi, Mike.

I didn't read the patch well enough, but I remember that the main
issue with these hash functions is that aligned and unaligned
versions *must* return exact same hash result.  Is that the case
with your patch?  And we'll need a unit test to ensure that.

And, FYI, this version fails build in CI.

Best regards, Ilya Maximets.

> diff --git a/lib/hash.h b/lib/hash.h
> index 60a39a40b..ca468c52c 100644
> --- a/lib/hash.h
> +++ b/lib/hash.h
> @@ -187,6 +187,16 @@ static inline uint32_t hash_finish(uint64_t hash, uint64_t final)
>      return hash ^ (uint32_t)hash >> 16; /* Increase entropy in LSBs. */
>  }
>  
> +static inline uint32_t
> +hash_finish32(uint64_t hash, uint32_t final, uint32_t semifinal)
> +{
> +    /* The finishing multiplier 0x805204f3 has been experimentally
> +     * derived to pass the testsuite hash tests. */
> +    hash = _mm_crc32_u32(hash, semifinal);
> +    hash = _mm_crc32_u32(hash, final) * 0x805204f3;
> +    return hash ^ ((uint32_t) hash >> 16); /* Increase entropy in LSBs. */
> +}
> +
>  /* Returns the hash of the 'n' 32-bit words at 'p_', starting from 'basis'.
>   * We access 'p_' as a uint64_t pointer, which is fine for __SSE_4_2__.
>   *
> @@ -232,6 +242,55 @@ hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis)
>      return hash_finish(hash1, hash2 << 32 | hash3);
>  }
>  
> +static inline uint32_t
> +hash_words_32aligned(const uint32_t p_[], size_t n_words, uint32_t basis)
> +{
> +    const uint32_t *p = (const void *) p_;
> +    uint32_t hash1 = basis;
> +    uint32_t hash2 = 0;
> +    uint32_t hash3 = n_words;
> +    const uint32_t *endp = (const uint32_t *) p + n_words;
> +    const uint32_t *limit = p + n_words - 6;
> +
> +    while (p <= limit) {
> +        hash1 = _mm_crc32_u32(hash1, p[0]);
> +        hash1 = _mm_crc32_u32(hash1, p[1]);
> +        hash2 = _mm_crc32_u32(hash2, p[2]);
> +        hash2 = _mm_crc32_u32(hash2, p[3]);
> +        hash3 = _mm_crc32_u32(hash3, p[4]);
> +        hash3 = _mm_crc32_u32(hash3, p[5]);
> +        p += 6;
> +    }
> +    switch (endp - (const uint32_t *) p) {
> +    case 1:
> +        hash1 = _mm_crc32_u32(hash1, p[0]);
> +        break;
> +    case 2:
> +        hash1 = _mm_crc32_u32(hash1, p[0]);
> +        hash1 = _mm_crc32_u32(hash1, p[1]);
> +        break;
> +    case 3:
> +        hash1 = _mm_crc32_u32(hash1, p[0]);
> +        hash1 = _mm_crc32_u32(hash1, p[1]);
> +        hash2 = _mm_crc32_u32(hash2, p[2]);
> +        break;
> +    case 4:
> +        hash1 = _mm_crc32_u32(hash1, p[0]);
> +        hash1 = _mm_crc32_u32(hash1, p[1]);
> +        hash2 = _mm_crc32_u32(hash2, p[2]);
> +        hash2 = _mm_crc32_u32(hash2, p[3]);
> +        break;
> +    case 5:
> +        hash1 = _mm_crc32_u32(hash1, p[0]);
> +        hash1 = _mm_crc32_u32(hash1, p[1]);
> +        hash2 = _mm_crc32_u32(hash2, p[2]);
> +        hash2 = _mm_crc32_u32(hash2, p[3]);
> +        hash3 = _mm_crc32_u32(hash3, p[4]);
> +        break;
> +    }
> +    return hash_finish32(hash1, hash2, hash3);
> +}
> +
>  /* A simpler version for 64-bit data.
>   * 'n_words' is the count of 64-bit words, basis is 64 bits. */
>  static inline uint32_t
> @@ -293,10 +352,14 @@ uint32_t hash_words64__(const uint64_t *p, size_t n_words, uint32_t basis);
>  static inline uint32_t
>  hash_words(const uint32_t *p, size_t n_words, uint32_t basis)
>  {
> -    if (__builtin_constant_p(n_words)) {
> -        return hash_words_inline(p, n_words, basis);
> +    if (OVS_LIKELY(((intptr_t) p & ((sizeof(uint64_t)) - 1)) == 0)) {
> +        if (__builtin_constant_p(n_words)) {
> +            return hash_words_inline(p, n_words, basis);
> +        } else {
> +            return hash_words__(p, n_words, basis);
> +        }
>      } else {
> -        return hash_words__(p, n_words, basis);
> +        return hash_words_32aligned(p, n_words, basis);
>      }
>  }
>
Mike Pattrick Jan. 11, 2023, 1:25 p.m. UTC | #2
On Wed, Jan 11, 2023 at 6:24 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 1/11/23 06:33, Mike Pattrick wrote:
> > UB Sanitizer report:
> >
> > lib/hash.h:219:17: runtime error: load of misaligned address
> > 0x7ffc164a88b4 for type 'const uint64_t', which requires 8 byte
> > alignment
> >
> >     #0 in hash_words_inline lib/hash.h:219
> >     #1 in hash_words lib/hash.h:297
> >     [...]
> >
> > Signed-off-by: Mike Pattrick <mkp@redhat.com>
> > ---
> >  lib/hash.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 66 insertions(+), 3 deletions(-)
> >
>
> Hi, Mike.
>
> I didn't read the patch well enough, but I remember that the main
> issue with these hash functions is that aligned and unaligned
> versions *must* return exact same hash result.  Is that the case
> with your patch?  And we'll need a unit test to ensure that.

Yes. I had mocked up a unit test for this, but I found that it was
duplicative of the check_3word_hash() function in test-hash.c. But if
you prefer I could re-add the explicit test for alignment.

-M

>
> And, FYI, this version fails build in CI.
>
> Best regards, Ilya Maximets.
>
> > diff --git a/lib/hash.h b/lib/hash.h
> > index 60a39a40b..ca468c52c 100644
> > --- a/lib/hash.h
> > +++ b/lib/hash.h
> > @@ -187,6 +187,16 @@ static inline uint32_t hash_finish(uint64_t hash, uint64_t final)
> >      return hash ^ (uint32_t)hash >> 16; /* Increase entropy in LSBs. */
> >  }
> >
> > +static inline uint32_t
> > +hash_finish32(uint64_t hash, uint32_t final, uint32_t semifinal)
> > +{
> > +    /* The finishing multiplier 0x805204f3 has been experimentally
> > +     * derived to pass the testsuite hash tests. */
> > +    hash = _mm_crc32_u32(hash, semifinal);
> > +    hash = _mm_crc32_u32(hash, final) * 0x805204f3;
> > +    return hash ^ ((uint32_t) hash >> 16); /* Increase entropy in LSBs. */
> > +}
> > +
> >  /* Returns the hash of the 'n' 32-bit words at 'p_', starting from 'basis'.
> >   * We access 'p_' as a uint64_t pointer, which is fine for __SSE_4_2__.
> >   *
> > @@ -232,6 +242,55 @@ hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis)
> >      return hash_finish(hash1, hash2 << 32 | hash3);
> >  }
> >
> > +static inline uint32_t
> > +hash_words_32aligned(const uint32_t p_[], size_t n_words, uint32_t basis)
> > +{
> > +    const uint32_t *p = (const void *) p_;
> > +    uint32_t hash1 = basis;
> > +    uint32_t hash2 = 0;
> > +    uint32_t hash3 = n_words;
> > +    const uint32_t *endp = (const uint32_t *) p + n_words;
> > +    const uint32_t *limit = p + n_words - 6;
> > +
> > +    while (p <= limit) {
> > +        hash1 = _mm_crc32_u32(hash1, p[0]);
> > +        hash1 = _mm_crc32_u32(hash1, p[1]);
> > +        hash2 = _mm_crc32_u32(hash2, p[2]);
> > +        hash2 = _mm_crc32_u32(hash2, p[3]);
> > +        hash3 = _mm_crc32_u32(hash3, p[4]);
> > +        hash3 = _mm_crc32_u32(hash3, p[5]);
> > +        p += 6;
> > +    }
> > +    switch (endp - (const uint32_t *) p) {
> > +    case 1:
> > +        hash1 = _mm_crc32_u32(hash1, p[0]);
> > +        break;
> > +    case 2:
> > +        hash1 = _mm_crc32_u32(hash1, p[0]);
> > +        hash1 = _mm_crc32_u32(hash1, p[1]);
> > +        break;
> > +    case 3:
> > +        hash1 = _mm_crc32_u32(hash1, p[0]);
> > +        hash1 = _mm_crc32_u32(hash1, p[1]);
> > +        hash2 = _mm_crc32_u32(hash2, p[2]);
> > +        break;
> > +    case 4:
> > +        hash1 = _mm_crc32_u32(hash1, p[0]);
> > +        hash1 = _mm_crc32_u32(hash1, p[1]);
> > +        hash2 = _mm_crc32_u32(hash2, p[2]);
> > +        hash2 = _mm_crc32_u32(hash2, p[3]);
> > +        break;
> > +    case 5:
> > +        hash1 = _mm_crc32_u32(hash1, p[0]);
> > +        hash1 = _mm_crc32_u32(hash1, p[1]);
> > +        hash2 = _mm_crc32_u32(hash2, p[2]);
> > +        hash2 = _mm_crc32_u32(hash2, p[3]);
> > +        hash3 = _mm_crc32_u32(hash3, p[4]);
> > +        break;
> > +    }
> > +    return hash_finish32(hash1, hash2, hash3);
> > +}
> > +
> >  /* A simpler version for 64-bit data.
> >   * 'n_words' is the count of 64-bit words, basis is 64 bits. */
> >  static inline uint32_t
> > @@ -293,10 +352,14 @@ uint32_t hash_words64__(const uint64_t *p, size_t n_words, uint32_t basis);
> >  static inline uint32_t
> >  hash_words(const uint32_t *p, size_t n_words, uint32_t basis)
> >  {
> > -    if (__builtin_constant_p(n_words)) {
> > -        return hash_words_inline(p, n_words, basis);
> > +    if (OVS_LIKELY(((intptr_t) p & ((sizeof(uint64_t)) - 1)) == 0)) {
> > +        if (__builtin_constant_p(n_words)) {
> > +            return hash_words_inline(p, n_words, basis);
> > +        } else {
> > +            return hash_words__(p, n_words, basis);
> > +        }
> >      } else {
> > -        return hash_words__(p, n_words, basis);
> > +        return hash_words_32aligned(p, n_words, basis);
> >      }
> >  }
> >
>
Ilya Maximets Jan. 11, 2023, 2:35 p.m. UTC | #3
On 1/11/23 14:25, Mike Pattrick wrote:
> On Wed, Jan 11, 2023 at 6:24 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>
>> On 1/11/23 06:33, Mike Pattrick wrote:
>>> UB Sanitizer report:
>>>
>>> lib/hash.h:219:17: runtime error: load of misaligned address
>>> 0x7ffc164a88b4 for type 'const uint64_t', which requires 8 byte
>>> alignment
>>>
>>>     #0 in hash_words_inline lib/hash.h:219
>>>     #1 in hash_words lib/hash.h:297
>>>     [...]
>>>
>>> Signed-off-by: Mike Pattrick <mkp@redhat.com>
>>> ---
>>>  lib/hash.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++---
>>>  1 file changed, 66 insertions(+), 3 deletions(-)
>>>
>>
>> Hi, Mike.
>>
>> I didn't read the patch well enough, but I remember that the main
>> issue with these hash functions is that aligned and unaligned
>> versions *must* return exact same hash result.  Is that the case
>> with your patch?  And we'll need a unit test to ensure that.
> 
> Yes. I had mocked up a unit test for this, but I found that it was
> duplicative of the check_3word_hash() function in test-hash.c. But if
> you prefer I could re-add the explicit test for alignment.

Hmm, indeed.  We probably don't need a separate test then.

Do we also need an aarch64 version of the function?  I'm guessing
that arm build might be broken with this change.

Not a full review, but see a small comment inline.

> 
> -M
> 
>>
>> And, FYI, this version fails build in CI.
>>
>> Best regards, Ilya Maximets.
>>
>>> diff --git a/lib/hash.h b/lib/hash.h
>>> index 60a39a40b..ca468c52c 100644
>>> --- a/lib/hash.h
>>> +++ b/lib/hash.h
>>> @@ -187,6 +187,16 @@ static inline uint32_t hash_finish(uint64_t hash, uint64_t final)
>>>      return hash ^ (uint32_t)hash >> 16; /* Increase entropy in LSBs. */
>>>  }
>>>
>>> +static inline uint32_t
>>> +hash_finish32(uint64_t hash, uint32_t final, uint32_t semifinal)
>>> +{
>>> +    /* The finishing multiplier 0x805204f3 has been experimentally
>>> +     * derived to pass the testsuite hash tests. */
>>> +    hash = _mm_crc32_u32(hash, semifinal);
>>> +    hash = _mm_crc32_u32(hash, final) * 0x805204f3;
>>> +    return hash ^ ((uint32_t) hash >> 16); /* Increase entropy in LSBs. */
>>> +}
>>> +
>>>  /* Returns the hash of the 'n' 32-bit words at 'p_', starting from 'basis'.
>>>   * We access 'p_' as a uint64_t pointer, which is fine for __SSE_4_2__.
>>>   *
>>> @@ -232,6 +242,55 @@ hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis)
>>>      return hash_finish(hash1, hash2 << 32 | hash3);
>>>  }
>>>
>>> +static inline uint32_t
>>> +hash_words_32aligned(const uint32_t p_[], size_t n_words, uint32_t basis)

We should use a pointer instead of [] type here.
And maybe fix the SSE version of hash_words_inline as well.

>>> +{
>>> +    const uint32_t *p = (const void *) p_;
>>> +    uint32_t hash1 = basis;
>>> +    uint32_t hash2 = 0;
>>> +    uint32_t hash3 = n_words;
>>> +    const uint32_t *endp = (const uint32_t *) p + n_words;
>>> +    const uint32_t *limit = p + n_words - 6;
>>> +
>>> +    while (p <= limit) {
>>> +        hash1 = _mm_crc32_u32(hash1, p[0]);
>>> +        hash1 = _mm_crc32_u32(hash1, p[1]);
>>> +        hash2 = _mm_crc32_u32(hash2, p[2]);
>>> +        hash2 = _mm_crc32_u32(hash2, p[3]);
>>> +        hash3 = _mm_crc32_u32(hash3, p[4]);
>>> +        hash3 = _mm_crc32_u32(hash3, p[5]);
>>> +        p += 6;
>>> +    }
>>> +    switch (endp - (const uint32_t *) p) {
>>> +    case 1:
>>> +        hash1 = _mm_crc32_u32(hash1, p[0]);
>>> +        break;
>>> +    case 2:
>>> +        hash1 = _mm_crc32_u32(hash1, p[0]);
>>> +        hash1 = _mm_crc32_u32(hash1, p[1]);
>>> +        break;
>>> +    case 3:
>>> +        hash1 = _mm_crc32_u32(hash1, p[0]);
>>> +        hash1 = _mm_crc32_u32(hash1, p[1]);
>>> +        hash2 = _mm_crc32_u32(hash2, p[2]);
>>> +        break;
>>> +    case 4:
>>> +        hash1 = _mm_crc32_u32(hash1, p[0]);
>>> +        hash1 = _mm_crc32_u32(hash1, p[1]);
>>> +        hash2 = _mm_crc32_u32(hash2, p[2]);
>>> +        hash2 = _mm_crc32_u32(hash2, p[3]);
>>> +        break;
>>> +    case 5:
>>> +        hash1 = _mm_crc32_u32(hash1, p[0]);
>>> +        hash1 = _mm_crc32_u32(hash1, p[1]);
>>> +        hash2 = _mm_crc32_u32(hash2, p[2]);
>>> +        hash2 = _mm_crc32_u32(hash2, p[3]);
>>> +        hash3 = _mm_crc32_u32(hash3, p[4]);
>>> +        break;
>>> +    }
>>> +    return hash_finish32(hash1, hash2, hash3);
>>> +}
>>> +
>>>  /* A simpler version for 64-bit data.
>>>   * 'n_words' is the count of 64-bit words, basis is 64 bits. */
>>>  static inline uint32_t
>>> @@ -293,10 +352,14 @@ uint32_t hash_words64__(const uint64_t *p, size_t n_words, uint32_t basis);
>>>  static inline uint32_t
>>>  hash_words(const uint32_t *p, size_t n_words, uint32_t basis)
>>>  {
>>> -    if (__builtin_constant_p(n_words)) {
>>> -        return hash_words_inline(p, n_words, basis);
>>> +    if (OVS_LIKELY(((intptr_t) p & ((sizeof(uint64_t)) - 1)) == 0)) {
>>> +        if (__builtin_constant_p(n_words)) {
>>> +            return hash_words_inline(p, n_words, basis);
>>> +        } else {
>>> +            return hash_words__(p, n_words, basis);
>>> +        }
>>>      } else {
>>> -        return hash_words__(p, n_words, basis);
>>> +        return hash_words_32aligned(p, n_words, basis);
>>>      }
>>>  }
>>>
>>
>
diff mbox series

Patch

diff --git a/lib/hash.h b/lib/hash.h
index 60a39a40b..ca468c52c 100644
--- a/lib/hash.h
+++ b/lib/hash.h
@@ -187,6 +187,16 @@  static inline uint32_t hash_finish(uint64_t hash, uint64_t final)
     return hash ^ (uint32_t)hash >> 16; /* Increase entropy in LSBs. */
 }
 
+static inline uint32_t
+hash_finish32(uint64_t hash, uint32_t final, uint32_t semifinal)
+{
+    /* The finishing multiplier 0x805204f3 has been experimentally
+     * derived to pass the testsuite hash tests. */
+    hash = _mm_crc32_u32(hash, semifinal);
+    hash = _mm_crc32_u32(hash, final) * 0x805204f3;
+    return hash ^ ((uint32_t) hash >> 16); /* Increase entropy in LSBs. */
+}
+
 /* Returns the hash of the 'n' 32-bit words at 'p_', starting from 'basis'.
  * We access 'p_' as a uint64_t pointer, which is fine for __SSE_4_2__.
  *
@@ -232,6 +242,55 @@  hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis)
     return hash_finish(hash1, hash2 << 32 | hash3);
 }
 
+static inline uint32_t
+hash_words_32aligned(const uint32_t p_[], size_t n_words, uint32_t basis)
+{
+    const uint32_t *p = (const void *) p_;
+    uint32_t hash1 = basis;
+    uint32_t hash2 = 0;
+    uint32_t hash3 = n_words;
+    const uint32_t *endp = (const uint32_t *) p + n_words;
+    const uint32_t *limit = p + n_words - 6;
+
+    while (p <= limit) {
+        hash1 = _mm_crc32_u32(hash1, p[0]);
+        hash1 = _mm_crc32_u32(hash1, p[1]);
+        hash2 = _mm_crc32_u32(hash2, p[2]);
+        hash2 = _mm_crc32_u32(hash2, p[3]);
+        hash3 = _mm_crc32_u32(hash3, p[4]);
+        hash3 = _mm_crc32_u32(hash3, p[5]);
+        p += 6;
+    }
+    switch (endp - (const uint32_t *) p) {
+    case 1:
+        hash1 = _mm_crc32_u32(hash1, p[0]);
+        break;
+    case 2:
+        hash1 = _mm_crc32_u32(hash1, p[0]);
+        hash1 = _mm_crc32_u32(hash1, p[1]);
+        break;
+    case 3:
+        hash1 = _mm_crc32_u32(hash1, p[0]);
+        hash1 = _mm_crc32_u32(hash1, p[1]);
+        hash2 = _mm_crc32_u32(hash2, p[2]);
+        break;
+    case 4:
+        hash1 = _mm_crc32_u32(hash1, p[0]);
+        hash1 = _mm_crc32_u32(hash1, p[1]);
+        hash2 = _mm_crc32_u32(hash2, p[2]);
+        hash2 = _mm_crc32_u32(hash2, p[3]);
+        break;
+    case 5:
+        hash1 = _mm_crc32_u32(hash1, p[0]);
+        hash1 = _mm_crc32_u32(hash1, p[1]);
+        hash2 = _mm_crc32_u32(hash2, p[2]);
+        hash2 = _mm_crc32_u32(hash2, p[3]);
+        hash3 = _mm_crc32_u32(hash3, p[4]);
+        break;
+    }
+    return hash_finish32(hash1, hash2, hash3);
+}
+
 /* A simpler version for 64-bit data.
  * 'n_words' is the count of 64-bit words, basis is 64 bits. */
 static inline uint32_t
@@ -293,10 +352,14 @@  uint32_t hash_words64__(const uint64_t *p, size_t n_words, uint32_t basis);
 static inline uint32_t
 hash_words(const uint32_t *p, size_t n_words, uint32_t basis)
 {
-    if (__builtin_constant_p(n_words)) {
-        return hash_words_inline(p, n_words, basis);
+    if (OVS_LIKELY(((intptr_t) p & ((sizeof(uint64_t)) - 1)) == 0)) {
+        if (__builtin_constant_p(n_words)) {
+            return hash_words_inline(p, n_words, basis);
+        } else {
+            return hash_words__(p, n_words, basis);
+        }
     } else {
-        return hash_words__(p, n_words, basis);
+        return hash_words_32aligned(p, n_words, basis);
     }
 }