Message ID | 20230111053337.3056138-3-mkp@redhat.com |
---|---|
State | Superseded |
Headers | show |
Series | [ovs-dev,1/3] netdev-dummy: Allocate dummy_packet_stream on cacheline boundary | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | fail | github build: failed |
ovsrobot/intel-ovs-compilation | fail | test: fail |
On 1/11/23 06:33, Mike Pattrick wrote: > UB Sanitizer report: > > lib/hash.h:219:17: runtime error: load of misaligned address > 0x7ffc164a88b4 for type 'const uint64_t', which requires 8 byte > alignment > > #0 in hash_words_inline lib/hash.h:219 > #1 in hash_words lib/hash.h:297 > [...] > > Signed-off-by: Mike Pattrick <mkp@redhat.com> > --- > lib/hash.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 66 insertions(+), 3 deletions(-) > Hi, Mike. I didn't read the patch well enough, but I remember that the main issue with these hash functions is that aligned and unaligned versions *must* return exact same hash result. Is that the case with your patch? And we'll need a unit test to ensure that. And, FYI, this version fails build in CI. Best regards, Ilya Maximets. > diff --git a/lib/hash.h b/lib/hash.h > index 60a39a40b..ca468c52c 100644 > --- a/lib/hash.h > +++ b/lib/hash.h > @@ -187,6 +187,16 @@ static inline uint32_t hash_finish(uint64_t hash, uint64_t final) > return hash ^ (uint32_t)hash >> 16; /* Increase entropy in LSBs. */ > } > > +static inline uint32_t > +hash_finish32(uint64_t hash, uint32_t final, uint32_t semifinal) > +{ > + /* The finishing multiplier 0x805204f3 has been experimentally > + * derived to pass the testsuite hash tests. */ > + hash = _mm_crc32_u32(hash, semifinal); > + hash = _mm_crc32_u32(hash, final) * 0x805204f3; > + return hash ^ ((uint32_t) hash >> 16); /* Increase entropy in LSBs. */ > +} > + > /* Returns the hash of the 'n' 32-bit words at 'p_', starting from 'basis'. > * We access 'p_' as a uint64_t pointer, which is fine for __SSE_4_2__. > * > @@ -232,6 +242,55 @@ hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis) > return hash_finish(hash1, hash2 << 32 | hash3); > } > > +static inline uint32_t > +hash_words_32aligned(const uint32_t p_[], size_t n_words, uint32_t basis) > +{ > + const uint32_t *p = (const void *) p_; > + uint32_t hash1 = basis; > + uint32_t hash2 = 0; > + uint32_t hash3 = n_words; > + const uint32_t *endp = (const uint32_t *) p + n_words; > + const uint32_t *limit = p + n_words - 6; > + > + while (p <= limit) { > + hash1 = _mm_crc32_u32(hash1, p[0]); > + hash1 = _mm_crc32_u32(hash1, p[1]); > + hash2 = _mm_crc32_u32(hash2, p[2]); > + hash2 = _mm_crc32_u32(hash2, p[3]); > + hash3 = _mm_crc32_u32(hash3, p[4]); > + hash3 = _mm_crc32_u32(hash3, p[5]); > + p += 6; > + } > + switch (endp - (const uint32_t *) p) { > + case 1: > + hash1 = _mm_crc32_u32(hash1, p[0]); > + break; > + case 2: > + hash1 = _mm_crc32_u32(hash1, p[0]); > + hash1 = _mm_crc32_u32(hash1, p[1]); > + break; > + case 3: > + hash1 = _mm_crc32_u32(hash1, p[0]); > + hash1 = _mm_crc32_u32(hash1, p[1]); > + hash2 = _mm_crc32_u32(hash2, p[2]); > + break; > + case 4: > + hash1 = _mm_crc32_u32(hash1, p[0]); > + hash1 = _mm_crc32_u32(hash1, p[1]); > + hash2 = _mm_crc32_u32(hash2, p[2]); > + hash2 = _mm_crc32_u32(hash2, p[3]); > + break; > + case 5: > + hash1 = _mm_crc32_u32(hash1, p[0]); > + hash1 = _mm_crc32_u32(hash1, p[1]); > + hash2 = _mm_crc32_u32(hash2, p[2]); > + hash2 = _mm_crc32_u32(hash2, p[3]); > + hash3 = _mm_crc32_u32(hash3, p[4]); > + break; > + } > + return hash_finish32(hash1, hash2, hash3); > +} > + > /* A simpler version for 64-bit data. > * 'n_words' is the count of 64-bit words, basis is 64 bits. */ > static inline uint32_t > @@ -293,10 +352,14 @@ uint32_t hash_words64__(const uint64_t *p, size_t n_words, uint32_t basis); > static inline uint32_t > hash_words(const uint32_t *p, size_t n_words, uint32_t basis) > { > - if (__builtin_constant_p(n_words)) { > - return hash_words_inline(p, n_words, basis); > + if (OVS_LIKELY(((intptr_t) p & ((sizeof(uint64_t)) - 1)) == 0)) { > + if (__builtin_constant_p(n_words)) { > + return hash_words_inline(p, n_words, basis); > + } else { > + return hash_words__(p, n_words, basis); > + } > } else { > - return hash_words__(p, n_words, basis); > + return hash_words_32aligned(p, n_words, basis); > } > } >
On Wed, Jan 11, 2023 at 6:24 AM Ilya Maximets <i.maximets@ovn.org> wrote: > > On 1/11/23 06:33, Mike Pattrick wrote: > > UB Sanitizer report: > > > > lib/hash.h:219:17: runtime error: load of misaligned address > > 0x7ffc164a88b4 for type 'const uint64_t', which requires 8 byte > > alignment > > > > #0 in hash_words_inline lib/hash.h:219 > > #1 in hash_words lib/hash.h:297 > > [...] > > > > Signed-off-by: Mike Pattrick <mkp@redhat.com> > > --- > > lib/hash.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++--- > > 1 file changed, 66 insertions(+), 3 deletions(-) > > > > Hi, Mike. > > I didn't read the patch well enough, but I remember that the main > issue with these hash functions is that aligned and unaligned > versions *must* return exact same hash result. Is that the case > with your patch? And we'll need a unit test to ensure that. Yes. I had mocked up a unit test for this, but I found that it was duplicative of the check_3word_hash() function in test-hash.c. But if you prefer I could re-add the explicit test for alignment. -M > > And, FYI, this version fails build in CI. > > Best regards, Ilya Maximets. > > > diff --git a/lib/hash.h b/lib/hash.h > > index 60a39a40b..ca468c52c 100644 > > --- a/lib/hash.h > > +++ b/lib/hash.h > > @@ -187,6 +187,16 @@ static inline uint32_t hash_finish(uint64_t hash, uint64_t final) > > return hash ^ (uint32_t)hash >> 16; /* Increase entropy in LSBs. */ > > } > > > > +static inline uint32_t > > +hash_finish32(uint64_t hash, uint32_t final, uint32_t semifinal) > > +{ > > + /* The finishing multiplier 0x805204f3 has been experimentally > > + * derived to pass the testsuite hash tests. */ > > + hash = _mm_crc32_u32(hash, semifinal); > > + hash = _mm_crc32_u32(hash, final) * 0x805204f3; > > + return hash ^ ((uint32_t) hash >> 16); /* Increase entropy in LSBs. */ > > +} > > + > > /* Returns the hash of the 'n' 32-bit words at 'p_', starting from 'basis'. > > * We access 'p_' as a uint64_t pointer, which is fine for __SSE_4_2__. > > * > > @@ -232,6 +242,55 @@ hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis) > > return hash_finish(hash1, hash2 << 32 | hash3); > > } > > > > +static inline uint32_t > > +hash_words_32aligned(const uint32_t p_[], size_t n_words, uint32_t basis) > > +{ > > + const uint32_t *p = (const void *) p_; > > + uint32_t hash1 = basis; > > + uint32_t hash2 = 0; > > + uint32_t hash3 = n_words; > > + const uint32_t *endp = (const uint32_t *) p + n_words; > > + const uint32_t *limit = p + n_words - 6; > > + > > + while (p <= limit) { > > + hash1 = _mm_crc32_u32(hash1, p[0]); > > + hash1 = _mm_crc32_u32(hash1, p[1]); > > + hash2 = _mm_crc32_u32(hash2, p[2]); > > + hash2 = _mm_crc32_u32(hash2, p[3]); > > + hash3 = _mm_crc32_u32(hash3, p[4]); > > + hash3 = _mm_crc32_u32(hash3, p[5]); > > + p += 6; > > + } > > + switch (endp - (const uint32_t *) p) { > > + case 1: > > + hash1 = _mm_crc32_u32(hash1, p[0]); > > + break; > > + case 2: > > + hash1 = _mm_crc32_u32(hash1, p[0]); > > + hash1 = _mm_crc32_u32(hash1, p[1]); > > + break; > > + case 3: > > + hash1 = _mm_crc32_u32(hash1, p[0]); > > + hash1 = _mm_crc32_u32(hash1, p[1]); > > + hash2 = _mm_crc32_u32(hash2, p[2]); > > + break; > > + case 4: > > + hash1 = _mm_crc32_u32(hash1, p[0]); > > + hash1 = _mm_crc32_u32(hash1, p[1]); > > + hash2 = _mm_crc32_u32(hash2, p[2]); > > + hash2 = _mm_crc32_u32(hash2, p[3]); > > + break; > > + case 5: > > + hash1 = _mm_crc32_u32(hash1, p[0]); > > + hash1 = _mm_crc32_u32(hash1, p[1]); > > + hash2 = _mm_crc32_u32(hash2, p[2]); > > + hash2 = _mm_crc32_u32(hash2, p[3]); > > + hash3 = _mm_crc32_u32(hash3, p[4]); > > + break; > > + } > > + return hash_finish32(hash1, hash2, hash3); > > +} > > + > > /* A simpler version for 64-bit data. > > * 'n_words' is the count of 64-bit words, basis is 64 bits. */ > > static inline uint32_t > > @@ -293,10 +352,14 @@ uint32_t hash_words64__(const uint64_t *p, size_t n_words, uint32_t basis); > > static inline uint32_t > > hash_words(const uint32_t *p, size_t n_words, uint32_t basis) > > { > > - if (__builtin_constant_p(n_words)) { > > - return hash_words_inline(p, n_words, basis); > > + if (OVS_LIKELY(((intptr_t) p & ((sizeof(uint64_t)) - 1)) == 0)) { > > + if (__builtin_constant_p(n_words)) { > > + return hash_words_inline(p, n_words, basis); > > + } else { > > + return hash_words__(p, n_words, basis); > > + } > > } else { > > - return hash_words__(p, n_words, basis); > > + return hash_words_32aligned(p, n_words, basis); > > } > > } > > >
On 1/11/23 14:25, Mike Pattrick wrote: > On Wed, Jan 11, 2023 at 6:24 AM Ilya Maximets <i.maximets@ovn.org> wrote: >> >> On 1/11/23 06:33, Mike Pattrick wrote: >>> UB Sanitizer report: >>> >>> lib/hash.h:219:17: runtime error: load of misaligned address >>> 0x7ffc164a88b4 for type 'const uint64_t', which requires 8 byte >>> alignment >>> >>> #0 in hash_words_inline lib/hash.h:219 >>> #1 in hash_words lib/hash.h:297 >>> [...] >>> >>> Signed-off-by: Mike Pattrick <mkp@redhat.com> >>> --- >>> lib/hash.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++--- >>> 1 file changed, 66 insertions(+), 3 deletions(-) >>> >> >> Hi, Mike. >> >> I didn't read the patch well enough, but I remember that the main >> issue with these hash functions is that aligned and unaligned >> versions *must* return exact same hash result. Is that the case >> with your patch? And we'll need a unit test to ensure that. > > Yes. I had mocked up a unit test for this, but I found that it was > duplicative of the check_3word_hash() function in test-hash.c. But if > you prefer I could re-add the explicit test for alignment. Hmm, indeed. We probably don't need a separate test then. Do we also need an aarch64 version of the function? I'm guessing that arm build might be broken with this change. Not a full review, but see a small comment inline. > > -M > >> >> And, FYI, this version fails build in CI. >> >> Best regards, Ilya Maximets. >> >>> diff --git a/lib/hash.h b/lib/hash.h >>> index 60a39a40b..ca468c52c 100644 >>> --- a/lib/hash.h >>> +++ b/lib/hash.h >>> @@ -187,6 +187,16 @@ static inline uint32_t hash_finish(uint64_t hash, uint64_t final) >>> return hash ^ (uint32_t)hash >> 16; /* Increase entropy in LSBs. */ >>> } >>> >>> +static inline uint32_t >>> +hash_finish32(uint64_t hash, uint32_t final, uint32_t semifinal) >>> +{ >>> + /* The finishing multiplier 0x805204f3 has been experimentally >>> + * derived to pass the testsuite hash tests. */ >>> + hash = _mm_crc32_u32(hash, semifinal); >>> + hash = _mm_crc32_u32(hash, final) * 0x805204f3; >>> + return hash ^ ((uint32_t) hash >> 16); /* Increase entropy in LSBs. */ >>> +} >>> + >>> /* Returns the hash of the 'n' 32-bit words at 'p_', starting from 'basis'. >>> * We access 'p_' as a uint64_t pointer, which is fine for __SSE_4_2__. >>> * >>> @@ -232,6 +242,55 @@ hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis) >>> return hash_finish(hash1, hash2 << 32 | hash3); >>> } >>> >>> +static inline uint32_t >>> +hash_words_32aligned(const uint32_t p_[], size_t n_words, uint32_t basis) We should use a pointer instead of [] type here. And maybe fix the SSE version of hash_words_inline as well. >>> +{ >>> + const uint32_t *p = (const void *) p_; >>> + uint32_t hash1 = basis; >>> + uint32_t hash2 = 0; >>> + uint32_t hash3 = n_words; >>> + const uint32_t *endp = (const uint32_t *) p + n_words; >>> + const uint32_t *limit = p + n_words - 6; >>> + >>> + while (p <= limit) { >>> + hash1 = _mm_crc32_u32(hash1, p[0]); >>> + hash1 = _mm_crc32_u32(hash1, p[1]); >>> + hash2 = _mm_crc32_u32(hash2, p[2]); >>> + hash2 = _mm_crc32_u32(hash2, p[3]); >>> + hash3 = _mm_crc32_u32(hash3, p[4]); >>> + hash3 = _mm_crc32_u32(hash3, p[5]); >>> + p += 6; >>> + } >>> + switch (endp - (const uint32_t *) p) { >>> + case 1: >>> + hash1 = _mm_crc32_u32(hash1, p[0]); >>> + break; >>> + case 2: >>> + hash1 = _mm_crc32_u32(hash1, p[0]); >>> + hash1 = _mm_crc32_u32(hash1, p[1]); >>> + break; >>> + case 3: >>> + hash1 = _mm_crc32_u32(hash1, p[0]); >>> + hash1 = _mm_crc32_u32(hash1, p[1]); >>> + hash2 = _mm_crc32_u32(hash2, p[2]); >>> + break; >>> + case 4: >>> + hash1 = _mm_crc32_u32(hash1, p[0]); >>> + hash1 = _mm_crc32_u32(hash1, p[1]); >>> + hash2 = _mm_crc32_u32(hash2, p[2]); >>> + hash2 = _mm_crc32_u32(hash2, p[3]); >>> + break; >>> + case 5: >>> + hash1 = _mm_crc32_u32(hash1, p[0]); >>> + hash1 = _mm_crc32_u32(hash1, p[1]); >>> + hash2 = _mm_crc32_u32(hash2, p[2]); >>> + hash2 = _mm_crc32_u32(hash2, p[3]); >>> + hash3 = _mm_crc32_u32(hash3, p[4]); >>> + break; >>> + } >>> + return hash_finish32(hash1, hash2, hash3); >>> +} >>> + >>> /* A simpler version for 64-bit data. >>> * 'n_words' is the count of 64-bit words, basis is 64 bits. */ >>> static inline uint32_t >>> @@ -293,10 +352,14 @@ uint32_t hash_words64__(const uint64_t *p, size_t n_words, uint32_t basis); >>> static inline uint32_t >>> hash_words(const uint32_t *p, size_t n_words, uint32_t basis) >>> { >>> - if (__builtin_constant_p(n_words)) { >>> - return hash_words_inline(p, n_words, basis); >>> + if (OVS_LIKELY(((intptr_t) p & ((sizeof(uint64_t)) - 1)) == 0)) { >>> + if (__builtin_constant_p(n_words)) { >>> + return hash_words_inline(p, n_words, basis); >>> + } else { >>> + return hash_words__(p, n_words, basis); >>> + } >>> } else { >>> - return hash_words__(p, n_words, basis); >>> + return hash_words_32aligned(p, n_words, basis); >>> } >>> } >>> >> >
diff --git a/lib/hash.h b/lib/hash.h index 60a39a40b..ca468c52c 100644 --- a/lib/hash.h +++ b/lib/hash.h @@ -187,6 +187,16 @@ static inline uint32_t hash_finish(uint64_t hash, uint64_t final) return hash ^ (uint32_t)hash >> 16; /* Increase entropy in LSBs. */ } +static inline uint32_t +hash_finish32(uint64_t hash, uint32_t final, uint32_t semifinal) +{ + /* The finishing multiplier 0x805204f3 has been experimentally + * derived to pass the testsuite hash tests. */ + hash = _mm_crc32_u32(hash, semifinal); + hash = _mm_crc32_u32(hash, final) * 0x805204f3; + return hash ^ ((uint32_t) hash >> 16); /* Increase entropy in LSBs. */ +} + /* Returns the hash of the 'n' 32-bit words at 'p_', starting from 'basis'. * We access 'p_' as a uint64_t pointer, which is fine for __SSE_4_2__. * @@ -232,6 +242,55 @@ hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis) return hash_finish(hash1, hash2 << 32 | hash3); } +static inline uint32_t +hash_words_32aligned(const uint32_t p_[], size_t n_words, uint32_t basis) +{ + const uint32_t *p = (const void *) p_; + uint32_t hash1 = basis; + uint32_t hash2 = 0; + uint32_t hash3 = n_words; + const uint32_t *endp = (const uint32_t *) p + n_words; + const uint32_t *limit = p + n_words - 6; + + while (p <= limit) { + hash1 = _mm_crc32_u32(hash1, p[0]); + hash1 = _mm_crc32_u32(hash1, p[1]); + hash2 = _mm_crc32_u32(hash2, p[2]); + hash2 = _mm_crc32_u32(hash2, p[3]); + hash3 = _mm_crc32_u32(hash3, p[4]); + hash3 = _mm_crc32_u32(hash3, p[5]); + p += 6; + } + switch (endp - (const uint32_t *) p) { + case 1: + hash1 = _mm_crc32_u32(hash1, p[0]); + break; + case 2: + hash1 = _mm_crc32_u32(hash1, p[0]); + hash1 = _mm_crc32_u32(hash1, p[1]); + break; + case 3: + hash1 = _mm_crc32_u32(hash1, p[0]); + hash1 = _mm_crc32_u32(hash1, p[1]); + hash2 = _mm_crc32_u32(hash2, p[2]); + break; + case 4: + hash1 = _mm_crc32_u32(hash1, p[0]); + hash1 = _mm_crc32_u32(hash1, p[1]); + hash2 = _mm_crc32_u32(hash2, p[2]); + hash2 = _mm_crc32_u32(hash2, p[3]); + break; + case 5: + hash1 = _mm_crc32_u32(hash1, p[0]); + hash1 = _mm_crc32_u32(hash1, p[1]); + hash2 = _mm_crc32_u32(hash2, p[2]); + hash2 = _mm_crc32_u32(hash2, p[3]); + hash3 = _mm_crc32_u32(hash3, p[4]); + break; + } + return hash_finish32(hash1, hash2, hash3); +} + /* A simpler version for 64-bit data. * 'n_words' is the count of 64-bit words, basis is 64 bits. */ static inline uint32_t @@ -293,10 +352,14 @@ uint32_t hash_words64__(const uint64_t *p, size_t n_words, uint32_t basis); static inline uint32_t hash_words(const uint32_t *p, size_t n_words, uint32_t basis) { - if (__builtin_constant_p(n_words)) { - return hash_words_inline(p, n_words, basis); + if (OVS_LIKELY(((intptr_t) p & ((sizeof(uint64_t)) - 1)) == 0)) { + if (__builtin_constant_p(n_words)) { + return hash_words_inline(p, n_words, basis); + } else { + return hash_words__(p, n_words, basis); + } } else { - return hash_words__(p, n_words, basis); + return hash_words_32aligned(p, n_words, basis); } }
UB Sanitizer report: lib/hash.h:219:17: runtime error: load of misaligned address 0x7ffc164a88b4 for type 'const uint64_t', which requires 8 byte alignment #0 in hash_words_inline lib/hash.h:219 #1 in hash_words lib/hash.h:297 [...] Signed-off-by: Mike Pattrick <mkp@redhat.com> --- lib/hash.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 3 deletions(-)