diff mbox series

[RFC] tree-optimization/113910 - bitmap_hash is weak, improve iterative_hash_*

Message ID 20240216125032.5F11713343@imap2.dmz-prg2.suse.org
State New
Headers show
Series [RFC] tree-optimization/113910 - bitmap_hash is weak, improve iterative_hash_* | expand

Commit Message

Richard Biener Feb. 16, 2024, 12:50 p.m. UTC
The following addresses the weak bitmap_hash function which results
in points-to analysis taking a long time because of a high collision
rate in one of its bitmap hash tables.  Using a better hash function
like in the bitmap.cc hunk below doesn't help unless one also replaces
the hash function in iterative_hash_* with something faster.

I've taken the implementation from BFD string merging and extracted
a 4 and 8 byte worker to replace iterative_hash_hashval_t and
iterative_hash_host_wide_it.  I didn't yet replace the generic
iterative_hash as its implementation resides in libiberty.

With this hash the testcase shows

   5.15%          9323  cc1plus  cc1plus             [.] bitmap_hash

and a compile-time of 44s while using the original hash implementation
this becomes

  10.50%         20405  cc1plus  cc1plus             [.] bitmap_hash

and a compile-time of 46s, still faster than using original bitmap_hash
which takes 56s and while having bitmap_hash off the profile shows

  21.56%         49490  cc1plus  cc1plus           [.] bitmap_equal_p

because of collision rates in the 20s.

Bootstrapped / tested on x86_64-unknown-linux-gnu.

OK for stage1?

Should I try to change libiberty iterative_hash or implement a
generic block variant for GCCs use with a different name, no
longer using libibertys iterative_hash?

Thanks,
Richard.

	PR tree-optimization/113910
	* inchash.h (iterative_hash_host_wide_int): Change hash
	function.
	(iterative_hash_hashval_t): Likewise.
	* bitmap.cc (bitmap_hash): Hash index and bits.
---
 gcc/bitmap.cc |  8 +++---
 gcc/inchash.h | 80 +++++++++++++++++++++++++--------------------------
 2 files changed, 44 insertions(+), 44 deletions(-)
diff mbox series

Patch

diff --git a/gcc/bitmap.cc b/gcc/bitmap.cc
index 459e32c1ad1..f502841f385 100644
--- a/gcc/bitmap.cc
+++ b/gcc/bitmap.cc
@@ -2695,18 +2695,18 @@  hashval_t
 bitmap_hash (const_bitmap head)
 {
   const bitmap_element *ptr;
-  BITMAP_WORD hash = 0;
+  hashval_t hash = 0;
   int ix;
 
   gcc_checking_assert (!head->tree_form);
 
   for (ptr = head->first; ptr; ptr = ptr->next)
     {
-      hash ^= ptr->indx;
+      hash = iterative_hash_hashval_t (ptr->indx, hash);
       for (ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
-	hash ^= ptr->bits[ix];
+	hash = iterative_hash_host_wide_int (ptr->bits[ix], hash);
     }
-  return iterative_hash (&hash, sizeof (hash), 0);
+  return hash;
 }
 
 
diff --git a/gcc/inchash.h b/gcc/inchash.h
index e88f9b5eac1..4cdef1e7fce 100644
--- a/gcc/inchash.h
+++ b/gcc/inchash.h
@@ -28,8 +28,8 @@  along with GCC; see the file COPYING3.  If not see
    Currently it just implements the plain old jhash based
    incremental hash from gcc's tree.cc.  */
 
-hashval_t iterative_hash_host_wide_int (HOST_WIDE_INT, hashval_t);
-hashval_t iterative_hash_hashval_t (hashval_t, hashval_t);
+hashval_t iterative_hash_host_wide_int (uint64_t, hashval_t);
+hashval_t iterative_hash_hashval_t (uint32_t, hashval_t);
 
 namespace inchash
 {
@@ -157,57 +157,57 @@  class hash
 
 }
 
-/* Borrowed from hashtab.c iterative_hash implementation.  */
-#define mix(a,b,c) \
-{ \
-  a -= b; a -= c; a ^= (c>>13); \
-  b -= c; b -= a; b ^= (a<< 8); \
-  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
-  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
-  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
-  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
-  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
-  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
-  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
-}
-
 
 /* Produce good hash value combining VAL and VAL2.  */
 inline
 hashval_t
-iterative_hash_hashval_t (hashval_t val, hashval_t val2)
+iterative_hash_hashval_t (uint32_t val, hashval_t val2)
 {
-  /* the golden ratio; an arbitrary value.  */
-  hashval_t a = 0x9e3779b9;
+  static_assert (sizeof (hashval_t) == sizeof (uint32_t), "");
+
+  const uint32_t mul = ((1 << 0) +  (1 << 2) + (1 << 3) + (1 << 5)
+			+ (1 << 7) + (1 << 11) + (1 << 13) + (1 << 17)
+			+ (0 << 19) + (1 << 23) + (1 << 29) + (1u << 31));
+
+  const unsigned len = 4;
+  hashval_t acc = val2 + len * 0x9e3779b1;
+
+  uint16_t i1 = (uint16_t)val ^ (0x396cfeb8 + len);
+  uint16_t i2 = (uint16_t)(val >> 16) ^ (0xbe4ba423 + len);
+  hashval_t m = (uint32_t)i1 * i2;
 
-  mix (a, val, val2);
-  return val2;
+  acc += m;
+  acc = acc ^ (acc >> 7);
+  uint64_t r = (uint64_t)mul * acc;
+  return (uint32_t)r ^ (uint32_t)(r >> 32);
 }
 
 /* Produce good hash value combining VAL and VAL2.  */
 
 inline
 hashval_t
-iterative_hash_host_wide_int (HOST_WIDE_INT val, hashval_t val2)
+iterative_hash_host_wide_int (uint64_t val, hashval_t val2)
 {
-  if (sizeof (HOST_WIDE_INT) == sizeof (hashval_t))
-    return iterative_hash_hashval_t (val, val2);
-  else
-    {
-      hashval_t a = (hashval_t) val;
-      /* Avoid warnings about shifting of more than the width of the type on
-         hosts that won't execute this path.  */
-      int zero = 0;
-      hashval_t b = (hashval_t) (val >> (sizeof (hashval_t) * 8 + zero));
-      mix (a, b, val2);
-      if (sizeof (HOST_WIDE_INT) > 2 * sizeof (hashval_t))
-	{
-	  hashval_t a = (hashval_t) (val >> (sizeof (hashval_t) * 16 + zero));
-	  hashval_t b = (hashval_t) (val >> (sizeof (hashval_t) * 24 + zero));
-	  mix (a, b, val2);
-	}
-      return val2;
-    }
+  static_assert (sizeof (hashval_t) == sizeof (uint32_t), "");
+  static_assert (sizeof (HOST_WIDE_INT) == sizeof (uint64_t), "");
+
+  const uint32_t mul = ((1 << 0) +  (1 << 2) + (1 << 3) + (1 << 5)
+			+ (1 << 7) + (1 << 11) + (1 << 13) + (1 << 17)
+			+ (0 << 19) + (1 << 23) + (1 << 29) + (1u << 31));
+
+  const unsigned len = 8;
+  hashval_t acc = val2 + len * 0x9e3779b1;
+
+  uint32_t i1 = (uint32_t)val ^ (0x396cfeb8 + len);
+  uint32_t i2 = (uint32_t)(val >> 32) ^ (0xbe4ba423 + len);
+  uint64_t m64 = (uint64_t)i1 * i2;
+  hashval_t m = (uint32_t)m64 ^ (uint32_t)(m64 >> 32);
+
+  acc += m;
+  acc = acc ^ (acc >> 7);
+  uint64_t r = (uint64_t)mul * acc;
+  return (uint32_t)r ^ (uint32_t)(r >> 32);
 }
 
+
 #endif