
[v2] rtl-optimization/116002 - cselib hash is bad

Message ID 20240722105955.F40A4385DDD4@sourceware.org
State New
Series [v2] rtl-optimization/116002 - cselib hash is bad

Commit Message

Richard Biener July 22, 2024, 10:59 a.m. UTC
The following addresses the bad hash function of cselib, which uses
integer plus for merging.  This causes a huge number of collisions
for the testcase in the PR and thus a very long compile time.
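
To illustrate the problem (standalone snippet, not part of the patch,
with made-up stand-in values): merging sub-hashes with plain addition
is blind to how values are distributed across the operands, so
structurally different RTXs easily end up in the same bucket.

#include <cassert>

int
main ()
{
  /* Stand-ins for an RTX code, a mode and two operand hashes.  */
  unsigned h_code = 3, h_mode = 7, a = 100, b = 200;
  unsigned h1 = h_code + h_mode + a + b;                /* old-style merge */
  unsigned h2 = h_code + h_mode + (a + 50) + (b - 50);  /* different operands */
  assert (h1 == h2);  /* ... yet the hashes collide */
  return 0;
}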

The following rewrites it to use inchash, eliding the duplicate
mixing of RTX code and mode in some cases, more consistently avoiding
a return value of zero, and consistently treating a zero hash from
recursion as fatal.  An important part is to keep the mixing of
operand hashes commutative for commutative operators.
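
As a standalone sketch of what "commutative mixing" means here (the
mix function below is made up for illustration; the patch itself
relies on inchash::hash::add_commutative to mix the two operand
hashes order-insensitively, and the sketch gets the same property by
canonicalizing the pair before mixing):

#include <cassert>
#include <utility>

/* An arbitrary order-sensitive mixer, for illustration only.  */
static unsigned
mix (unsigned seed, unsigned v)
{
  return (seed ^ v) * 0x9e3779b9u + (seed << 6) + (seed >> 2);
}

/* Mix two sub-hashes in an order-insensitive way by canonicalizing
   the pair first.  */
static unsigned
mix_commutative (unsigned seed, unsigned a, unsigned b)
{
  if (a > b)
    std::swap (a, b);
  return mix (mix (seed, a), b);
}

int
main ()
{
  unsigned ha = 0xdeadbeefu, hb = 0x12345678u;
  /* (plus A B) and (plus B A) keep hashing identically.  */
  assert (mix_commutative (1, ha, hb) == mix_commutative (1, hb, ha));
  return 0;
}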

For cselib_hash_plus_const_int this removes the apparent attempt
to hash the same as a generic PLUS, since cselib_hash_rtx makes
sure to dispatch to cselib_hash_plus_const_int consistently.

This reduces compile-time for the testcase in the PR from unknown
to 22s and for a reduced testcase from 73s to 9s.  There's another
pending patchset to improve the speed of inchash mixing, but it's
not in the profile for this testcase (PTA pops up now).

The generated code is unchanged.  I've also compared cc1 builds
with and without the patch and they now compare equal after
retaining commutative hashing for commutative operators.

Bootstrapped and tested on x86_64-unknown-linux-gnu, will push tomorrow
if there are no further comments.

Richard.

	PR rtl-optimization/116002
	* cselib.cc (cselib_hash_rtx): Use inchash to get proper mixing.
	Consistently avoid a zero return value when hashing successfully.
	Consistently treat a zero hash value from recursing as fatal.
	Use hashval_t where appropriate.
	(cselib_hash_plus_const_int): Likewise.
	(new_cselib_val): Use hashval_t.
	(cselib_lookup_1): Likewise.
---
 gcc/cselib.cc | 224 +++++++++++++++++++++++++++-----------------------
 1 file changed, 122 insertions(+), 102 deletions(-)

Patch

diff --git a/gcc/cselib.cc b/gcc/cselib.cc
index cbaab7d515c..7beaca42424 100644
--- a/gcc/cselib.cc
+++ b/gcc/cselib.cc
@@ -51,7 +51,7 @@  static void unchain_one_value (cselib_val *);
 static void unchain_one_elt_list (struct elt_list **);
 static void unchain_one_elt_loc_list (struct elt_loc_list **);
 static void remove_useless_values (void);
-static unsigned int cselib_hash_rtx (rtx, int, machine_mode);
+static hashval_t cselib_hash_rtx (rtx, int, machine_mode);
 static cselib_val *new_cselib_val (unsigned int, machine_mode, rtx);
 static void add_mem_for_addr (cselib_val *, cselib_val *, rtx);
 static cselib_val *cselib_lookup_mem (rtx, int);
@@ -1244,7 +1244,7 @@  cselib_redundant_set_p (rtx set)
 /* Helper function for cselib_hash_rtx.  Arguments like for cselib_hash_rtx,
    except that it hashes (plus:P x c).  */
 
-static unsigned int
+static hashval_t
 cselib_hash_plus_const_int (rtx x, HOST_WIDE_INT c, int create,
 			    machine_mode memmode)
 {
@@ -1266,14 +1266,13 @@  cselib_hash_plus_const_int (rtx x, HOST_WIDE_INT c, int create,
   if (c == 0)
     return e->hash;
 
-  unsigned hash = (unsigned) PLUS + (unsigned) GET_MODE (x);
-  hash += e->hash;
-  unsigned int tem_hash = (unsigned) CONST_INT + (unsigned) VOIDmode;
-  tem_hash += ((unsigned) CONST_INT << 7) + (unsigned HOST_WIDE_INT) c;
-  if (tem_hash == 0)
-    tem_hash = (unsigned int) CONST_INT;
-  hash += tem_hash;
-  return hash ? hash : 1 + (unsigned int) PLUS;
+  inchash::hash hash;
+  hash.add_int (PLUS);
+  hash.add_int (GET_MODE (x));
+  hash.merge_hash (e->hash);
+  hash.add_hwi (c);
+
+  return hash.end () ? hash.end () : 1 + (unsigned int) PLUS;
 }
 
 /* Hash an rtx.  Return 0 if we couldn't hash the rtx.
@@ -1298,7 +1297,7 @@  cselib_hash_plus_const_int (rtx x, HOST_WIDE_INT c, int create,
    If the mode is important in any context, it must be checked specifically
    in a comparison anyway, since relying on hash differences is unsafe.  */
 
-static unsigned int
+static hashval_t
 cselib_hash_rtx (rtx x, int create, machine_mode memmode)
 {
   cselib_val *e;
@@ -1306,10 +1305,11 @@  cselib_hash_rtx (rtx x, int create, machine_mode memmode)
   int i, j;
   enum rtx_code code;
   const char *fmt;
-  unsigned int hash = 0;
+  inchash::hash hash;
 
   code = GET_CODE (x);
-  hash += (unsigned) code + (unsigned) GET_MODE (x);
+  hash.add_int (code);
+  hash.add_int (GET_MODE (x));
 
   switch (code)
     {
@@ -1326,19 +1326,16 @@  cselib_hash_rtx (rtx x, int create, machine_mode memmode)
       return e->hash;
 
     case DEBUG_EXPR:
-      hash += ((unsigned) DEBUG_EXPR << 7)
-	      + DEBUG_TEMP_UID (DEBUG_EXPR_TREE_DECL (x));
-      return hash ? hash : (unsigned int) DEBUG_EXPR;
+      hash.add_int (DEBUG_TEMP_UID (DEBUG_EXPR_TREE_DECL (x)));
+      return hash.end () ? hash.end () : (unsigned int) DEBUG_EXPR;
 
     case DEBUG_IMPLICIT_PTR:
-      hash += ((unsigned) DEBUG_IMPLICIT_PTR << 7)
-	      + DECL_UID (DEBUG_IMPLICIT_PTR_DECL (x));
-      return hash ? hash : (unsigned int) DEBUG_IMPLICIT_PTR;
+      hash.add_int (DECL_UID (DEBUG_IMPLICIT_PTR_DECL (x)));
+      return hash.end () ? hash.end () : (unsigned int) DEBUG_IMPLICIT_PTR;
 
     case DEBUG_PARAMETER_REF:
-      hash += ((unsigned) DEBUG_PARAMETER_REF << 7)
-	      + DECL_UID (DEBUG_PARAMETER_REF_DECL (x));
-      return hash ? hash : (unsigned int) DEBUG_PARAMETER_REF;
+      hash.add_int (DECL_UID (DEBUG_PARAMETER_REF_DECL (x)));
+      return hash.end () ? hash.end () : (unsigned int) DEBUG_PARAMETER_REF;
 
     case ENTRY_VALUE:
       /* ENTRY_VALUEs are function invariant, thus try to avoid
@@ -1347,51 +1344,49 @@  cselib_hash_rtx (rtx x, int create, machine_mode memmode)
 	 ENTRY_VALUE hash would depend on the current value
 	 in some register or memory.  */
       if (REG_P (ENTRY_VALUE_EXP (x)))
-	hash += (unsigned int) REG
-		+ (unsigned int) GET_MODE (ENTRY_VALUE_EXP (x))
-		+ (unsigned int) REGNO (ENTRY_VALUE_EXP (x));
+	hash.add_int ((unsigned int) REG
+		      + (unsigned int) GET_MODE (ENTRY_VALUE_EXP (x))
+		      + (unsigned int) REGNO (ENTRY_VALUE_EXP (x)));
       else if (MEM_P (ENTRY_VALUE_EXP (x))
 	       && REG_P (XEXP (ENTRY_VALUE_EXP (x), 0)))
-	hash += (unsigned int) MEM
-		+ (unsigned int) GET_MODE (XEXP (ENTRY_VALUE_EXP (x), 0))
-		+ (unsigned int) REGNO (XEXP (ENTRY_VALUE_EXP (x), 0));
+	hash.add_int ((unsigned int) MEM
+		      + (unsigned int) GET_MODE (XEXP (ENTRY_VALUE_EXP (x), 0))
+		      + (unsigned int) REGNO (XEXP (ENTRY_VALUE_EXP (x), 0)));
       else
-	hash += cselib_hash_rtx (ENTRY_VALUE_EXP (x), create, memmode);
-      return hash ? hash : (unsigned int) ENTRY_VALUE;
+	hash.add_int (cselib_hash_rtx (ENTRY_VALUE_EXP (x), create, memmode));
+      return hash.end () ? hash.end () : (unsigned int) ENTRY_VALUE;
 
     case CONST_INT:
-      hash += ((unsigned) CONST_INT << 7) + UINTVAL (x);
-      return hash ? hash : (unsigned int) CONST_INT;
+      hash.add_hwi (UINTVAL (x));
+      return hash.end () ? hash.end () : (unsigned int) CONST_INT;
 
     case CONST_WIDE_INT:
       for (i = 0; i < CONST_WIDE_INT_NUNITS (x); i++)
-	hash += CONST_WIDE_INT_ELT (x, i);
-      return hash;
+	hash.add_hwi (CONST_WIDE_INT_ELT (x, i));
+      return hash.end () ? hash.end () : (unsigned int) CONST_WIDE_INT;
 
     case CONST_POLY_INT:
       {
-	inchash::hash h;
-	h.add_int (hash);
 	for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
-	  h.add_wide_int (CONST_POLY_INT_COEFFS (x)[i]);
-	return h.end ();
+	  hash.add_wide_int (CONST_POLY_INT_COEFFS (x)[i]);
+	return hash.end () ? hash.end () : (unsigned int) CONST_POLY_INT;
       }
 
     case CONST_DOUBLE:
       /* This is like the general case, except that it only counts
 	 the integers representing the constant.  */
-      hash += (unsigned) code + (unsigned) GET_MODE (x);
       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
-	hash += ((unsigned) CONST_DOUBLE_LOW (x)
-		 + (unsigned) CONST_DOUBLE_HIGH (x));
+	{
+	  hash.add_hwi (CONST_DOUBLE_LOW (x));
+	  hash.add_hwi (CONST_DOUBLE_HIGH (x));
+	}
       else
-	hash += real_hash (CONST_DOUBLE_REAL_VALUE (x));
-      return hash ? hash : (unsigned int) CONST_DOUBLE;
+	hash.merge_hash (real_hash (CONST_DOUBLE_REAL_VALUE (x)));
+      return hash.end () ? hash.end () : (unsigned int) CONST_DOUBLE;
 
     case CONST_FIXED:
-      hash += (unsigned int) code + (unsigned int) GET_MODE (x);
-      hash += fixed_hash (CONST_FIXED_VALUE (x));
-      return hash ? hash : (unsigned int) CONST_FIXED;
+      hash.merge_hash (fixed_hash (CONST_FIXED_VALUE (x)));
+      return hash.end () ? hash.end () : (unsigned int) CONST_FIXED;
 
     case CONST_VECTOR:
       {
@@ -1403,19 +1398,18 @@  cselib_hash_rtx (rtx x, int create, machine_mode memmode)
 	for (i = 0; i < units; ++i)
 	  {
 	    elt = CONST_VECTOR_ENCODED_ELT (x, i);
-	    hash += cselib_hash_rtx (elt, 0, memmode);
+	    hash.merge_hash (cselib_hash_rtx (elt, 0, memmode));
 	  }
 
-	return hash;
+	return hash.end () ? hash.end () : (unsigned int) CONST_VECTOR;
       }
 
       /* Assume there is only one rtx object for any given label.  */
     case LABEL_REF:
       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
 	 differences and differences between each stage's debugging dumps.  */
-      hash += (((unsigned int) LABEL_REF << 7)
-	       + CODE_LABEL_NUMBER (label_ref_label (x)));
-      return hash ? hash : (unsigned int) LABEL_REF;
+      hash.add_int (CODE_LABEL_NUMBER (label_ref_label (x)));
+      return hash.end () ? hash.end () : (unsigned int) LABEL_REF;
 
     case SYMBOL_REF:
       {
@@ -1424,51 +1418,69 @@  cselib_hash_rtx (rtx x, int create, machine_mode memmode)
 	   different orders and thus different registers to be used in the
 	   final assembler.  This also avoids differences in the dump files
 	   between various stages.  */
-	unsigned int h = 0;
-	const unsigned char *p = (const unsigned char *) XSTR (x, 0);
+	const char *p = (const char *) XSTR (x, 0);
 
-	while (*p)
-	  h += (h << 7) + *p++; /* ??? revisit */
+	if (*p)
+	  hash.add (p, strlen (p));
 
-	hash += ((unsigned int) SYMBOL_REF << 7) + h;
-	return hash ? hash : (unsigned int) SYMBOL_REF;
+	return hash.end () ? hash.end () : (unsigned int) SYMBOL_REF;
       }
 
     case PRE_DEC:
     case PRE_INC:
-      /* We can't compute these without knowing the MEM mode.  */
-      gcc_assert (memmode != VOIDmode);
-      offset = GET_MODE_SIZE (memmode);
-      if (code == PRE_DEC)
-	offset = -offset;
-      /* Adjust the hash so that (mem:MEMMODE (pre_* (reg))) hashes
-	 like (mem:MEMMODE (plus (reg) (const_int I))).  */
-      if (GET_MODE (x) == Pmode
-	  && (REG_P (XEXP (x, 0))
-	      || MEM_P (XEXP (x, 0))
-	      || GET_CODE (XEXP (x, 0)) == VALUE))
-	{
-	  HOST_WIDE_INT c;
-	  if (offset.is_constant (&c))
-	    return cselib_hash_plus_const_int (XEXP (x, 0),
-					       trunc_int_for_mode (c, Pmode),
-					       create, memmode);
-	}
-      hash = ((unsigned) PLUS + (unsigned) GET_MODE (x)
-	      + cselib_hash_rtx (XEXP (x, 0), create, memmode)
-	      + cselib_hash_rtx (gen_int_mode (offset, GET_MODE (x)),
-				 create, memmode));
-      return hash ? hash : 1 + (unsigned) PLUS;
+      {
+	/* We can't compute these without knowing the MEM mode.  */
+	gcc_assert (memmode != VOIDmode);
+	offset = GET_MODE_SIZE (memmode);
+	if (code == PRE_DEC)
+	  offset = -offset;
+	/* Adjust the hash so that (mem:MEMMODE (pre_* (reg))) hashes
+	   like (mem:MEMMODE (plus (reg) (const_int I))).  */
+	if (GET_MODE (x) == Pmode
+	    && (REG_P (XEXP (x, 0))
+		|| MEM_P (XEXP (x, 0))
+		|| GET_CODE (XEXP (x, 0)) == VALUE))
+	  {
+	    HOST_WIDE_INT c;
+	    if (offset.is_constant (&c))
+	      return cselib_hash_plus_const_int (XEXP (x, 0),
+						 trunc_int_for_mode (c, Pmode),
+						 create, memmode);
+	  }
+
+	hashval_t tem_hash = cselib_hash_rtx (XEXP (x, 0), create, memmode);
+	if (tem_hash == 0)
+	  return 0;
+	hash.merge_hash (tem_hash);
+	tem_hash = cselib_hash_rtx (gen_int_mode (offset, GET_MODE (x)),
+				    create, memmode);
+	if (tem_hash == 0)
+	  return 0;
+	hash.merge_hash (tem_hash);
+	return hash.end () ? hash.end () : 1 + (unsigned) PLUS;
+      }
 
     case PRE_MODIFY:
-      gcc_assert (memmode != VOIDmode);
-      return cselib_hash_rtx (XEXP (x, 1), create, memmode);
+      {
+	gcc_assert (memmode != VOIDmode);
+	hashval_t tem_hash = cselib_hash_rtx (XEXP (x, 1), create, memmode);
+	if (tem_hash == 0)
+	  return 0;
+	hash.merge_hash (tem_hash);
+	return hash.end () ? hash.end () : 1 + (unsigned) PRE_MODIFY;
+      }
 
     case POST_DEC:
     case POST_INC:
     case POST_MODIFY:
-      gcc_assert (memmode != VOIDmode);
-      return cselib_hash_rtx (XEXP (x, 0), create, memmode);
+      {
+	gcc_assert (memmode != VOIDmode);
+	hashval_t tem_hash = cselib_hash_rtx (XEXP (x, 0), create, memmode);
+	if (tem_hash == 0)
+	  return 0;
+	hash.merge_hash (tem_hash);
+	return hash.end () ? hash.end () : 1 + (unsigned) code;
+      }
 
     case PC:
     case CALL:
@@ -1497,6 +1509,20 @@  cselib_hash_rtx (rtx x, int create, machine_mode memmode)
 
   i = GET_RTX_LENGTH (code) - 1;
   fmt = GET_RTX_FORMAT (code);
+
+  if (COMMUTATIVE_P (x))
+    {
+      gcc_assert (i == 1 && fmt[0] == 'e' && fmt[1] == 'e');
+      hashval_t tem1_hash = cselib_hash_rtx (XEXP (x, 1), create, memmode);
+      if (tem1_hash == 0)
+	return 0;
+      hashval_t tem0_hash = cselib_hash_rtx (XEXP (x, 0), create, memmode);
+      if (tem0_hash == 0)
+	return 0;
+      hash.add_commutative (tem0_hash, tem1_hash);
+      return hash.end () ? hash.end () : 1 + (unsigned int) GET_CODE (x);
+    }
+
   for (; i >= 0; i--)
     {
       switch (fmt[i])
@@ -1504,43 +1530,38 @@  cselib_hash_rtx (rtx x, int create, machine_mode memmode)
 	case 'e':
 	  {
 	    rtx tem = XEXP (x, i);
-	    unsigned int tem_hash = cselib_hash_rtx (tem, create, memmode);
-
+	    hashval_t tem_hash = cselib_hash_rtx (tem, create, memmode);
 	    if (tem_hash == 0)
 	      return 0;
-
-	    hash += tem_hash;
+	    hash.merge_hash (tem_hash);
 	  }
 	  break;
 	case 'E':
 	  for (j = 0; j < XVECLEN (x, i); j++)
 	    {
-	      unsigned int tem_hash
+	      hashval_t tem_hash
 		= cselib_hash_rtx (XVECEXP (x, i, j), create, memmode);
-
 	      if (tem_hash == 0)
 		return 0;
-
-	      hash += tem_hash;
+	      hash.merge_hash (tem_hash);
 	    }
 	  break;
 
 	case 's':
 	  {
-	    const unsigned char *p = (const unsigned char *) XSTR (x, i);
+	    const char *p = (const char *) XSTR (x, i);
 
-	    if (p)
-	      while (*p)
-		hash += *p++;
+	    if (p && *p)
+	      hash.add (p, strlen (p));
 	    break;
 	  }
 
 	case 'i':
-	  hash += XINT (x, i);
+	  hash.add_hwi (XINT (x, i));
 	  break;
 
 	case 'p':
-	  hash += constant_lower_bound (SUBREG_BYTE (x));
+	  hash.add_int (constant_lower_bound (SUBREG_BYTE (x)));
 	  break;
 
 	case '0':
@@ -1553,14 +1574,14 @@  cselib_hash_rtx (rtx x, int create, machine_mode memmode)
 	}
     }
 
-  return hash ? hash : 1 + (unsigned int) GET_CODE (x);
+  return hash.end () ? hash.end () : 1 + (unsigned int) GET_CODE (x);
 }
 
 /* Create a new value structure for VALUE and initialize it.  The mode of the
    value is MODE.  */
 
 static inline cselib_val *
-new_cselib_val (unsigned int hash, machine_mode mode, rtx x)
+new_cselib_val (hashval_t hash, machine_mode mode, rtx x)
 {
   cselib_val *e = cselib_val_pool.allocate ();
 
@@ -2293,7 +2314,6 @@  cselib_lookup_1 (rtx x, machine_mode mode,
 {
   cselib_val **slot;
   cselib_val *e;
-  unsigned int hashval;
 
   if (GET_MODE (x) != VOIDmode)
     mode = GET_MODE (x);
@@ -2387,7 +2407,7 @@  cselib_lookup_1 (rtx x, machine_mode mode,
   if (MEM_P (x))
     return cselib_lookup_mem (x, create);
 
-  hashval = cselib_hash_rtx (x, create, memmode);
+  hashval_t hashval = cselib_hash_rtx (x, create, memmode);
   /* Can't even create if hashing is not possible.  */
   if (! hashval)
     return 0;