diff mbox

[1/2] Change random seeds to 64bit and drop re-crcing

Message ID 1317159016-19625-1-git-send-email-andi@firstfloor.org
State New
Headers show

Commit Message

Andi Kleen Sept. 27, 2011, 9:30 p.m. UTC
From: Andi Kleen <ak@linux.intel.com>

I had some trouble with random build failures in a large LTO project
and it turned out to be random seed collisions in a highly parallel build
(thanks to Honza for suggesting that)

There were multiple problems:
- The way to generate the random seed is pure (milli seconds plus pid)
- It's only 32bit
- Several users take the existing ascii seed and re-CRC32 it again, which
doesn't exactly improve it.

This patch changes that to:
- Always use 64bit seeds as numbers (no re-crcing)
- Change all users to use HOST_WIDE_INT
- When the user specifies a random seed it's still crc32ed, but only in
this case.

Passes bootstrap + testsuite on x86_64-linux. Ok to commit?

-Andi

gcc/cp:

2011-09-26   Andi Kleen <ak@linux.intel.com>

	* repo.c (finish_repo): Use HOST_WIDE_INT_PRINT_HEX_PURE.

gcc/:

2011-09-26   Andi Kleen <ak@linux.intel.com>

	* hwint.h (HOST_WIDE_INT_PRINT_HEX_PURE): Add.
	* lto-streamer.c (lto_get_section_name): Remove crc32_string.
 	Handle numerical random seed.
	* lto-streamer.h (lto_file_decl_data): Change id to unsigned HOST_WIDE_INT.
	* toplev.c (random_seed): Add.
	(init_random_seed): Change for numerical random seed.
	(get_random_seed): Return as HOST_WIDE_INT.
	(set_random_seed): Crc32 existing string.
	* toplev.h (get_random_seed): Change to numercal return.
	* tree.c (get_file_function_name): Remove CRC. Handle numerical random seed.

gcc/lto/:

2011-09-26   Andi Kleen <ak@linux.intel.com>

	* lto.c (lto_resolution_read): Remove id dumping.
	(lto_section_with_id): Turn id HOST_WIDE_ID.
	(create_subid_section_table): Dito.
---
 gcc/cp/repo.c      |    3 ++-
 gcc/hwint.h        |    1 +
 gcc/lto-streamer.c |    8 ++++----
 gcc/lto-streamer.h |    2 +-
 gcc/lto/lto.c      |   11 ++++-------
 gcc/toplev.c       |   23 ++++++++++++-----------
 gcc/toplev.h       |    2 +-
 gcc/tree.c         |    6 +++---
 8 files changed, 28 insertions(+), 28 deletions(-)

Comments

Richard Biener Sept. 28, 2011, 8:28 a.m. UTC | #1
On Tue, Sep 27, 2011 at 11:30 PM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> I had some trouble with random build failures in a large LTO project
> and it turned out to be random seed collisions in a highly parallel build
> (thanks to Honza for suggesting that)
>
> There were multiple problems:
> - The way to generate the random seed is pure (milli seconds plus pid)
> - It's only 32bit
> - Several users take the existing ascii seed and re-CRC32 it again, which
> doesn't exactly improve it.
>
> This patch changes that to:
> - Always use 64bit seeds as numbers (no re-crcing)
> - Change all users to use HOST_WIDE_INT
> - When the user specifies a random seed it's still crc32ed, but only in
> this case.
>
> Passes bootstrap + testsuite on x86_64-linux. Ok to commit?

Ok if there are no objections from other folks within 24h.

Thanks,
Richard.

> -Andi
>
> gcc/cp:
>
> 2011-09-26   Andi Kleen <ak@linux.intel.com>
>
>        * repo.c (finish_repo): Use HOST_WIDE_INT_PRINT_HEX_PURE.
>
> gcc/:
>
> 2011-09-26   Andi Kleen <ak@linux.intel.com>
>
>        * hwint.h (HOST_WIDE_INT_PRINT_HEX_PURE): Add.
>        * lto-streamer.c (lto_get_section_name): Remove crc32_string.
>        Handle numerical random seed.
>        * lto-streamer.h (lto_file_decl_data): Change id to unsigned HOST_WIDE_INT.
>        * toplev.c (random_seed): Add.
>        (init_random_seed): Change for numerical random seed.
>        (get_random_seed): Return as HOST_WIDE_INT.
>        (set_random_seed): Crc32 existing string.
>        * toplev.h (get_random_seed): Change to numercal return.
>        * tree.c (get_file_function_name): Remove CRC. Handle numerical random seed.
>
> gcc/lto/:
>
> 2011-09-26   Andi Kleen <ak@linux.intel.com>
>
>        * lto.c (lto_resolution_read): Remove id dumping.
>        (lto_section_with_id): Turn id HOST_WIDE_ID.
>        (create_subid_section_table): Dito.
> ---
>  gcc/cp/repo.c      |    3 ++-
>  gcc/hwint.h        |    1 +
>  gcc/lto-streamer.c |    8 ++++----
>  gcc/lto-streamer.h |    2 +-
>  gcc/lto/lto.c      |   11 ++++-------
>  gcc/toplev.c       |   23 ++++++++++++-----------
>  gcc/toplev.h       |    2 +-
>  gcc/tree.c         |    6 +++---
>  8 files changed, 28 insertions(+), 28 deletions(-)
>
> diff --git a/gcc/cp/repo.c b/gcc/cp/repo.c
> index 16a192e..ca971b6 100644
> --- a/gcc/cp/repo.c
> +++ b/gcc/cp/repo.c
> @@ -263,7 +263,8 @@ finish_repo (void)
>         anonymous namespaces will get the same mangling when this
>         file is recompiled.  */
>       if (!strstr (args, "'-frandom-seed="))
> -       fprintf (repo_file, " '-frandom-seed=%s'", get_random_seed (false));
> +       fprintf (repo_file, " '-frandom-seed=" HOST_WIDE_INT_PRINT_HEX_PURE "'",
> +                get_random_seed (false));
>       fprintf (repo_file, "\n");
>     }
>
> diff --git a/gcc/hwint.h b/gcc/hwint.h
> index 2643aee..f5e0bee 100644
> --- a/gcc/hwint.h
> +++ b/gcc/hwint.h
> @@ -102,6 +102,7 @@ extern char sizeof_long_long_must_be_8[sizeof(long long) == 8 ? 1 : -1];
>  #define HOST_WIDE_INT_PRINT_DEC_C HOST_WIDE_INT_PRINT_DEC HOST_WIDE_INT_PRINT_C
>  #define HOST_WIDE_INT_PRINT_UNSIGNED "%" HOST_WIDE_INT_PRINT "u"
>  #define HOST_WIDE_INT_PRINT_HEX "%#" HOST_WIDE_INT_PRINT "x"
> +#define HOST_WIDE_INT_PRINT_HEX_PURE "%" HOST_WIDE_INT_PRINT "x"
>
>  /* Set HOST_WIDEST_INT.  This is a 64-bit type unless the compiler
>    in use has no 64-bit type at all; in that case it's 32 bits.  */
> diff --git a/gcc/lto-streamer.c b/gcc/lto-streamer.c
> index 633c3ce..e3ccb79 100644
> --- a/gcc/lto-streamer.c
> +++ b/gcc/lto-streamer.c
> @@ -166,13 +166,13 @@ lto_get_section_name (int section_type, const char *name, struct lto_file_decl_d
>      doesn't confuse the reader with merged sections.
>
>      For options don't add a ID, the option reader cannot deal with them
> -     and merging should be ok here.
> -
> -     XXX: use crc64 to minimize collisions? */
> +     and merging should be ok here. */
>   if (section_type == LTO_section_opts)
>     strcpy (post, "");
> +  else if (f != NULL)
> +    sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, f->id);
>   else
> -    sprintf (post, ".%x", f ? f->id : crc32_string(0, get_random_seed (false)));
> +    sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, get_random_seed (false));
>   return concat (LTO_SECTION_NAME_PREFIX, sep, add, post, NULL);
>  }
>
> diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
> index 190d6a3..2564bd2 100644
> --- a/gcc/lto-streamer.h
> +++ b/gcc/lto-streamer.h
> @@ -552,7 +552,7 @@ struct GTY(()) lto_file_decl_data
>   struct lto_file_decl_data *next;
>
>   /* Sub ID for merged objects. */
> -  unsigned id;
> +  unsigned HOST_WIDE_INT id;
>
>   /* Symbol resolutions for this file */
>   VEC(ld_plugin_symbol_resolution_t,heap) * GTY((skip)) resolutions;
> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
> index 0b1dcb9..77eb1a1 100644
> --- a/gcc/lto/lto.c
> +++ b/gcc/lto/lto.c
> @@ -976,9 +976,6 @@ lto_resolution_read (splay_tree file_ids, FILE *resolution, lto_file *file)
>        }
>
>       file_data = (struct lto_file_decl_data *)nd->value;
> -      if (cgraph_dump_file)
> -       fprintf (cgraph_dump_file, "Adding resolution %u %u to id %x\n",
> -                index, r, file_data->id);
>       VEC_safe_grow_cleared (ld_plugin_symbol_resolution_t, heap,
>                             file_data->resolutions,
>                             max_index + 1);
> @@ -990,14 +987,14 @@ lto_resolution_read (splay_tree file_ids, FILE *resolution, lto_file *file)
>  /* Is the name for a id'ed LTO section? */
>
>  static int
> -lto_section_with_id (const char *name, unsigned *id)
> +lto_section_with_id (const char *name, unsigned HOST_WIDE_INT *id)
>  {
>   const char *s;
>
>   if (strncmp (name, LTO_SECTION_NAME_PREFIX, strlen (LTO_SECTION_NAME_PREFIX)))
>     return 0;
>   s = strrchr (name, '.');
> -  return s && sscanf (s, ".%x", id) == 1;
> +  return s && sscanf (s, "." HOST_WIDE_INT_PRINT_HEX_PURE, id) == 1;
>  }
>
>  /* Create file_data of each sub file id */
> @@ -1008,7 +1005,7 @@ create_subid_section_table (void **slot, void *data)
>   struct lto_section_slot s_slot, *new_slot;
>   struct lto_section_slot *ls = *(struct lto_section_slot **)slot;
>   splay_tree file_ids = (splay_tree)data;
> -  unsigned id;
> +  unsigned HOST_WIDE_INT id;
>   splay_tree_node nd;
>   void **hash_slot;
>   char *new_name;
> @@ -1080,7 +1077,7 @@ static int lto_create_files_from_ids (splay_tree_node node, void *data)
>
>   lto_file_finalize (file_data, lw->file);
>   if (cgraph_dump_file)
> -    fprintf (cgraph_dump_file, "Creating file %s with sub id %x\n",
> +    fprintf (cgraph_dump_file, "Creating file %s with sub id " HOST_WIDE_INT_PRINT_HEX "\n",
>             file_data->file_name, file_data->id);
>   file_data->next = *lw->file_data;
>   *lw->file_data = file_data;
> diff --git a/gcc/toplev.c b/gcc/toplev.c
> index 3688c09..e1a8b35 100644
> --- a/gcc/toplev.c
> +++ b/gcc/toplev.c
> @@ -141,6 +141,9 @@ static const char *flag_random_seed;
>    user has specified a particular random seed.  */
>  unsigned local_tick;
>
> +/* Random number for this compilation */
> +HOST_WIDE_INT random_seed;
> +
>  /* -f flags.  */
>
>  /* Generate code for GNU or NeXT Objective-C runtime environment.  */
> @@ -251,7 +254,7 @@ announce_function (tree decl)
>     }
>  }
>
> -/* Initialize local_tick with the time of day, or -1 if
> +/* Initialize local_tick with a random number or -1 if
>    flag_random_seed is set.  */
>
>  static void
> @@ -286,24 +289,21 @@ init_local_tick (void)
>  static void
>  init_random_seed (void)
>  {
> -  unsigned HOST_WIDE_INT value;
> -  static char random_seed[HOST_BITS_PER_WIDE_INT / 4 + 3];
> -
> -  value = local_tick ^ getpid ();
> -
> -  sprintf (random_seed, HOST_WIDE_INT_PRINT_HEX, value);
> -  flag_random_seed = random_seed;
> +  if (flag_random_seed)
> +    random_seed = crc32_string (0, flag_random_seed);
> +  else if (!random_seed)
> +    random_seed = local_tick ^ getpid ();  /* Old racey fallback method */
>  }
>
> -/* Obtain the random_seed string.  Unless NOINIT, initialize it if
> +/* Obtain the random_seed.  Unless NOINIT, initialize it if
>    it's not provided in the command line.  */
>
> -const char *
> +HOST_WIDE_INT
>  get_random_seed (bool noinit)
>  {
>   if (!flag_random_seed && !noinit)
>     init_random_seed ();
> -  return flag_random_seed;
> +  return random_seed;
>  }
>
>  /* Modify the random_seed string to VAL.  Return its previous
> @@ -314,6 +314,7 @@ set_random_seed (const char *val)
>  {
>   const char *old = flag_random_seed;
>   flag_random_seed = val;
> +  random_seed = crc32_string (0, flag_random_seed);
>   return old;
>  }
>
> diff --git a/gcc/toplev.h b/gcc/toplev.h
> index 2455dc0..588cfdb 100644
> --- a/gcc/toplev.h
> +++ b/gcc/toplev.h
> @@ -77,7 +77,7 @@ extern bool set_src_pwd                      (const char *);
>
>  /* Functions used to manipulate the random seed.  */
>
> -extern const char *get_random_seed (bool);
> +extern HOST_WIDE_INT get_random_seed (bool);
>  extern const char *set_random_seed (const char *);
>
>  #endif /* ! GCC_TOPLEV_H */
> diff --git a/gcc/tree.c b/gcc/tree.c
> index a53c9f4..6a2e9fb 100644
> --- a/gcc/tree.c
> +++ b/gcc/tree.c
> @@ -8749,11 +8749,11 @@ get_file_function_name (const char *type)
>        file = input_filename;
>
>       len = strlen (file);
> -      q = (char *) alloca (9 * 2 + len + 1);
> +      q = (char *) alloca (9 + 17 + len + 1);
>       memcpy (q, file, len + 1);
>
> -      sprintf (q + len, "_%08X_%08X", crc32_string (0, name),
> -              crc32_string (0, get_random_seed (false)));
> +      snprintf (q + len, 9 + 17 + 1, "_%08X_" HOST_WIDE_INT_PRINT_HEX,
> +               crc32_string (0, name), get_random_seed (false));
>
>       p = q;
>     }
> --
> 1.7.5.4
>
>
diff mbox

Patch

diff --git a/gcc/cp/repo.c b/gcc/cp/repo.c
index 16a192e..ca971b6 100644
--- a/gcc/cp/repo.c
+++ b/gcc/cp/repo.c
@@ -263,7 +263,8 @@  finish_repo (void)
 	 anonymous namespaces will get the same mangling when this
 	 file is recompiled.  */
       if (!strstr (args, "'-frandom-seed="))
-	fprintf (repo_file, " '-frandom-seed=%s'", get_random_seed (false));
+	fprintf (repo_file, " '-frandom-seed=" HOST_WIDE_INT_PRINT_HEX_PURE "'", 
+		 get_random_seed (false));
       fprintf (repo_file, "\n");
     }
 
diff --git a/gcc/hwint.h b/gcc/hwint.h
index 2643aee..f5e0bee 100644
--- a/gcc/hwint.h
+++ b/gcc/hwint.h
@@ -102,6 +102,7 @@  extern char sizeof_long_long_must_be_8[sizeof(long long) == 8 ? 1 : -1];
 #define HOST_WIDE_INT_PRINT_DEC_C HOST_WIDE_INT_PRINT_DEC HOST_WIDE_INT_PRINT_C
 #define HOST_WIDE_INT_PRINT_UNSIGNED "%" HOST_WIDE_INT_PRINT "u"
 #define HOST_WIDE_INT_PRINT_HEX "%#" HOST_WIDE_INT_PRINT "x"
+#define HOST_WIDE_INT_PRINT_HEX_PURE "%" HOST_WIDE_INT_PRINT "x"
 
 /* Set HOST_WIDEST_INT.  This is a 64-bit type unless the compiler
    in use has no 64-bit type at all; in that case it's 32 bits.  */
diff --git a/gcc/lto-streamer.c b/gcc/lto-streamer.c
index 633c3ce..e3ccb79 100644
--- a/gcc/lto-streamer.c
+++ b/gcc/lto-streamer.c
@@ -166,13 +166,13 @@  lto_get_section_name (int section_type, const char *name, struct lto_file_decl_d
      doesn't confuse the reader with merged sections.
 
      For options don't add a ID, the option reader cannot deal with them
-     and merging should be ok here.
-
-     XXX: use crc64 to minimize collisions? */
+     and merging should be ok here. */
   if (section_type == LTO_section_opts)
     strcpy (post, "");
+  else if (f != NULL) 
+    sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, f->id);
   else
-    sprintf (post, ".%x", f ? f->id : crc32_string(0, get_random_seed (false)));
+    sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, get_random_seed (false)); 
   return concat (LTO_SECTION_NAME_PREFIX, sep, add, post, NULL);
 }
 
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index 190d6a3..2564bd2 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -552,7 +552,7 @@  struct GTY(()) lto_file_decl_data
   struct lto_file_decl_data *next;
 
   /* Sub ID for merged objects. */
-  unsigned id;
+  unsigned HOST_WIDE_INT id;
 
   /* Symbol resolutions for this file */
   VEC(ld_plugin_symbol_resolution_t,heap) * GTY((skip)) resolutions;
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 0b1dcb9..77eb1a1 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -976,9 +976,6 @@  lto_resolution_read (splay_tree file_ids, FILE *resolution, lto_file *file)
 	}
 
       file_data = (struct lto_file_decl_data *)nd->value;
-      if (cgraph_dump_file)
-	fprintf (cgraph_dump_file, "Adding resolution %u %u to id %x\n",
-		 index, r, file_data->id);
       VEC_safe_grow_cleared (ld_plugin_symbol_resolution_t, heap, 
 			     file_data->resolutions,
 			     max_index + 1);
@@ -990,14 +987,14 @@  lto_resolution_read (splay_tree file_ids, FILE *resolution, lto_file *file)
 /* Is the name for a id'ed LTO section? */
 
 static int 
-lto_section_with_id (const char *name, unsigned *id)
+lto_section_with_id (const char *name, unsigned HOST_WIDE_INT *id)
 {
   const char *s;
 
   if (strncmp (name, LTO_SECTION_NAME_PREFIX, strlen (LTO_SECTION_NAME_PREFIX)))
     return 0;
   s = strrchr (name, '.');
-  return s && sscanf (s, ".%x", id) == 1;
+  return s && sscanf (s, "." HOST_WIDE_INT_PRINT_HEX_PURE, id) == 1;
 }
 
 /* Create file_data of each sub file id */
@@ -1008,7 +1005,7 @@  create_subid_section_table (void **slot, void *data)
   struct lto_section_slot s_slot, *new_slot;
   struct lto_section_slot *ls = *(struct lto_section_slot **)slot;
   splay_tree file_ids = (splay_tree)data;
-  unsigned id;
+  unsigned HOST_WIDE_INT id;
   splay_tree_node nd;
   void **hash_slot;
   char *new_name;
@@ -1080,7 +1077,7 @@  static int lto_create_files_from_ids (splay_tree_node node, void *data)
 
   lto_file_finalize (file_data, lw->file);
   if (cgraph_dump_file)
-    fprintf (cgraph_dump_file, "Creating file %s with sub id %x\n", 
+    fprintf (cgraph_dump_file, "Creating file %s with sub id " HOST_WIDE_INT_PRINT_HEX "\n", 
 	     file_data->file_name, file_data->id);
   file_data->next = *lw->file_data;
   *lw->file_data = file_data;
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 3688c09..e1a8b35 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -141,6 +141,9 @@  static const char *flag_random_seed;
    user has specified a particular random seed.  */
 unsigned local_tick;
 
+/* Random number for this compilation */
+HOST_WIDE_INT random_seed;
+
 /* -f flags.  */
 
 /* Generate code for GNU or NeXT Objective-C runtime environment.  */
@@ -251,7 +254,7 @@  announce_function (tree decl)
     }
 }
 
-/* Initialize local_tick with the time of day, or -1 if
+/* Initialize local_tick with a random number or -1 if
    flag_random_seed is set.  */
 
 static void
@@ -286,24 +289,21 @@  init_local_tick (void)
 static void
 init_random_seed (void)
 {
-  unsigned HOST_WIDE_INT value;
-  static char random_seed[HOST_BITS_PER_WIDE_INT / 4 + 3];
-
-  value = local_tick ^ getpid ();
-
-  sprintf (random_seed, HOST_WIDE_INT_PRINT_HEX, value);
-  flag_random_seed = random_seed;
+  if (flag_random_seed)
+    random_seed = crc32_string (0, flag_random_seed);
+  else if (!random_seed)
+    random_seed = local_tick ^ getpid ();  /* Old racey fallback method */
 }
 
-/* Obtain the random_seed string.  Unless NOINIT, initialize it if
+/* Obtain the random_seed.  Unless NOINIT, initialize it if
    it's not provided in the command line.  */
 
-const char *
+HOST_WIDE_INT
 get_random_seed (bool noinit)
 {
   if (!flag_random_seed && !noinit)
     init_random_seed ();
-  return flag_random_seed;
+  return random_seed;
 }
 
 /* Modify the random_seed string to VAL.  Return its previous
@@ -314,6 +314,7 @@  set_random_seed (const char *val)
 {
   const char *old = flag_random_seed;
   flag_random_seed = val;
+  random_seed = crc32_string (0, flag_random_seed);
   return old;
 }
 
diff --git a/gcc/toplev.h b/gcc/toplev.h
index 2455dc0..588cfdb 100644
--- a/gcc/toplev.h
+++ b/gcc/toplev.h
@@ -77,7 +77,7 @@  extern bool set_src_pwd		       (const char *);
 
 /* Functions used to manipulate the random seed.  */
 
-extern const char *get_random_seed (bool);
+extern HOST_WIDE_INT get_random_seed (bool);
 extern const char *set_random_seed (const char *);
 
 #endif /* ! GCC_TOPLEV_H */
diff --git a/gcc/tree.c b/gcc/tree.c
index a53c9f4..6a2e9fb 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -8749,11 +8749,11 @@  get_file_function_name (const char *type)
 	file = input_filename;
 
       len = strlen (file);
-      q = (char *) alloca (9 * 2 + len + 1);
+      q = (char *) alloca (9 + 17 + len + 1);
       memcpy (q, file, len + 1);
 
-      sprintf (q + len, "_%08X_%08X", crc32_string (0, name),
-	       crc32_string (0, get_random_seed (false)));
+      snprintf (q + len, 9 + 17 + 1, "_%08X_" HOST_WIDE_INT_PRINT_HEX, 
+		crc32_string (0, name), get_random_seed (false));
 
       p = q;
     }