diff mbox

gcov: add new option (--hash-names) (PR gcov-profile/36412).

Message ID 1c3968f7-f359-0f80-758f-4f8122f940a8@suse.cz
State New
Headers show

Commit Message

Martin Liška Aug. 15, 2016, 11:43 a.m. UTC
On 08/15/2016 12:47 PM, Nathan Sidwell wrote:
> On 08/09/16 10:32, Martin Liška wrote:
>> Hello.
>>
>> Following enhancement for gcov solves issues when we cannot create a file due to a filesystem
>> path length limit. Selected approach utilizes existing md5sum functions.
>>
>> Patch survives make check -k RUNTESTFLAGS="gcov.exp" on x86_64-linux-gnu.
>>
>> Ready for trunk?
>> Thanks,
>> Martin
>>
> 
> +     [@option{-e}|@option{--hash-names}]
> '--hash-filenames' would be better.  Let's not confuse the user with thinking may be  the function names are hashed. (or perhaps '--hash-paths'?  The world's a little unclear on whether 'filename->last bit of file path, or the whole thing')
> 
> 
> +/* For situations when a long name can potentially hit filesystem path limit,
> +   let's calculate md5sum of the patch and create file x.gcov##md5sum.gcov.  */
> +
> +static int flag_hash_names = 0;
> +
> s/patch/path.
> Which  bit of 'x.gcov##md5sum.gcov' is the hash?  is it 'x' or sommethihg else? Perhaps this more detailed comment should be near where the filename is generated.  And this flag just labelled as someting like 'hash long pathnames'
> 
> +  fnotice (file, "  -e, --hash-names                Use hash of file path in "
> .. and ..
> +  { "long-file-names",      no_argument,       NULL, 'e' },

Hi Nathan.

All nits are applied in the second version of patch.

> 
> don't seem to match?  Why 'e'?

I've renamed it to -x, well, a lot of letters are already occupied.

Martin

> 
> nathan

Comments

Nathan Sidwell Aug. 15, 2016, 12:18 p.m. UTC | #1
On 08/15/16 07:43, Martin Liška wrote:

> All nits are applied in the second version of patch.
>
>>
>> don't seem to match?  Why 'e'?
>
> I've renamed it to -x, well, a lot of letters are already occupied.

I guess 'x' may be better.  If there is no good choice, do we really need a 
short name? (There didn't seem to be a good letter available.  I'm ambivalent so 
will leave it to you.)

gcc/doc/gcov.texi
+For situations when a long name can potentially hit filesystem path limit,
+let's calculate md5sum of the path and create file

Sorry I missed this first time round.  The language is straight from the comment 
you added, so not really suitable for user documentation.  Add a bit of 
backstory.  How about:

  "By default, gcov uses the full pathname of the source files to to create an 
output filename.  This can lead to long filenames that can overflow filesystem 
limits.  This option creates names of the form 
@file{@var{source-file}##@var{md5}.gcov}, where the @var{source-file} component 
is the final filename part and the @var{md5} component is calculated from the 
full mangled name that would have been used otherwise."

+  const char *opts = "abcdfhilmno:s:pruvx";
Could you fix the alphabetization while you're there? (s:pr)

+  /* With -x flag, file names will be in format:
+     source_file.c##<md5sum>.gcov.  */

comment shouldn't really mention the option name.  Use some indirection to avoid 
another place that could get inconsistent :)

/* When hashing filenames, we shorten them by only using the filename
    component and appending a hash of the full (mangled) pathname.  */

nathan
diff mbox

Patch

From b923bc8d838cf1de01a97db8f5ea5c78519a782b Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Tue, 9 Aug 2016 16:27:10 +0200
Subject: [PATCH] gcov: add new option (--hash-filenames) (PR
 gcov-profile/36412).

gcc/ChangeLog:

2016-08-09  Martin Liska  <mliska@suse.cz>

	PR gcov-profile/36412
	* doc/gcov.texi: Document --hash-filenames(-x).
	* gcov.c (print_usage): Add the option.
	(process_args): Process the option.
	(md5sum_to_hex): New function.
	(make_gcov_file_name): Do the md5sum and append it to a
	filename.
---
 gcc/doc/gcov.texi |  7 +++++++
 gcc/gcov.c        | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/gcov.texi b/gcc/doc/gcov.texi
index df58df8..1737416 100644
--- a/gcc/doc/gcov.texi
+++ b/gcc/doc/gcov.texi
@@ -133,6 +133,7 @@  gcov [@option{-v}|@option{--version}] [@option{-h}|@option{--help}]
      [@option{-r}|@option{--relative-only}]
      [@option{-s}|@option{--source-prefix} @var{directory}]
      [@option{-u}|@option{--unconditional-branches}]
+     [@option{-x}|@option{--hash-filenames}]
      @var{files}
 @c man end
 @c man begin SEEALSO
@@ -278,6 +279,12 @@  branch:28,nottaken
 Display demangled function names in output. The default is to show
 mangled function names.
 
+@item -x
+@itemx --hash-filenames
+For situations when a long name can potentially hit filesystem path limit,
+let's calculate md5sum of the path and create file
+@file{source_file.c##<md5sum>.gcov}.
+
 @end table
 
 @command{gcov} should be run with the current directory the same as that
diff --git a/gcc/gcov.c b/gcc/gcov.c
index 30fc167..614f371 100644
--- a/gcc/gcov.c
+++ b/gcc/gcov.c
@@ -43,6 +43,7 @@  along with Gcov; see the file COPYING3.  If not see
 
 #include <vector>
 #include <algorithm>
+#include "md5.h"
 
 using namespace std;
 
@@ -359,6 +360,11 @@  static int flag_demangled_names = 0;
 
 static int flag_long_names = 0;
 
+/* For situations when a long name can potentially hit filesystem path limit,
+   let's calculate md5sum of the path and append it to a file name.  */
+
+static int flag_hash_filenames = 0;
+
 /* Output count information for every basic block, not merely those
    that contain line number information.  */
 
@@ -667,6 +673,7 @@  print_usage (int error_p)
   fnotice (file, "  -s, --source-prefix DIR         Source prefix to elide\n");
   fnotice (file, "  -u, --unconditional-branches    Show unconditional branch counts too\n");
   fnotice (file, "  -v, --version                   Print version number, then exit\n");
+  fnotice (file, "  -x, --hash-filenames            Hash long pathnames\n");
   fnotice (file, "\nFor bug reporting instructions, please see:\n%s.\n",
 	   bug_report_url);
   exit (status);
@@ -706,6 +713,7 @@  static const struct option options[] =
   { "source-prefix",        required_argument, NULL, 's' },
   { "unconditional-branches", no_argument,     NULL, 'u' },
   { "display-progress",     no_argument,       NULL, 'd' },
+  { "hash-filenames",	    no_argument,       NULL, 'x' },
   { 0, 0, 0, 0 }
 };
 
@@ -716,8 +724,8 @@  process_args (int argc, char **argv)
 {
   int opt;
 
-  while ((opt = getopt_long (argc, argv, "abcdfhilmno:s:pruv", options, NULL)) !=
-         -1)
+  const char *opts = "abcdfhilmno:s:pruvx";
+  while ((opt = getopt_long (argc, argv, opts, options, NULL)) != -1)
     {
       switch (opt)
 	{
@@ -770,6 +778,9 @@  process_args (int argc, char **argv)
           break;
 	case 'v':
 	  print_version ();
+	case 'x':
+	  flag_hash_filenames = 1;
+	  break;
 	  /* print_version will exit.  */
 	default:
 	  print_usage (true);
@@ -2147,6 +2158,15 @@  canonicalize_name (const char *name)
   return result;
 }
 
+/* Print hex representation of 16 bytes from SUM and write it to BUFFER.  */
+
+static void
+md5sum_to_hex (const char *sum, char *buffer)
+{
+  for (unsigned i = 0; i < 16; i++)
+    sprintf (buffer + (2 * i), "%02x", sum[i]);
+}
+
 /* Generate an output file name. INPUT_NAME is the canonicalized main
    input file and SRC_NAME is the canonicalized file name.
    LONG_OUTPUT_NAMES and PRESERVE_PATHS affect name generation.  With
@@ -2184,6 +2204,30 @@  make_gcov_file_name (const char *input_name, const char *src_name)
   ptr = mangle_name (src_name, ptr);
   strcpy (ptr, ".gcov");
 
+  /* With -x flag, file names will be in format:
+     source_file.c##<md5sum>.gcov.  */
+  if (flag_hash_filenames)
+    {
+      md5_ctx ctx;
+      char md5sum[16];
+      char md5sum_hex[33];
+
+      md5_init_ctx (&ctx);
+      md5_process_bytes (result, strlen (result), &ctx);
+      md5_finish_ctx (&ctx, md5sum);
+      md5sum_to_hex (md5sum, md5sum_hex);
+      free (result);
+
+      result = XNEWVEC (char, strlen (src_name) + 50);
+      ptr = result;
+      ptr = mangle_name (src_name, ptr);
+      ptr[0] = ptr[1] = '#';
+      ptr += 2;
+      memcpy (ptr, md5sum_hex, 32);
+      ptr += 32;
+      strcpy (ptr, ".gcov");
+    }
+
   return result;
 }
 
-- 
2.9.2