diff mbox

[6/7,D] libiberty: Improve support for demangling D2 templates

Message ID CABOHX+f5jd6y5rZL0f1FFziu6u=rG_F7abDrijOXvZKv5fJvfQ@mail.gmail.com
State New
Headers show

Commit Message

Iain Buclaw May 16, 2015, 2:36 p.m. UTC
On 14 May 2015 at 17:30, Iain Buclaw <ibuclaw@gdcproject.org> wrote:
> On 14 May 2015 at 15:24, Jeff Law <law@redhat.com> wrote:
>> On 05/13/2015 02:51 AM, Iain Buclaw wrote:
>>>
>>> In my tests, this gives the demangler near-complete support.  Of a
>>> sample of about 75k symbols pulled from the standard library
>>> unittester, all but 20 were successfully parsed.
>>>
>>> ---
>>> libiberty/ChangeLog:
>>>
>>> 2015-05-13 Iain Buclaw<ibuclaw@gdcproject.org>
>>>
>>>      * d-demangle.c (dlang_symbol_kinds): New enum.
>>>      (dlang_parse_symbol): Update signature.  Handle an ambiguity between
>>> mangle
>>>      symbol for pascal and template value arguments.  Only check for a
>>> type
>>>      if parsing a function, or at the top level.  Return failure if the
>>>      entire symbol was not successfully demangled.
>>>      (dlang_identifier): Update signature.  Handle an ambiguity between
>>> two
>>>      adjacent digits in a mangled symbol string.
>>>      (dlang_type): Update call to dlang_parse_symbol.
>>>      (dlang_template_args): Likewise.
>>>      (dlang_parse_template): Likewise.
>>>      (dlang_demangle): Likewise.
>>>      * testsuite/d-demangle-expected: Fix bad tests found, and add
>>> problematic
>>>      examples to the unittests.
>>
>> OK.
>>
>> I'm going to trust that the code to dis-ambiguate the adjacent digits in
>> dlang_identifier is correct.  The rest of the changes were pretty easy to
>> follow :-0
>>
>> Jeff
>>
>
> Actually, the one snippet that we should be OK without in that
> dis-ambiguate section is the while loop with the comment: "handle any
> overflow".
>

I also discovered that an infinite loop is possible in that
dis-ambiguate section while I was testing gdb with these changes on
various programs.

So FYI, I've removed the while loop and fixed the problem as mentioned above.

Unfortunately it also seems that the second ambiguity with
extern(Pascal) vs Template value parameters is a little bit more
difficult to get right.  I've got an idea for a way to handle it, but
I don't like it, so I've raised a bug in upstream D. :-)

Iain
diff mbox

Patch

---
 libiberty/d-demangle.c                  | 185 ++++++++++++++++++++++++--------
 libiberty/testsuite/d-demangle-expected |  44 ++++++--
 2 files changed, 175 insertions(+), 54 deletions(-)

diff --git a/libiberty/d-demangle.c b/libiberty/d-demangle.c
index 0af926c..c697b00 100644
--- a/libiberty/d-demangle.c
+++ b/libiberty/d-demangle.c
@@ -165,6 +165,21 @@  string_prepend (string *p, const char *s)
     }
 }
 
+/* What kinds of symbol we could be parsing.  */
+enum dlang_symbol_kinds
+{
+  /* Top-level symbol, needs its type checked.  */
+  dlang_top_level,
+  /* Function symbol, needs its type checked.  */
+  dlang_function,
+  /* Strongly typed name, such as for classes, structs and enums.  */
+  dlang_type_name,
+  /* Template identifier.  */
+  dlang_template_ident,
+  /* Template symbol parameter.  */
+  dlang_template_param
+};
+
 /* Prototypes for forward referenced functions */
 static const char *dlang_function_args (string *, const char *);
 
@@ -172,7 +187,8 @@  static const char *dlang_type (string *, const char *);
 
 static const char *dlang_value (string *, const char *, const char *, char);
 
-static const char *dlang_parse_symbol (string *, const char *);
+static const char *dlang_parse_symbol (string *, const char *,
+				       enum dlang_symbol_kinds);
 
 static const char *dlang_parse_tuple (string *, const char *);
 
@@ -527,7 +543,7 @@  dlang_type (string *decl, const char *mangled)
     case 'E': /* enum T */
     case 'T': /* typedef T */
       mangled++;
-      return dlang_parse_symbol (decl, mangled);
+      return dlang_parse_symbol (decl, mangled, dlang_type_name);
     case 'D': /* delegate T */
     {
       string mods;
@@ -662,114 +678,162 @@  dlang_type (string *decl, const char *mangled)
 /* Extract the identifier from MANGLED and append it to DECL.
    Return the remaining string on success or NULL on failure.  */
 static const char *
-dlang_identifier (string *decl, const char *mangled)
+dlang_identifier (string *decl, const char *mangled,
+		  enum dlang_symbol_kinds kind)
 {
+  char *endptr;
+  long len;
+
   if (mangled == NULL || *mangled == '\0')
     return NULL;
 
-  if (ISDIGIT (*mangled))
+  len = strtol (mangled, &endptr, 10);
+
+  if (endptr == NULL || len <= 0)
+    return NULL;
+
+  /* In template parameter symbols, the first character of the mangled
+     name can be a digit.  This causes ambiguity issues because the
+     digits of the two numbers are adjacent.  */
+  if (kind == dlang_template_param)
     {
-      char *endptr;
-      long i = strtol (mangled, &endptr, 10);
+      long psize = len;
+      char *pend;
+      int saved = string_length (decl);
+
+      /* Work backwards until a match is found.  */
+      for (pend = endptr; endptr != NULL; pend--)
+	{
+	  mangled = pend;
 
-      if (endptr == NULL || i <= 0 || strlen (endptr) < (size_t) i)
+	  /* Reached the beginning of the pointer to the name length,
+	     try parsing the entire symbol.  */
+	  if (psize == 0)
+	    {
+	      psize = len;
+	      pend = endptr;
+	      endptr = NULL;
+	    }
+
+	  /* Check whether template parameter is a function with a valid
+	     return type or an untyped identifier.  */
+	  if (ISDIGIT (*mangled))
+	    mangled = dlang_parse_symbol (decl, mangled, dlang_template_ident);
+	  else if (strncmp (mangled, "_D", 2) == 0)
+	    {
+	      mangled += 2;
+	      mangled = dlang_parse_symbol (decl, mangled, dlang_function);
+	    }
+
+	  /* Check for name length mismatch.  */
+	  if (mangled && (mangled - pend) == psize)
+	    return mangled;
+
+	  psize /= 10;
+	  string_setlength (decl, saved);
+	}
+
+      /* No match on any combinations.  */
+      return NULL;
+    }
+  else
+    {
+      if (strlen (endptr) < (size_t) len)
 	return NULL;
 
       mangled = endptr;
 
       /* May be a template instance.  */
-      if (i >= 5 && strncmp (mangled, "__T", 3) == 0)
+      if (len >= 5 && strncmp (mangled, "__T", 3) == 0)
 	{
 	  /* Template symbol.  */
 	  if (ISDIGIT (mangled[3]) && mangled[3] != '0')
-	    return dlang_parse_template (decl, mangled, i);
+	    return dlang_parse_template (decl, mangled, len);
 
 	  return NULL;
 	}
 
-      switch (i)
+      switch (len)
 	{
 	case 6:
-	  if (strncmp (mangled, "__ctor", i) == 0)
+	  if (strncmp (mangled, "__ctor", len) == 0)
 	    {
 	      /* Constructor symbol for a class/struct.  */
 	      string_append (decl, "this");
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
-	  else if (strncmp (mangled, "__dtor", i) == 0)
+	  else if (strncmp (mangled, "__dtor", len) == 0)
 	    {
 	      /* Destructor symbol for a class/struct.  */
 	      string_append (decl, "~this");
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
-	  else if (strncmp (mangled, "__initZ", i+1) == 0)
+	  else if (strncmp (mangled, "__initZ", len+1) == 0)
 	    {
 	      /* The static initialiser for a given symbol.  */
 	      string_append (decl, "init$");
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
-	  else if (strncmp (mangled, "__vtblZ", i+1) == 0)
+	  else if (strncmp (mangled, "__vtblZ", len+1) == 0)
 	    {
 	      /* The vtable symbol for a given class.  */
 	      string_prepend (decl, "vtable for ");
 	      string_setlength (decl, string_length (decl) - 1);
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
 	  break;
 
 	case 7:
-	  if (strncmp (mangled, "__ClassZ", i+1) == 0)
+	  if (strncmp (mangled, "__ClassZ", len+1) == 0)
 	    {
 	      /* The classinfo symbol for a given class.  */
 	      string_prepend (decl, "ClassInfo for ");
 	      string_setlength (decl, string_length (decl) - 1);
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
 	  break;
 
 	case 10:
-	  if (strncmp (mangled, "__postblitMFZ", i+3) == 0)
+	  if (strncmp (mangled, "__postblitMFZ", len+3) == 0)
 	    {
 	      /* Postblit symbol for a struct.  */
 	      string_append (decl, "this(this)");
-	      mangled += i + 3;
+	      mangled += len + 3;
 	      return mangled;
 	    }
 	  break;
 
 	case 11:
-	  if (strncmp (mangled, "__InterfaceZ", i+1) == 0)
+	  if (strncmp (mangled, "__InterfaceZ", len+1) == 0)
 	    {
 	      /* The interface symbol for a given class.  */
 	      string_prepend (decl, "Interface for ");
 	      string_setlength (decl, string_length (decl) - 1);
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
 	  break;
 
 	case 12:
-	  if (strncmp (mangled, "__ModuleInfoZ", i+1) == 0)
+	  if (strncmp (mangled, "__ModuleInfoZ", len+1) == 0)
 	    {
 	      /* The ModuleInfo symbol for a given module.  */
 	      string_prepend (decl, "ModuleInfo for ");
 	      string_setlength (decl, string_length (decl) - 1);
-	      mangled += i;
+	      mangled += len;
 	      return mangled;
 	    }
 	  break;
 	}
 
-      string_appendn (decl, mangled, i);
-      mangled += i;
+      string_appendn (decl, mangled, len);
+      mangled += len;
     }
-  else
-    return NULL;
 
   return mangled;
 }
@@ -1274,25 +1338,38 @@  dlang_call_convention_p (const char *mangled)
 /* Extract and demangle the symbol in MANGLED and append it to DECL.
    Returns the remaining signature on success or NULL on failure.  */
 static const char *
-dlang_parse_symbol (string *decl, const char *mangled)
+dlang_parse_symbol (string *decl, const char *mangled,
+		    enum dlang_symbol_kinds kind)
 {
+  int saved;
   size_t n = 0;
   do
     {
       if (n++)
 	string_append (decl, ".");
 
-      mangled = dlang_identifier (decl, mangled);
+      mangled = dlang_identifier (decl, mangled, kind);
 
       if (mangled && dlang_call_convention_p (mangled))
 	{
 	  string mods;
-	  int saved;
+	  const char *start = NULL;
+	  int checkpoint = 0;
 
 	  /* Skip over 'this' parameter.  */
 	  if (*mangled == 'M')
 	    mangled++;
 
+	  /* We have reached here because we expect an extern(Pascal) function.
+	     However this is so rare, that it is more likely a template value
+	     parameter.  Since this can't be assumed, first attempt parsing
+	     the symbol as a function, and then back out on failure.  */
+	  if (*mangled == 'V')
+	    {
+	      start = mangled;
+	      checkpoint = string_length (decl);
+	    }
+
 	  /* Save the type modifiers for appending at the end.  */
 	  string_init (&mods);
 	  mangled = dlang_type_modifiers (&mods, mangled);
@@ -1307,21 +1384,41 @@  dlang_parse_symbol (string *decl, const char *mangled)
 	  mangled = dlang_function_args (decl, mangled);
 	  string_append (decl, ")");
 
-	  /* Demangle the function return type as a kind of sanity test.  */
-	  if (mangled && !ISDIGIT (*mangled))
-	    {
-	      saved = string_length (decl);
-	      mangled = dlang_type (decl, mangled);
-	      string_setlength (decl, saved);
-	    }
-
 	  /* Add any const/immutable/shared modifier. */
 	  string_appendn (decl, mods.b, string_length (&mods));
 	  string_delete (&mods);
+
+	  if (mangled == NULL && checkpoint != 0)
+	    {
+	      mangled = start;
+	      string_setlength (decl, checkpoint);
+	    }
 	}
     }
   while (mangled && ISDIGIT (*mangled));
 
+  /* Only top-level symbols or function template parameters have
+     a type that needs checking.  */
+  if (kind == dlang_top_level || kind == dlang_function)
+    {
+      /* Artificial symbols end with 'Z' and have no type.  */
+      if (mangled && *mangled == 'Z')
+	mangled++;
+      else
+	{
+	  saved = string_length (decl);
+	  mangled = dlang_type (decl, mangled);
+	  string_setlength (decl, saved);
+	}
+
+      /* Check that the entire symbol was successfully demangled.  */
+      if (kind == dlang_top_level)
+	{
+	  if (mangled == NULL || *mangled != '\0')
+	    return NULL;
+	}
+    }
+
   return mangled;
 }
 
@@ -1373,7 +1470,7 @@  dlang_template_args (string *decl, const char *mangled)
 	{
 	case 'S': /* Symbol parameter.  */
 	  mangled++;
-	  mangled = dlang_parse_symbol (decl, mangled);
+	  mangled = dlang_parse_symbol (decl, mangled, dlang_template_param);
 	  break;
 	case 'T': /* Type parameter.  */
 	  mangled++;
@@ -1431,7 +1528,7 @@  dlang_parse_template (string *decl, const char *mangled, long len)
   mangled += 3;
 
   /* Template identifier.  */
-  mangled = dlang_identifier (decl, mangled);
+  mangled = dlang_identifier (decl, mangled, dlang_template_ident);
 
   /* Template arguments.  */
   string_append (decl, "!(");
@@ -1470,7 +1567,7 @@  dlang_demangle (const char *mangled, int option ATTRIBUTE_UNUSED)
     {
       mangled += 2;
 
-      if (dlang_parse_symbol (&decl, mangled) == NULL)
+      if (dlang_parse_symbol (&decl, mangled, dlang_top_level) == NULL)
 	string_delete (&decl);
     }
 
diff --git a/libiberty/testsuite/d-demangle-expected b/libiberty/testsuite/d-demangle-expected
index 5dd0678..32da47f 100644
--- a/libiberty/testsuite/d-demangle-expected
+++ b/libiberty/testsuite/d-demangle-expected
@@ -606,12 +606,12 @@  _D8demangle17__T4testS6symbolZv
 demangle.test!(symbol)
 #
 --format=dlang
-_D8demangle21__T4testS6symbol3fooZv
+_D8demangle23__T4testS116symbol3fooZv
 demangle.test!(symbol.foo)
 #
 --format=dlang
-_D8demangle25__T4testS6symbol3foo3barZv
-demangle.test!(symbol.foo.bar)
+_D8demangle32__T4testS20_D6symbol3foo3barFZvZv
+demangle.test!(symbol.foo.bar())
 #
 --format=dlang
 _D8demangle19__T4testTaS6symbolZv
@@ -920,19 +920,19 @@  _D6plugin8generateFiiZAOa
 plugin.generate(int, int)
 #
 --format=dlang
-_D8demangle3fnAFZv3fnBMFZv
+_D8demangle3fnAFZ3fnBMFZv
 demangle.fnA().fnB()
 #
 --format=dlang
-_D8demangle4mainFZv1S3fnCFZv
+_D8demangle4mainFZ1S3fnCMFZv
 demangle.main().S.fnC()
 #
 --format=dlang
-_D8demangle4mainFZv1S3fnDMFZv
+_D8demangle4mainFZ1S3fnDMFZv
 demangle.main().S.fnD()
 #
 --format=dlang
-_D8demangle4mainFZv5localMFZi
+_D8demangle4mainFZ5localMFZi
 demangle.main().local()
 #
 --format=dlang
@@ -976,7 +976,7 @@  _D6object14TypeInfo_Array8argTypesMFNbNfJC8TypeInfoJC8TypeInfoZi
 object.TypeInfo_Array.argTypes(out TypeInfo, out TypeInfo)
 #
 --format=dlang
-_D2rt6dmain211_d_run_mainUiPPaPUAAaZiZi7tryExecMFMDFZvZv
+_D2rt6dmain211_d_run_mainUiPPaPUAAaZiZ7tryExecMFMDFZvZv
 rt.dmain2._d_run_main(int, char**, extern(C) int(char[][]) function*).tryExec(scope void() delegate)
 #
 --format=dlang
@@ -1032,13 +1032,37 @@  _D2gc11gctemplates56__T8mkBitmapTS3std5range13__T4iotaTiTiZ4iotaFiiZ6ResultZ8mkB
 gc.gctemplates.mkBitmap!(std.range.iota!(int, int).iota(int, int).Result).mkBitmap(ulong*, ulong)
 #
 --format=dlang
-_D8serenity9persister6Sqlite70__T15SqlitePersisterTS8serenity9persister6Sqlite11__unittest6FZv4TestZ15SqlitePersister12__T7opIndexZ7opIndexMFmZS8serenity9persister6Sqlite11__unittest6FZv4Test
+_D8serenity9persister6Sqlite69__T15SqlitePersisterTS8serenity9persister6Sqlite11__unittest6FZ4TestZ15SqlitePersister12__T7opIndexZ7opIndexMFmZS8serenity9persister6Sqlite11__unittest6FZ4Test
 serenity.persister.Sqlite.SqlitePersister!(serenity.persister.Sqlite.__unittest6().Test).SqlitePersister.opIndex!().opIndex(ulong)
 #
 --format=dlang
-_D4test4mainFZv5localMFZi
+_D3std11parallelism273__T4TaskS213std11parallelism3runTDFS3std9algorithm87__T9MapResultS27_D4test4mainFZ7getTermMFiZeTS3std5range13__T4iotaTiTiZ4iotaFiiZ6ResultZ9MapResultmmZeTS3std9algorithm87__T9MapResultS27_D4test4mainFZ7getTermMFiZeTS3std5range13__T4iotaTiTiZ4iotaFiiZ6ResultZ9MapResultTmTmZ4Task4implFPvZv
+std.parallelism.Task!(std.parallelism.run, real(std.algorithm.MapResult!(test.main().getTerm(int), std.range.iota!(int, int).iota(int, int).Result).MapResult, ulong, ulong) delegate, std.algorithm.MapResult!(test.main().getTerm(int), std.range.iota!(int, int).iota(int, int).Result).MapResult, ulong, ulong).Task.impl(void*)
+#
+--format=dlang
+_D4test4mainFZ5localMFZi
 test.main().local()
 #
 --format=dlang
 _D3std6socket12InternetHost221__T13getHostNoSyncVAyaa96_0a09202020206175746f2078203d2068746f6e6c28706172616d293b0a09202020206175746f206865203d20676574686f73746279616464722826782c20342c206361737428696e74294164647265737346616d696c792e494e4554293b0a09TkZ13getHostNoSyncMFkZb
 std.socket.InternetHost.getHostNoSync!("\n\t    auto x = htonl(param);\n\t    auto he = gethostbyaddr(&x, 4, cast(int)AddressFamily.INET);\n\t", uint).getHostNoSync(uint)
+#
+--format=dlang
+_D2rt5minfo16__unittestL518_6FZ12UTModuleInfo6__ctorMFNckZS2rt5minfo16__unittestL518_6FZ12UTModuleInfo
+rt.minfo.__unittestL518_6().UTModuleInfo.this(uint)
+#
+--format=dlang
+_D3std6traits37__T7fqnTypeTC6ObjectVbi0Vbi0Vbi0Vbi0Z13addQualifiersFAyabbbbZAya
+std.traits.fqnType!(Object, false, false, false, false).addQualifiers(immutable(char)[], bool, bool, bool, bool)
+#
+--format=dlang
+_D3std9algorithm117__T9MapResultS153std5range4onlyTS3std9algorithm53__T12FilterResultS28_D3std3uni7isUpperFNaNbNfwZbTAyaZ12FilterResultZ9MapResult5frontMFNaNdNfZS3std5range22__T10OnlyResultTwVmi1Z10OnlyResult
+std.algorithm.MapResult!(std.range.only, std.algorithm.FilterResult!(std.uni.isUpper(dchar), immutable(char)[]).FilterResult).MapResult.front()
+#
+--format=dlang
+_D3std6traits17__T6fqnSymS43stdZ11adjustIdentFAyaZAya
+std.traits.fqnSym!(std).adjustIdent(immutable(char)[])
+#
+--format=dlang
+_D2rt8lifetime36__T14_d_newarrayOpTS13_d_newarrayiTZ14_d_newarrayOpTFNaNbxC8TypeInfomPmZAv
+rt.lifetime._d_newarrayOpT!(_d_newarrayiT)._d_newarrayOpT(const(TypeInfo), ulong, ulong*)
-- 
2.1.4