diff mbox

Use libbacktrace as libsanitizer's symbolizer

Message ID 20131118162342.GH892@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Nov. 18, 2013, 4:23 p.m. UTC
On Mon, Nov 18, 2013 at 07:50:33PM +0400, Alexey Samsonov wrote:
> On Mon, Nov 18, 2013 at 7:49 PM, Alexey Samsonov <samsonov@google.com> wrote:
>  Unfortunately, recently there were a few enhancements to
>  sanitizer_symbolizer_posix_libcdep.cc (and friends),
>  in LLVM trunk, and now it looks different from gcc version (apparently, the
>  changes were committed
>  after the merge to gcc happened, I should have pointed this out earlier,
>  sorry).
> 
>  Kostya (or Jakub), is it possible to somehow pick up the changes? Otherwise
>  this patch can't go in ASan runtime
>  in gcc - the code will significantly diverge.

Here is an (untested) forward port of the patch to what is in llvm svn right
now.  The unpatched code always had either internal_symbolizer_ != NULL, or
external_symbolizer_ != NULL, or both NULL, but not both set.  The patch
just adds a third variant, libbacktrace_symbolizer_, again, at most one
of the 3 will be non-NULL, and the priorities are that internal_symbolizer_
has highest priority, then libbacktrace (if available/usable), then the
external one.



	Jakub

Comments

Alexey Samsonov Nov. 19, 2013, 3:14 p.m. UTC | #1
Hi Jakub,

Can you please an updated patch in the attachment?

On Mon, Nov 18, 2013 at 8:23 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Nov 18, 2013 at 07:50:33PM +0400, Alexey Samsonov wrote:
>> On Mon, Nov 18, 2013 at 7:49 PM, Alexey Samsonov <samsonov@google.com> wrote:
>>  Unfortunately, recently there were a few enhancements to
>>  sanitizer_symbolizer_posix_libcdep.cc (and friends),
>>  in LLVM trunk, and now it looks different from gcc version (apparently, the
>>  changes were committed
>>  after the merge to gcc happened, I should have pointed this out earlier,
>>  sorry).
>>
>>  Kostya (or Jakub), is it possible to somehow pick up the changes? Otherwise
>>  this patch can't go in ASan runtime
>>  in gcc - the code will significantly diverge.
>
> Here is an (untested) forward port of the patch to what is in llvm svn right
> now.  The unpatched code always had either internal_symbolizer_ != NULL, or
> external_symbolizer_ != NULL, or both NULL, but not both set.  The patch
> just adds a third variant, libbacktrace_symbolizer_, again, at most one
> of the 3 will be non-NULL, and the priorities are that internal_symbolizer_
> has highest priority, then libbacktrace (if available/usable), then the
> external one.
>
> --- sanitizer_symbolizer_posix_libcdep.cc.jj    2013-11-12 19:35:30.000000000 +0100
> +++ sanitizer_symbolizer_posix_libcdep.cc       2013-11-18 17:16:03.202643957 +0100
> @@ -27,6 +27,16 @@
>  #include <sys/wait.h>
>  #include <unistd.h>
>
> +#ifdef SANITIZER_LIBBACKTRACE
> +#include "backtrace-supported.h"
> +#if SANITIZER_LINUX && BACKTRACE_SUPPORTED \
> +    && !BACKTRACE_USES_MALLOC && BACKTRACE_SUPPORTS_THREADS
> +#include "backtrace.h"
> +#else
> +#undef SANITIZER_LIBBACKTRACE
> +#endif
> +#endif
> +
>  // C++ demangling function, as required by Itanium C++ ABI. This is weak,
>  // because we do not require a C++ ABI library to be linked to a program
>  // using sanitizers; if it's not present, we'll just use the mangled name.
> @@ -364,12 +374,124 @@ class InternalSymbolizer {
>
>  #endif  // SANITIZER_SUPPORTS_WEAK_HOOKS
>
> +#if SANITIZER_LIBBACKTRACE
> +namespace {
> +
> +struct SymbolizeCodeData {
> +  AddressInfo *frames;
> +  uptr n_frames;
> +  uptr max_frames;

max_frames is not used anywhere.

> +  const char *module_name;
> +  uptr module_offset;
> +};
> +
> +extern "C" {

why do you need extern "C" here?

> +
> +static int SymbolizeCodePCInfoCallback(void *vdata, uintptr_t addr,
> +                                      const char *filename, int lineno,
> +                                      const char *function) {
> +  SymbolizeCodeData *cdata = (SymbolizeCodeData *)vdata;
> +  if (function) {
> +    AddressInfo *info = &cdata->frames[cdata->n_frames++];
> +    info->Clear();
> +    info->FillAddressAndModuleInfo(addr, cdata->module_name, cdata->module_offset);
> +    info->function = internal_strdup(function);
> +    if (filename)
> +      info->file = internal_strdup(filename);
> +    info->line = lineno;
> +  }
> +  return 0;
> +}
> +
> +static void SymbolizeCodeCallback(void *vdata, uintptr_t addr,
> +                                 const char *symname, uintptr_t) {
> +  SymbolizeCodeData *cdata = (SymbolizeCodeData *)vdata;
> +  if (symname) {
> +    AddressInfo *info = &cdata->frames[0];
> +    info->Clear();
> +    info->FillAddressAndModuleInfo(addr, cdata->module_name, cdata->module_offset);
> +    info->function = internal_strdup(symname);
> +    cdata->n_frames = 1;
> +  }
> +}
> +
> +static void SymbolizeDataCallback(void *vdata, uintptr_t,
> +                                 const char *symname, uintptr_t symval) {
> +  DataInfo *info = (DataInfo *)vdata;
> +  if (symname && symval) {
> +    info->name = internal_strdup(symname);
> +    info->start = symval;
> +  }
> +}
> +
> +static void ErrorCallback(void *, const char *, int) {
> +}
> +
> +}
> +
> +}
> +
> +class LibbacktraceSymbolizer {
> + public:
> +  static LibbacktraceSymbolizer *get(LowLevelAllocator *alloc) {
> +    backtrace_state *state
> +      = backtrace_create_state("/proc/self/exe", 1, ErrorCallback, NULL);

You may want to use ReadBinaryName() here (in case it was cached
earlier, e.g. before
we create a sandbox).

> +    if (!state)
> +      return 0;
> +    return new(*alloc) LibbacktraceSymbolizer(state);
> +  }
> +
> +  uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames,
> +                    const char *module_name, uptr module_offset) {
> +    SymbolizeCodeData data;
> +    data.frames = frames;
> +    data.n_frames = 0;
> +    data.max_frames = max_frames;
> +    data.module_name = module_name;
> +    data.module_offset = module_offset;
> +    backtrace_pcinfo(state_, addr, SymbolizeCodePCInfoCallback, ErrorCallback,
> +                    &data);
> +    if (data.n_frames)
> +      return data.n_frames;
> +    backtrace_syminfo(state_, addr, SymbolizeCodeCallback, ErrorCallback, &data);
> +    return data.n_frames;
> +  }
> +
> +  void SymbolizeData(DataInfo *info) {
> +    backtrace_syminfo(state_, info->address, SymbolizeDataCallback,
> +                     ErrorCallback, info);
> +  }
> +
> + private:
> +  LibbacktraceSymbolizer(backtrace_state *state) : state_(state) { }
> +
> +  backtrace_state *state_;     // Leaked.
> +};
> +#else
> +class LibbacktraceSymbolizer {
> + public:
> +  static LibbacktraceSymbolizer *get(LowLevelAllocator *) {
> +    return 0;
> +  }
> +
> +  uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames,
> +                    const char *module_name, uptr module_offset) {
> +    return 0;
> +  }
> +
> +  void SymbolizeData(DataInfo *info) {
> +  }
> +};
> +#endif

Looks like Libbacktrace symbolizer is now wrapped in a straightforward
interface. Could you please
move it it sanitizer_symbolizer_libbacktrace.{h,cc}, and keep changes
to sanitizer_symbolizer_posix_libcdep.cc
minimal?

> +
>  class POSIXSymbolizer : public Symbolizer {
>   public:
>    POSIXSymbolizer(ExternalSymbolizer *external_symbolizer,
> +                 LibbacktraceSymbolizer *libbacktrace_symbolizer,
>                    InternalSymbolizer *internal_symbolizer)
>        : Symbolizer(),
>          external_symbolizer_(external_symbolizer),
> +       libbacktrace_symbolizer_(libbacktrace_symbolizer),
>          internal_symbolizer_(internal_symbolizer) {}
>
>    uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
> @@ -381,7 +503,18 @@ class POSIXSymbolizer : public Symbolize
>        return 0;
>      const char *module_name = module->full_name();
>      uptr module_offset = addr - module->base_address();
> -    const char *str = SendCommand(false, module_name, module_offset);
> +    const char *str = 0;
> +    if (libbacktrace_symbolizer_)
> +      {
> +       uptr ret
> +         = libbacktrace_symbolizer_->SymbolizeCode(addr, frames, max_frames,
> +                                                   module_name,
> +                                                   module_offset);
> +       if (ret)
> +         return ret;
> +      }
> +    else
> +      str = SendCommand(false, module_name, module_offset);
>      if (str == 0) {
>        // External symbolizer was not initialized or failed. Fill only data
>        // about module name and offset.
> @@ -444,6 +577,10 @@ class POSIXSymbolizer : public Symbolize
>      info->address = addr;
>      info->module = internal_strdup(module_name);
>      info->module_offset = module_offset;
> +    if (libbacktrace_symbolizer_) {
> +      libbacktrace_symbolizer_->SymbolizeData(info);
> +      return true;
> +    }
>      const char *str = SendCommand(true, module_name, module_offset);
>      if (str == 0)
>        return true;
> @@ -455,7 +594,9 @@ class POSIXSymbolizer : public Symbolize
>    }
>
>    bool IsAvailable() {
> -    return internal_symbolizer_ != 0 || external_symbolizer_ != 0;
> +    return internal_symbolizer_ != 0 ||
> +          libbacktrace_symbolizer_ != 0 ||
> +          external_symbolizer_ != 0;
>    }
>
>    bool IsExternalAvailable() {
> @@ -573,14 +714,19 @@ class POSIXSymbolizer : public Symbolize
>
>    ExternalSymbolizer *external_symbolizer_;        // Leaked.
>    InternalSymbolizer *const internal_symbolizer_;  // Leaked.
> +  LibbacktraceSymbolizer *libbacktrace_symbolizer_;  // Leaked.
>  };
>
>  Symbolizer *Symbolizer::PlatformInit(const char *path_to_external) {
>    InternalSymbolizer* internal_symbolizer =
>        InternalSymbolizer::get(&symbolizer_allocator_);
> +  LibbacktraceSymbolizer* libbacktrace_symbolizer = 0;
>    ExternalSymbolizer *external_symbolizer = 0;
>
> -  if (!internal_symbolizer) {
> +  if (!internal_symbolizer)
> +    libbacktrace_symbolizer =
> +      LibbacktraceSymbolizer::get(&symbolizer_allocator_);
> +  if (!internal_symbolizer && !libbacktrace_symbolizer) {
>      if (!path_to_external || path_to_external[0] == '\0')
>        path_to_external = FindPathToBinary("llvm-symbolizer");
>
> @@ -593,7 +739,8 @@ Symbolizer *Symbolizer::PlatformInit(con
>    }
>
>    return new(symbolizer_allocator_)
> -      POSIXSymbolizer(external_symbolizer, internal_symbolizer);
> +      POSIXSymbolizer(external_symbolizer, libbacktrace_symbolizer,
> +                     internal_symbolizer);
>  }
>
>  }  // namespace __sanitizer
>
>
>         Jakub
Jakub Jelinek Nov. 19, 2013, 3:33 p.m. UTC | #2
On Tue, Nov 19, 2013 at 07:14:03PM +0400, Alexey Samsonov wrote:
> > +#if SANITIZER_LIBBACKTRACE
> > +namespace {
> > +
> > +struct SymbolizeCodeData {
> > +  AddressInfo *frames;
> > +  uptr n_frames;
> > +  uptr max_frames;
> 
> max_frames is not used anywhere.

I guess
    if (cdata->n_frames == cdata->max_frames)
      return 1;
near the end of SymbolizeCodePCInfoCallback should do it (returning
non-zero from the callback I think means no further callbacks will be
called).

> > +  const char *module_name;
> > +  uptr module_offset;
> > +};
> > +
> > +extern "C" {
> 
> why do you need extern "C" here?

Not for GCC, but generally I think the C++ standard requires that extern "C"
function taking function pointer argument implies the function must be
extern "C".  At least AFAIK in SunPro overload resolution treats
extern "C" and extern "C++" function pointers differently.

> > +class LibbacktraceSymbolizer {
> > + public:
> > +  static LibbacktraceSymbolizer *get(LowLevelAllocator *alloc) {
> > +    backtrace_state *state
> > +      = backtrace_create_state("/proc/self/exe", 1, ErrorCallback, NULL);
> 
> You may want to use ReadBinaryName() here (in case it was cached
> earlier, e.g. before
> we create a sandbox).

As the class is constructed during getSymbolizer(), it is surely performed
before any getSymbolizer()->PrepareForSandboxing(); can be called, so using
/proc/self/exe is IMHO both right and faster.
> 
> Looks like Libbacktrace symbolizer is now wrapped in a straightforward
> interface. Could you please
> move it it sanitizer_symbolizer_libbacktrace.{h,cc}, and keep changes
> to sanitizer_symbolizer_posix_libcdep.cc
> minimal?

I will try.

	Jakub
diff mbox

Patch

--- sanitizer_symbolizer_posix_libcdep.cc.jj	2013-11-12 19:35:30.000000000 +0100
+++ sanitizer_symbolizer_posix_libcdep.cc	2013-11-18 17:16:03.202643957 +0100
@@ -27,6 +27,16 @@ 
 #include <sys/wait.h>
 #include <unistd.h>
 
+#ifdef SANITIZER_LIBBACKTRACE
+#include "backtrace-supported.h"
+#if SANITIZER_LINUX && BACKTRACE_SUPPORTED \
+    && !BACKTRACE_USES_MALLOC && BACKTRACE_SUPPORTS_THREADS
+#include "backtrace.h"
+#else
+#undef SANITIZER_LIBBACKTRACE
+#endif
+#endif
+
 // C++ demangling function, as required by Itanium C++ ABI. This is weak,
 // because we do not require a C++ ABI library to be linked to a program
 // using sanitizers; if it's not present, we'll just use the mangled name.
@@ -364,12 +374,124 @@  class InternalSymbolizer {
 
 #endif  // SANITIZER_SUPPORTS_WEAK_HOOKS
 
+#if SANITIZER_LIBBACKTRACE
+namespace {
+
+struct SymbolizeCodeData {
+  AddressInfo *frames;
+  uptr n_frames;
+  uptr max_frames;
+  const char *module_name;
+  uptr module_offset;
+};
+
+extern "C" {
+
+static int SymbolizeCodePCInfoCallback(void *vdata, uintptr_t addr,
+				       const char *filename, int lineno,
+				       const char *function) {
+  SymbolizeCodeData *cdata = (SymbolizeCodeData *)vdata;
+  if (function) {
+    AddressInfo *info = &cdata->frames[cdata->n_frames++];
+    info->Clear();
+    info->FillAddressAndModuleInfo(addr, cdata->module_name, cdata->module_offset);
+    info->function = internal_strdup(function);
+    if (filename)
+      info->file = internal_strdup(filename);
+    info->line = lineno;
+  }
+  return 0;
+}
+
+static void SymbolizeCodeCallback(void *vdata, uintptr_t addr,
+				  const char *symname, uintptr_t) {
+  SymbolizeCodeData *cdata = (SymbolizeCodeData *)vdata;
+  if (symname) {
+    AddressInfo *info = &cdata->frames[0];
+    info->Clear();
+    info->FillAddressAndModuleInfo(addr, cdata->module_name, cdata->module_offset);
+    info->function = internal_strdup(symname);
+    cdata->n_frames = 1;
+  }
+}
+
+static void SymbolizeDataCallback(void *vdata, uintptr_t,
+				  const char *symname, uintptr_t symval) {
+  DataInfo *info = (DataInfo *)vdata;
+  if (symname && symval) {
+    info->name = internal_strdup(symname);
+    info->start = symval;
+  }
+}
+
+static void ErrorCallback(void *, const char *, int) {
+}
+
+}
+
+}
+
+class LibbacktraceSymbolizer {
+ public:
+  static LibbacktraceSymbolizer *get(LowLevelAllocator *alloc) {
+    backtrace_state *state
+      = backtrace_create_state("/proc/self/exe", 1, ErrorCallback, NULL);
+    if (!state)
+      return 0;
+    return new(*alloc) LibbacktraceSymbolizer(state);
+  }
+
+  uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames,
+		     const char *module_name, uptr module_offset) {
+    SymbolizeCodeData data;
+    data.frames = frames;
+    data.n_frames = 0;
+    data.max_frames = max_frames;
+    data.module_name = module_name;
+    data.module_offset = module_offset;
+    backtrace_pcinfo(state_, addr, SymbolizeCodePCInfoCallback, ErrorCallback,
+		     &data);
+    if (data.n_frames)
+      return data.n_frames;
+    backtrace_syminfo(state_, addr, SymbolizeCodeCallback, ErrorCallback, &data);
+    return data.n_frames;
+  }
+
+  void SymbolizeData(DataInfo *info) {
+    backtrace_syminfo(state_, info->address, SymbolizeDataCallback,
+		      ErrorCallback, info);
+  }
+
+ private:
+  LibbacktraceSymbolizer(backtrace_state *state) : state_(state) { }
+
+  backtrace_state *state_;	// Leaked.
+};
+#else
+class LibbacktraceSymbolizer {
+ public:
+  static LibbacktraceSymbolizer *get(LowLevelAllocator *) {
+    return 0;
+  }
+
+  uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames,
+		     const char *module_name, uptr module_offset) {
+    return 0;
+  }
+
+  void SymbolizeData(DataInfo *info) {
+  }
+};
+#endif
+
 class POSIXSymbolizer : public Symbolizer {
  public:
   POSIXSymbolizer(ExternalSymbolizer *external_symbolizer,
+		  LibbacktraceSymbolizer *libbacktrace_symbolizer,
                   InternalSymbolizer *internal_symbolizer)
       : Symbolizer(),
         external_symbolizer_(external_symbolizer),
+	libbacktrace_symbolizer_(libbacktrace_symbolizer),
         internal_symbolizer_(internal_symbolizer) {}
 
   uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
@@ -381,7 +503,18 @@  class POSIXSymbolizer : public Symbolize
       return 0;
     const char *module_name = module->full_name();
     uptr module_offset = addr - module->base_address();
-    const char *str = SendCommand(false, module_name, module_offset);
+    const char *str = 0;
+    if (libbacktrace_symbolizer_)
+      {
+	uptr ret
+	  = libbacktrace_symbolizer_->SymbolizeCode(addr, frames, max_frames,
+						    module_name,
+						    module_offset);
+	if (ret)
+	  return ret;
+      }
+    else
+      str = SendCommand(false, module_name, module_offset);
     if (str == 0) {
       // External symbolizer was not initialized or failed. Fill only data
       // about module name and offset.
@@ -444,6 +577,10 @@  class POSIXSymbolizer : public Symbolize
     info->address = addr;
     info->module = internal_strdup(module_name);
     info->module_offset = module_offset;
+    if (libbacktrace_symbolizer_) {
+      libbacktrace_symbolizer_->SymbolizeData(info);
+      return true;
+    }
     const char *str = SendCommand(true, module_name, module_offset);
     if (str == 0)
       return true;
@@ -455,7 +594,9 @@  class POSIXSymbolizer : public Symbolize
   }
 
   bool IsAvailable() {
-    return internal_symbolizer_ != 0 || external_symbolizer_ != 0;
+    return internal_symbolizer_ != 0 ||
+	   libbacktrace_symbolizer_ != 0 ||
+	   external_symbolizer_ != 0;
   }
 
   bool IsExternalAvailable() {
@@ -573,14 +714,19 @@  class POSIXSymbolizer : public Symbolize
 
   ExternalSymbolizer *external_symbolizer_;        // Leaked.
   InternalSymbolizer *const internal_symbolizer_;  // Leaked.
+  LibbacktraceSymbolizer *libbacktrace_symbolizer_;  // Leaked.
 };
 
 Symbolizer *Symbolizer::PlatformInit(const char *path_to_external) {
   InternalSymbolizer* internal_symbolizer =
       InternalSymbolizer::get(&symbolizer_allocator_);
+  LibbacktraceSymbolizer* libbacktrace_symbolizer = 0;
   ExternalSymbolizer *external_symbolizer = 0;
 
-  if (!internal_symbolizer) {
+  if (!internal_symbolizer)
+    libbacktrace_symbolizer =
+      LibbacktraceSymbolizer::get(&symbolizer_allocator_);
+  if (!internal_symbolizer && !libbacktrace_symbolizer) {
     if (!path_to_external || path_to_external[0] == '\0')
       path_to_external = FindPathToBinary("llvm-symbolizer");
 
@@ -593,7 +739,8 @@  Symbolizer *Symbolizer::PlatformInit(con
   }
 
   return new(symbolizer_allocator_)
-      POSIXSymbolizer(external_symbolizer, internal_symbolizer);
+      POSIXSymbolizer(external_symbolizer, libbacktrace_symbolizer,
+		      internal_symbolizer);
 }
 
 }  // namespace __sanitizer