diff mbox

[RFC,6/6] xen: Add backtrace for serious issues.

Message ID 1435605913-23826-7-git-send-email-konrad.wilk@oracle.com
State New
Headers show

Commit Message

Konrad Rzeszutek Wilk June 29, 2015, 7:25 p.m. UTC
When debugging issues that caused the emulator to kill itself
or skipping certain operations (unable to write to host
registers) an stack trace will most definitly aid in debugging
the problem.

As such this patch uses the most basic backtrace to print out
details.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 hw/xen/xen_pt.c             |  3 +++
 include/hw/xen/xen_common.h |  1 +
 xen-hvm.c                   | 16 ++++++++++++++++
 3 files changed, 20 insertions(+)

Comments

Stefano Stabellini July 1, 2015, 1:06 p.m. UTC | #1
On Mon, 29 Jun 2015, Konrad Rzeszutek Wilk wrote:
> When debugging issues that caused the emulator to kill itself
> or skipping certain operations (unable to write to host
> registers) an stack trace will most definitly aid in debugging
> the problem.
> 
> As such this patch uses the most basic backtrace to print out
> details.
> 
> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

I think it could be useful, but it cannot be done as a xen-hvm.c thing.
It should be somewhere generic, maybe under util? Stefan, any
suggestions?


>  hw/xen/xen_pt.c             |  3 +++
>  include/hw/xen/xen_common.h |  1 +
>  xen-hvm.c                   | 16 ++++++++++++++++
>  3 files changed, 20 insertions(+)
> 
> diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
> index ea1ceda..1d256b9 100644
> --- a/hw/xen/xen_pt.c
> +++ b/hw/xen/xen_pt.c
> @@ -407,6 +407,7 @@ out:
>  
>          if (rc < 0) {
>              XEN_PT_ERR(d, "xen_host_pci_set_block failed. return value: %d.\n", rc);
> +            xen_dump_stack();
>          }
>      }
>  }
> @@ -421,6 +422,7 @@ static uint64_t xen_pt_bar_read(void *o, hwaddr addr,
>       * misconfiguration of the IOMMU. */
>      XEN_PT_ERR(d, "Should not read BAR through QEMU. @0x"TARGET_FMT_plx"\n",
>                 addr);
> +    xen_dump_stack();
>      return 0;
>  }
>  static void xen_pt_bar_write(void *o, hwaddr addr, uint64_t val,
> @@ -430,6 +432,7 @@ static void xen_pt_bar_write(void *o, hwaddr addr, uint64_t val,
>      /* Same comment as xen_pt_bar_read function */
>      XEN_PT_ERR(d, "Should not write BAR through QEMU. @0x"TARGET_FMT_plx"\n",
>                 addr);
> +    xen_dump_stack();
>  }
>  
>  static const MemoryRegionOps ops = {
> diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
> index 38f29fb..3983cfb 100644
> --- a/include/hw/xen/xen_common.h
> +++ b/include/hw/xen/xen_common.h
> @@ -165,6 +165,7 @@ void destroy_hvm_domain(bool reboot);
>  
>  /* shutdown/destroy current domain because of an error */
>  void xen_shutdown_fatal_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
> +void xen_dump_stack(void);
>  
>  #ifdef HVM_PARAM_VMPORT_REGS_PFN
>  static inline int xen_get_vmport_regs_pfn(XenXC xc, domid_t dom,
> diff --git a/xen-hvm.c b/xen-hvm.c
> index a92bc14..8bf4a57 100644
> --- a/xen-hvm.c
> +++ b/xen-hvm.c
> @@ -10,6 +10,7 @@
>  
>  #include <sys/mman.h>
>  
> +#include <execinfo.h>
>  #include "hw/pci/pci.h"
>  #include "hw/i386/pc.h"
>  #include "hw/xen/xen_common.h"
> @@ -1328,6 +1329,20 @@ void xen_register_framebuffer(MemoryRegion *mr)
>      framebuffer = mr;
>  }
>  
> +void xen_dump_stack(void)
> +{
> +    int nptrs;
> +#define SIZE 1024
> +    void *buffer[SIZE];
> +
> +    nptrs = backtrace(buffer, SIZE);
> +    if (!nptrs)
> +        return;
> +
> +    backtrace_symbols_fd(buffer, nptrs, STDERR_FILENO);
> +#undef SIZE
> +}
> +
>  void xen_shutdown_fatal_error(const char *fmt, ...)
>  {
>      va_list ap;
> @@ -1335,6 +1350,7 @@ void xen_shutdown_fatal_error(const char *fmt, ...)
>      va_start(ap, fmt);
>      vfprintf(stderr, fmt, ap);
>      va_end(ap);
> +    xen_dump_stack();
>      fprintf(stderr, "Will destroy the domain.\n");
>      /* destroy the domain */
>      qemu_system_shutdown_request();
> -- 
> 2.1.0
>
Stefan Hajnoczi July 2, 2015, 1:50 p.m. UTC | #2
On Wed, Jul 01, 2015 at 02:06:30PM +0100, Stefano Stabellini wrote:
> On Mon, 29 Jun 2015, Konrad Rzeszutek Wilk wrote:
> > When debugging issues that caused the emulator to kill itself
> > or skipping certain operations (unable to write to host
> > registers) an stack trace will most definitly aid in debugging
> > the problem.
> > 
> > As such this patch uses the most basic backtrace to print out
> > details.
> > 
> > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> 
> I think it could be useful, but it cannot be done as a xen-hvm.c thing.
> It should be somewhere generic, maybe under util? Stefan, any
> suggestions?

Yes, it seems like a util/ thing.  backtrace() and
backtrace_symbols_fd() are glibc-specific so it must not break the build
on other platforms.

I think the reason we've surivived without backtraces so far is because
fatal errors are typically handled with abort(3).  It causes a core dump
so you have the full process state, including backtraces.

I'm fine with adding a backtrace function though since it's more
lightweight and allows for graceful shutdown or error recovery.
diff mbox

Patch

diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index ea1ceda..1d256b9 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -407,6 +407,7 @@  out:
 
         if (rc < 0) {
             XEN_PT_ERR(d, "xen_host_pci_set_block failed. return value: %d.\n", rc);
+            xen_dump_stack();
         }
     }
 }
@@ -421,6 +422,7 @@  static uint64_t xen_pt_bar_read(void *o, hwaddr addr,
      * misconfiguration of the IOMMU. */
     XEN_PT_ERR(d, "Should not read BAR through QEMU. @0x"TARGET_FMT_plx"\n",
                addr);
+    xen_dump_stack();
     return 0;
 }
 static void xen_pt_bar_write(void *o, hwaddr addr, uint64_t val,
@@ -430,6 +432,7 @@  static void xen_pt_bar_write(void *o, hwaddr addr, uint64_t val,
     /* Same comment as xen_pt_bar_read function */
     XEN_PT_ERR(d, "Should not write BAR through QEMU. @0x"TARGET_FMT_plx"\n",
                addr);
+    xen_dump_stack();
 }
 
 static const MemoryRegionOps ops = {
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
index 38f29fb..3983cfb 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -165,6 +165,7 @@  void destroy_hvm_domain(bool reboot);
 
 /* shutdown/destroy current domain because of an error */
 void xen_shutdown_fatal_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+void xen_dump_stack(void);
 
 #ifdef HVM_PARAM_VMPORT_REGS_PFN
 static inline int xen_get_vmport_regs_pfn(XenXC xc, domid_t dom,
diff --git a/xen-hvm.c b/xen-hvm.c
index a92bc14..8bf4a57 100644
--- a/xen-hvm.c
+++ b/xen-hvm.c
@@ -10,6 +10,7 @@ 
 
 #include <sys/mman.h>
 
+#include <execinfo.h>
 #include "hw/pci/pci.h"
 #include "hw/i386/pc.h"
 #include "hw/xen/xen_common.h"
@@ -1328,6 +1329,20 @@  void xen_register_framebuffer(MemoryRegion *mr)
     framebuffer = mr;
 }
 
+void xen_dump_stack(void)
+{
+    int nptrs;
+#define SIZE 1024
+    void *buffer[SIZE];
+
+    nptrs = backtrace(buffer, SIZE);
+    if (!nptrs)
+        return;
+
+    backtrace_symbols_fd(buffer, nptrs, STDERR_FILENO);
+#undef SIZE
+}
+
 void xen_shutdown_fatal_error(const char *fmt, ...)
 {
     va_list ap;
@@ -1335,6 +1350,7 @@  void xen_shutdown_fatal_error(const char *fmt, ...)
     va_start(ap, fmt);
     vfprintf(stderr, fmt, ap);
     va_end(ap);
+    xen_dump_stack();
     fprintf(stderr, "Will destroy the domain.\n");
     /* destroy the domain */
     qemu_system_shutdown_request();