[bpf-next] selftests/bpf: print backtrace on SIGSEGV in test_progs
diff mbox series

Message ID 20200223054320.2006995-1-andriin@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series
  • [bpf-next] selftests/bpf: print backtrace on SIGSEGV in test_progs
Related show

Commit Message

Andrii Nakryiko Feb. 23, 2020, 5:43 a.m. UTC
Due to various bugs (usually in test clean-up code), if the host system is
misconfigured, test_progs can simply crash in the middle of running a test
with little to no indication of where and why the crash happened. For cases
where a coredump is not readily available (e.g., inside a CI), it's very
helpful to have the stack trace that led to the crash printed out. This change
adds a signal handler that will capture and print out a symbolized backtrace:

  $ sudo ./test_progs -t mmap
  test_mmap:PASS:skel_open_and_load 0 nsec
  test_mmap:PASS:bss_mmap 0 nsec
  test_mmap:PASS:data_mmap 0 nsec
  Caught signal #11!
  Stack trace:
  ./test_progs(crash_handler+0x18)[0x42a888]
  /lib64/libpthread.so.0(+0xf5d0)[0x7f2aab5175d0]
  ./test_progs(test_mmap+0x3c0)[0x41f0a0]
  ./test_progs(main+0x160)[0x407d10]
  /lib64/libc.so.6(__libc_start_main+0xf5)[0x7f2aab15d3d5]
  ./test_progs[0x407ebc]
  [1]    1988412 segmentation fault (core dumped)  sudo ./test_progs -t mmap

Unfortunately, glibc's symbolization support is unable to symbolize static
functions; only global ones will be present in the stack trace. But it's still
a step forward without adding extra libraries to get better symbolization.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/testing/selftests/bpf/Makefile     |  2 +-
 tools/testing/selftests/bpf/test_progs.c | 26 ++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

Comments

Song Liu Feb. 24, 2020, 9:33 p.m. UTC | #1
> On Feb 22, 2020, at 9:43 PM, Andrii Nakryiko <andriin@fb.com> wrote:
> 
> Due to various bugs in test clean up code (usually), if host system is
> misconfigured, it happens that test_progs will just crash in the middle of
> running a test with little to no indication of where and why the crash
> happened. For cases where coredump is not readily available (e.g., inside
> a CI), it's very helpful to have a stack trace, which lead to crash, to be
> printed out. This change adds a signal handler that will capture and print out
> symbolized backtrace:
> 
>  $ sudo ./test_progs -t mmap
>  test_mmap:PASS:skel_open_and_load 0 nsec
>  test_mmap:PASS:bss_mmap 0 nsec
>  test_mmap:PASS:data_mmap 0 nsec
>  Caught signal #11!
>  Stack trace:
>  ./test_progs(crash_handler+0x18)[0x42a888]
>  /lib64/libpthread.so.0(+0xf5d0)[0x7f2aab5175d0]
>  ./test_progs(test_mmap+0x3c0)[0x41f0a0]
>  ./test_progs(main+0x160)[0x407d10]
>  /lib64/libc.so.6(__libc_start_main+0xf5)[0x7f2aab15d3d5]
>  ./test_progs[0x407ebc]
>  [1]    1988412 segmentation fault (core dumped)  sudo ./test_progs -t mmap
> 
> Unfortunately, glibc's symbolization support is unable to symbolize static
> functions, only global ones will be present in stack trace. But it's still a
> step forward without adding extra libraries to get a better symbolization.
> 
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> ---
> tools/testing/selftests/bpf/Makefile     |  2 +-
> tools/testing/selftests/bpf/test_progs.c | 26 ++++++++++++++++++++++++
> 2 files changed, 27 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
> index 2a583196fa51..50c63c21e6fd 100644
> --- a/tools/testing/selftests/bpf/Makefile
> +++ b/tools/testing/selftests/bpf/Makefile
> @@ -20,7 +20,7 @@ CLANG		?= clang
> LLC		?= llc
> LLVM_OBJCOPY	?= llvm-objcopy
> BPF_GCC		?= $(shell command -v bpf-gcc;)
> -CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR)		\
> +CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR)	\
> 	  -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR)	\
> 	  -Dbpf_prog_load=bpf_prog_test_load				\
> 	  -Dbpf_load_program=bpf_test_load_program
> diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> index bab1e6f1d8f1..531ab3e7e5e5 100644
> --- a/tools/testing/selftests/bpf/test_progs.c
> +++ b/tools/testing/selftests/bpf/test_progs.c
> @@ -6,6 +6,8 @@
> #include "bpf_rlimit.h"
> #include <argp.h>
> #include <string.h>
> +#include <signal.h>
> +#include <execinfo.h> /* backtrace */
> 
> /* defined in test_progs.h */
> struct test_env env = {};
> @@ -617,6 +619,22 @@ int cd_flavor_subdir(const char *exec_name)
> 	return chdir(flavor);
> }
> 
> +#define MAX_BACKTRACE_SZ 128
> +void crash_handler(int signum)
> +{
> +	void *bt[MAX_BACKTRACE_SZ];
> +	size_t sz;
> +
> +	sz = backtrace(bt, ARRAY_SIZE(bt));
> +
> +	if (env.test)
> +		dump_test_log(env.test, true);
> +	stdio_restore();
> +
> +	fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum);
> +	backtrace_symbols_fd(bt, sz, STDERR_FILENO);
> +}
> +
> int main(int argc, char **argv)
> {
> 	static const struct argp argp = {
> @@ -624,8 +642,16 @@ int main(int argc, char **argv)
> 		.parser = parse_arg,
> 		.doc = argp_program_doc,
> 	};
> +	struct sigaction sigact = {
> +		.sa_handler = crash_handler,
> +		.sa_flags = SA_RESETHAND,
> +	};
> 	int err, i;
> 
> +	env.stdout = stdout;
> +	env.stderr = stderr;

We have the same code in stdio_hijack(). Maybe remove those in 
stdio_hijack()? 

> +	sigaction(SIGSEGV, &sigact, NULL);
> +
> 	err = argp_parse(&argp, argc, argv, 0, NULL, &env);
> 	if (err)
> 		return err;
> -- 
> 2.17.1
>
Andrii Nakryiko Feb. 25, 2020, 12:07 a.m. UTC | #2
On Mon, Feb 24, 2020 at 1:34 PM Song Liu <songliubraving@fb.com> wrote:
>
>
>
> > On Feb 22, 2020, at 9:43 PM, Andrii Nakryiko <andriin@fb.com> wrote:
> >
> > Due to various bugs in test clean up code (usually), if host system is
> > misconfigured, it happens that test_progs will just crash in the middle of
> > running a test with little to no indication of where and why the crash
> > happened. For cases where coredump is not readily available (e.g., inside
> > a CI), it's very helpful to have a stack trace, which lead to crash, to be
> > printed out. This change adds a signal handler that will capture and print out
> > symbolized backtrace:
> >
> >  $ sudo ./test_progs -t mmap
> >  test_mmap:PASS:skel_open_and_load 0 nsec
> >  test_mmap:PASS:bss_mmap 0 nsec
> >  test_mmap:PASS:data_mmap 0 nsec
> >  Caught signal #11!
> >  Stack trace:
> >  ./test_progs(crash_handler+0x18)[0x42a888]
> >  /lib64/libpthread.so.0(+0xf5d0)[0x7f2aab5175d0]
> >  ./test_progs(test_mmap+0x3c0)[0x41f0a0]
> >  ./test_progs(main+0x160)[0x407d10]
> >  /lib64/libc.so.6(__libc_start_main+0xf5)[0x7f2aab15d3d5]
> >  ./test_progs[0x407ebc]
> >  [1]    1988412 segmentation fault (core dumped)  sudo ./test_progs -t mmap
> >
> > Unfortunately, glibc's symbolization support is unable to symbolize static
> > functions, only global ones will be present in stack trace. But it's still a
> > step forward without adding extra libraries to get a better symbolization.
> >
> > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> > ---
> > tools/testing/selftests/bpf/Makefile     |  2 +-
> > tools/testing/selftests/bpf/test_progs.c | 26 ++++++++++++++++++++++++
> > 2 files changed, 27 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
> > index 2a583196fa51..50c63c21e6fd 100644
> > --- a/tools/testing/selftests/bpf/Makefile
> > +++ b/tools/testing/selftests/bpf/Makefile
> > @@ -20,7 +20,7 @@ CLANG               ?= clang
> > LLC           ?= llc
> > LLVM_OBJCOPY  ?= llvm-objcopy
> > BPF_GCC               ?= $(shell command -v bpf-gcc;)
> > -CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR)           \
> > +CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \
> >         -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR)     \
> >         -Dbpf_prog_load=bpf_prog_test_load                            \
> >         -Dbpf_load_program=bpf_test_load_program
> > diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> > index bab1e6f1d8f1..531ab3e7e5e5 100644
> > --- a/tools/testing/selftests/bpf/test_progs.c
> > +++ b/tools/testing/selftests/bpf/test_progs.c
> > @@ -6,6 +6,8 @@
> > #include "bpf_rlimit.h"
> > #include <argp.h>
> > #include <string.h>
> > +#include <signal.h>
> > +#include <execinfo.h> /* backtrace */
> >
> > /* defined in test_progs.h */
> > struct test_env env = {};
> > @@ -617,6 +619,22 @@ int cd_flavor_subdir(const char *exec_name)
> >       return chdir(flavor);
> > }
> >
> > +#define MAX_BACKTRACE_SZ 128
> > +void crash_handler(int signum)
> > +{
> > +     void *bt[MAX_BACKTRACE_SZ];
> > +     size_t sz;
> > +
> > +     sz = backtrace(bt, ARRAY_SIZE(bt));
> > +
> > +     if (env.test)
> > +             dump_test_log(env.test, true);
> > +     stdio_restore();
> > +
> > +     fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum);
> > +     backtrace_symbols_fd(bt, sz, STDERR_FILENO);
> > +}
> > +
> > int main(int argc, char **argv)
> > {
> >       static const struct argp argp = {
> > @@ -624,8 +642,16 @@ int main(int argc, char **argv)
> >               .parser = parse_arg,
> >               .doc = argp_program_doc,
> >       };
> > +     struct sigaction sigact = {
> > +             .sa_handler = crash_handler,
> > +             .sa_flags = SA_RESETHAND,
> > +     };
> >       int err, i;
> >
> > +     env.stdout = stdout;
> > +     env.stderr = stderr;
>
> We have the same code in stdio_hijack(). Maybe remove those in
> stdio_hijack()?

Yeah, this is ugly. I'll just check for (!env.stdout) in the signal
handler instead. Sending v2...

>
> > +     sigaction(SIGSEGV, &sigact, NULL);
> > +
> >       err = argp_parse(&argp, argc, argv, 0, NULL, &env);
> >       if (err)
> >               return err;
> > --
> > 2.17.1
> >
>

Patch
diff mbox series

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 2a583196fa51..50c63c21e6fd 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -20,7 +20,7 @@  CLANG		?= clang
 LLC		?= llc
 LLVM_OBJCOPY	?= llvm-objcopy
 BPF_GCC		?= $(shell command -v bpf-gcc;)
-CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR)		\
+CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR)	\
 	  -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR)	\
 	  -Dbpf_prog_load=bpf_prog_test_load				\
 	  -Dbpf_load_program=bpf_test_load_program
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index bab1e6f1d8f1..531ab3e7e5e5 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -6,6 +6,8 @@ 
 #include "bpf_rlimit.h"
 #include <argp.h>
 #include <string.h>
+#include <signal.h>
+#include <execinfo.h> /* backtrace */
 
 /* defined in test_progs.h */
 struct test_env env = {};
@@ -617,6 +619,22 @@  int cd_flavor_subdir(const char *exec_name)
 	return chdir(flavor);
 }
 
+#define MAX_BACKTRACE_SZ 128
+void crash_handler(int signum)
+{
+	void *bt[MAX_BACKTRACE_SZ];
+	size_t sz;
+
+	sz = backtrace(bt, ARRAY_SIZE(bt));
+
+	if (env.test)
+		dump_test_log(env.test, true);
+	stdio_restore();
+
+	fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum);
+	backtrace_symbols_fd(bt, sz, STDERR_FILENO);
+}
+
 int main(int argc, char **argv)
 {
 	static const struct argp argp = {
@@ -624,8 +642,16 @@  int main(int argc, char **argv)
 		.parser = parse_arg,
 		.doc = argp_program_doc,
 	};
+	struct sigaction sigact = {
+		.sa_handler = crash_handler,
+		.sa_flags = SA_RESETHAND,
+	};
 	int err, i;
 
+	env.stdout = stdout;
+	env.stderr = stderr;
+	sigaction(SIGSEGV, &sigact, NULL);
+
 	err = argp_parse(&argp, argc, argv, 0, NULL, &env);
 	if (err)
 		return err;