diff mbox series

[v2,2/2] perf: Fix bpf prologue generation

Message ID 20200609081019.60234-3-sumanthk@linux.ibm.com
State Not Applicable
Delegated to: BPF Maintainers
Headers show
Series perf: Fix bpf prologue generation, uaccess | expand

Commit Message

Sumanth Korikkar June 9, 2020, 8:10 a.m. UTC
Issue:
bpf_probe_read is no longer available for architectures which have
overlapping address spaces. Hence bpf prologue generation fails.

Fix:
Use bpf_probe_read_kernel for kernel member access. For user
attribute access in kprobes, use bpf_probe_read_user.

Other:
@user attribute was introduced in commit 1e032f7cfa14 ("perf-probe:
 Add user memory access attribute support")

Test:
1. ulimit -l 128 ; ./perf record -e tests/bpf_sched_setscheduler.c
2. cat tests/bpf_sched_setscheduler.c

static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
        (void *) 6;
static int (*bpf_probe_read_user)(void *dst, __u32 size,
                                  const void *unsafe_ptr) = (void *) 112;
static int (*bpf_probe_read_kernel)(void *dst, __u32 size,
        const void *unsafe_ptr) = (void *) 113;

SEC("func=do_sched_setscheduler  pid policy param->sched_priority@user")
int bpf_func__setscheduler(void *ctx, int err, pid_t pid, int policy,
                           int param)
{
        char fmt[] = "prio: %ld";
        bpf_trace_printk(fmt, sizeof(fmt), param);
        return 1;
}

char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;

3. ./perf script
   sched 305669 [000] 1614458.838675: perf_bpf_probe:func: (2904e508)
   pid=261614 policy=2 sched_priority=1

4. cat /sys/kernel/debug/tracing/trace
   <...>-309956 [006] .... 1616098.093957: 0: prio: 1

Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Reviewed-by: Thomas Richter <tmricht@linux.ibm.com>
---
 tools/perf/util/bpf-prologue.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

Comments

Arnaldo Carvalho de Melo June 9, 2020, 3:27 p.m. UTC | #1
Em Tue, Jun 09, 2020 at 10:10:19AM +0200, Sumanth Korikkar escreveu:
> Issue:
> bpf_probe_read is no longer available for architecture which has
> overlapping address space. Hence bpf prologue generation fails
> 
> Fix:
> Use bpf_probe_read_kernel for kernel member access. For user
> attribute access in kprobes, use bpf_probe_read_user.
> 
> Other:
> @user attribute was introduced in commit 1e032f7cfa14 ("perf-probe:
>  Add user memory access attribute support")
> 
> Test:
> 1. ulimit -l 128 ; ./perf record -e tests/bpf_sched_setscheduler.c
> 2. cat tests/bpf_sched_setscheduler.c
> 
> static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
>         (void *) 6;
> static int (*bpf_probe_read_user)(void *dst, __u32 size,
>                                   const void *unsafe_ptr) = (void *) 112;
> static int (*bpf_probe_read_kernel)(void *dst, __u32 size,
>         const void *unsafe_ptr) = (void *) 113;
> 
> SEC("func=do_sched_setscheduler  pid policy param->sched_priority@user")
> int bpf_func__setscheduler(void *ctx, int err, pid_t pid, int policy,
>                            int param)
> {
>         char fmt[] = "prio: %ld";
>         bpf_trace_printk(fmt, sizeof(fmt), param);
>         return 1;
> }
> 
> char _license[] SEC("license") = "GPL";
> int _version SEC("version") = LINUX_VERSION_CODE;
> 
> 3. ./perf script
>    sched 305669 [000] 1614458.838675: perf_bpf_probe:func: (2904e508)
>    pid=261614 policy=2 sched_priority=1
> 
> 4. cat /sys/kernel/debug/tracing/trace
>    <...>-309956 [006] .... 1616098.093957: 0: prio: 1

Thanks for providing a detailed set of steps to test your patch, that is
great!

I added this, an alternative way to test it, combining all the aspects
in one 'perf trace' call:

Committer testing:

I had to add some missing headers in the bpf_sched_setscheduler.c test
proggie, then instead of using record+script I used 'perf trace' to
drive everything in one go:

  # cat bpf_sched_setscheduler.c
  #include <linux/types.h>
  #include <bpf.h>

  static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = (void *) 6;
  static int (*bpf_probe_read_user)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 112;
  static int (*bpf_probe_read_kernel)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 113;

  SEC("func=do_sched_setscheduler  pid policy param->sched_priority@user")
  int bpf_func__setscheduler(void *ctx, int err, pid_t pid, int policy, int param)
  {
          char fmt[] = "prio: %ld";
          bpf_trace_printk(fmt, sizeof(fmt), param);
          return 1;
  }

  char _license[] SEC("license") = "GPL";
  int _version SEC("version") = LINUX_VERSION_CODE;
  #
  #
  # perf trace -e bpf_sched_setscheduler.c chrt -f 42 sleep 1
     0.000 chrt/80125 perf_bpf_probe:func(__probe_ip: -1676607808, policy: 1, sched_priority: 42)
  #

And even with backtraces :-)

  # perf trace -e bpf_sched_setscheduler.c/max-stack=8/ chrt -f 42 sleep 1
       0.000 chrt/79805 perf_bpf_probe:func(__probe_ip: -1676607808, policy: 1, sched_priority: 42)
                                         do_sched_setscheduler ([kernel.kallsyms])
                                         __x64_sys_sched_setscheduler ([kernel.kallsyms])
                                         do_syscall_64 ([kernel.kallsyms])
                                         entry_SYSCALL_64 ([kernel.kallsyms])
                                         __GI___sched_setscheduler (/usr/lib64/libc-2.30.so)
  # 

- Arnaldo
diff mbox series

Patch

diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c
index b020a8678eb9..9887ae09242d 100644
--- a/tools/perf/util/bpf-prologue.c
+++ b/tools/perf/util/bpf-prologue.c
@@ -142,7 +142,8 @@  static int
 gen_read_mem(struct bpf_insn_pos *pos,
 	     int src_base_addr_reg,
 	     int dst_addr_reg,
-	     long offset)
+	     long offset,
+	     int probeid)
 {
 	/* mov arg3, src_base_addr_reg */
 	if (src_base_addr_reg != BPF_REG_ARG3)
@@ -159,7 +160,7 @@  gen_read_mem(struct bpf_insn_pos *pos,
 		ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);
 
 	/* Call probe_read  */
-	ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos);
+	ins(BPF_EMIT_CALL(probeid), pos);
 	/*
 	 * Error processing: if read fail, goto error code,
 	 * will be relocated. Target should be the start of
@@ -241,7 +242,7 @@  static int
 gen_prologue_slowpath(struct bpf_insn_pos *pos,
 		      struct probe_trace_arg *args, int nargs)
 {
-	int err, i;
+	int err, i, probeid;
 
 	for (i = 0; i < nargs; i++) {
 		struct probe_trace_arg *arg = &args[i];
@@ -276,11 +277,16 @@  gen_prologue_slowpath(struct bpf_insn_pos *pos,
 				stack_offset), pos);
 
 		ref = arg->ref;
+		probeid = BPF_FUNC_probe_read_kernel;
 		while (ref) {
 			pr_debug("prologue: arg %d: offset %ld\n",
 				 i, ref->offset);
+
+			if (ref->user_access)
+				probeid = BPF_FUNC_probe_read_user;
+
 			err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
-					   ref->offset);
+					   ref->offset, probeid);
 			if (err) {
 				pr_err("prologue: failed to generate probe_read function call\n");
 				goto errout;