
[2/5] bpf: Add bpf_perf_event_output_kfunc

Message ID: 20191229143740.29143-3-jolsa@kernel.org
State: RFC
Delegated to: BPF Maintainers
Series: bpf: Add trampoline helpers

Commit Message

Jiri Olsa Dec. 29, 2019, 2:37 p.m. UTC
Add support for using perf_event_output in
BPF_TRACE_FENTRY/BPF_TRACE_FEXIT programs.

There are no pt_regs available in the trampoline,
so we get one from the per-CPU bpf_kfunc_regs array.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 kernel/trace/bpf_trace.c | 67 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
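
For context, a sketch of the BPF program side this enables. It is
illustrative only: the map and program names, the traced function
(ksys_write) and the header locations for BPF_PROG() are assumptions,
not part of this series; perf_event_output is gpl_only, hence the GPL
license.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

SEC("fentry/ksys_write")
int BPF_PROG(trace_write, unsigned int fd)
{
	__u64 val = fd;

	/* There is no pt_regs in the trampoline context; the kernel
	 * side of the helper builds one via perf_fetch_caller_regs(). */
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
			      &val, sizeof(val));
	return 0;
}

char LICENSE[] SEC("license") = "GPL";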

Comments

Alexei Starovoitov Jan. 6, 2020, 11:27 p.m. UTC | #1
On Sun, Dec 29, 2019 at 03:37:37PM +0100, Jiri Olsa wrote:
> SNIP
>
> +struct bpf_kfunc_regs {
> +	struct pt_regs regs[3];
> +};
> +
> +static DEFINE_PER_CPU(struct bpf_kfunc_regs, bpf_kfunc_regs);
> +static DEFINE_PER_CPU(int, bpf_kfunc_nest_level);

Thanks a bunch for working on it.

I don't understand why a new regs array and nest level are needed.
Can raw_tp_prog_func_proto() be reused as-is, instead of patches 2, 3, 4?
Jiri Olsa Jan. 7, 2020, 12:25 p.m. UTC | #2
On Mon, Jan 06, 2020 at 03:27:21PM -0800, Alexei Starovoitov wrote:
> On Sun, Dec 29, 2019 at 03:37:37PM +0100, Jiri Olsa wrote:
> > SNIP
> 
> Thanks a bunch for working on it.
> 
> I don't understand why a new regs array and nest level are needed.
> Can raw_tp_prog_func_proto() be reused as-is, instead of patches 2, 3, 4?

I thought we might want to trace functions called within the
raw tracepoint handler, which would be prevented if we used
the same nest variable.

Now I'm not sure whether there's some other issue with nesting
bpf programs like that; I'll need to check.

jirka
Alexei Starovoitov Jan. 7, 2020, 10:13 p.m. UTC | #3
On Tue, Jan 7, 2020 at 4:25 AM Jiri Olsa <jolsa@redhat.com> wrote:
>
> On Mon, Jan 06, 2020 at 03:27:21PM -0800, Alexei Starovoitov wrote:
> > SNIP
> >
> > I don't understand why a new regs array and nest level are needed.
> > Can raw_tp_prog_func_proto() be reused as-is, instead of patches 2, 3, 4?
>
> I thought we might want to trace functions called within the
> raw tracepoint handler, which would be prevented if we used
> the same nest variable.
>
> Now I'm not sure whether there's some other issue with nesting
> bpf programs like that; I'll need to check.

but nesting is what bpf_raw_tp_nest_level is supposed to solve, no?
I just realized that we already have three *_nest_level counters
in that file. Not sure why one is not enough.
There was an issue in the past when tracepoint, kprobe and skb
programs collided and we had nasty memory corruption, but that was
before _nest_level was introduced. Not sure how we ended up with
three independent counters.
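
For reference, the three existing per-CPU counters being referred to,
as they appear in kernel/trace/bpf_trace.c at this series' base (the
trailing comments are editorial, not from the kernel source):

static DEFINE_PER_CPU(int, bpf_trace_nest_level);        /* bpf_perf_event_output() */
static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);       /* get/put_bpf_raw_tp_regs() */
static DEFINE_PER_CPU(int, bpf_event_output_nest_level); /* bpf_event_output() */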
Jiri Olsa Jan. 8, 2020, 10:24 a.m. UTC | #4
On Tue, Jan 07, 2020 at 02:13:42PM -0800, Alexei Starovoitov wrote:
> SNIP
>
> but nesting is what bpf_raw_tp_nest_level is supposed to solve, no?
> I just realized that we already have three *_nest_level counters
> in that file. Not sure why one is not enough.
> There was an issue in the past when tracepoint, kprobe and skb
> programs collided and we had nasty memory corruption, but that was
> before _nest_level was introduced. Not sure how we ended up with
> three independent counters.

ok, I'm not sure now what the initial impulse for that was;
I'll make it share the counter with raw tracepoints

jirka
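
A sketch of what sharing the counter with raw tracepoints could look
like, reusing the get_bpf_raw_tp_regs()/put_bpf_raw_tp_regs() helpers
already in bpf_trace.c (hypothetical; not code from this posting or
from a later revision):

BPF_CALL_5(bpf_perf_event_output_kfunc, void *, ctx, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	/* Reuse the raw tracepoint scratch pt_regs and nest counter
	 * instead of the new bpf_kfunc_regs/bpf_kfunc_nest_level pair. */
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	ret = ____bpf_perf_event_output(regs, map, flags, data, size);

	put_bpf_raw_tp_regs();
	return ret;
}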

Patch

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e5ef4ae9edb5..1b270bbd9016 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1151,6 +1151,69 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+struct bpf_kfunc_regs {
+	struct pt_regs regs[3];
+};
+
+static DEFINE_PER_CPU(struct bpf_kfunc_regs, bpf_kfunc_regs);
+static DEFINE_PER_CPU(int, bpf_kfunc_nest_level);
+
+static struct pt_regs *get_bpf_kfunc_regs(void)
+{
+	struct bpf_kfunc_regs *tp_regs = this_cpu_ptr(&bpf_kfunc_regs);
+	int nest_level = this_cpu_inc_return(bpf_kfunc_nest_level);
+
+	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
+		this_cpu_dec(bpf_kfunc_nest_level);
+		return ERR_PTR(-EBUSY);
+	}
+
+	return &tp_regs->regs[nest_level - 1];
+}
+
+static void put_bpf_kfunc_regs(void)
+{
+	this_cpu_dec(bpf_kfunc_nest_level);
+}
+
+BPF_CALL_5(bpf_perf_event_output_kfunc, void *, ctx, struct bpf_map *, map,
+	   u64, flags, void *, data, u64, size)
+{
+	struct pt_regs *regs = get_bpf_kfunc_regs();
+	int ret;
+
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
+
+	perf_fetch_caller_regs(regs);
+	ret = ____bpf_perf_event_output(regs, map, flags, data, size);
+
+	put_bpf_kfunc_regs();
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_perf_event_output_proto_kfunc = {
+	.func		= bpf_perf_event_output_kfunc,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
+static const struct bpf_func_proto *
+kfunc_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_perf_event_output:
+		return &bpf_perf_event_output_proto_kfunc;
+	default:
+		return tracing_func_proto(func_id, prog);
+	}
+}
+
 static const struct bpf_func_proto *
 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1160,6 +1223,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_skb_output_proto;
 #endif
 	default:
+		if (prog->expected_attach_type == BPF_TRACE_FENTRY ||
+		    prog->expected_attach_type == BPF_TRACE_FEXIT)
+			return kfunc_prog_func_proto(func_id, prog);
+
 		return raw_tp_prog_func_proto(func_id, prog);
 	}
 }
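
For completeness, a user-space loader sketch for a program like the one
shown after the commit message, using the libbpf perf_buffer API of
this period; the object path, section title and map name are
assumptions:

#include <stdio.h>
#include <bpf/libbpf.h>

static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
	/* The fentry program pushed a single u64 (the fd argument). */
	printf("cpu %d: fd %llu\n", cpu, *(unsigned long long *)data);
}

int main(void)
{
	struct perf_buffer_opts pb_opts = { .sample_cb = on_sample };
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct perf_buffer *pb;
	int map_fd;

	obj = bpf_object__open_file("fentry_output.o", NULL);
	if (libbpf_get_error(obj) || bpf_object__load(obj))
		return 1;

	prog = bpf_object__find_program_by_title(obj, "fentry/ksys_write");
	if (!prog || libbpf_get_error(bpf_program__attach_trace(prog)))
		return 1;

	map_fd = bpf_object__find_map_fd_by_name(obj, "events");
	pb = perf_buffer__new(map_fd, 8 /* pages per CPU */, &pb_opts);
	if (libbpf_get_error(pb))
		return 1;

	while (perf_buffer__poll(pb, 100 /* ms */) >= 0)
		;
	return 0;
}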