diff mbox series

[v2,bpf-next,5/8] bpf: introduce BPF_RAW_TRACEPOINT

Message ID 20180321185448.2806324-6-ast@fb.com
State Superseded, archived
Delegated to: BPF Maintainers
Headers show
Series bpf, tracing: introduce bpf raw tracepoints | expand

Commit Message

Alexei Starovoitov March 21, 2018, 6:54 p.m. UTC
From: Alexei Starovoitov <ast@kernel.org>

Introduce BPF_PROG_TYPE_RAW_TRACEPOINT bpf program type to access
kernel internal arguments of the tracepoints in their raw form.

From the bpf program's point of view, access to the arguments looks like:
struct bpf_raw_tracepoint_args {
       __u64 args[0];
};

int bpf_prog(struct bpf_raw_tracepoint_args *ctx)
{
  // program can read args[N] where N depends on tracepoint
  // and statically verified at program load+attach time
}

The kprobe+bpf infrastructure allows programs to access function arguments.
This feature allows programs to access raw tracepoint arguments.

Similar to the proposed 'dynamic ftrace events', there are no ABI guarantees
as to what the tracepoint arguments are or what they mean.
The program needs to type cast args properly and use the bpf_probe_read()
helper to access struct fields when an argument is a pointer.

For every tracepoint __bpf_trace_##call function is prepared.
In assembler it looks like:
(gdb) disassemble __bpf_trace_xdp_exception
Dump of assembler code for function __bpf_trace_xdp_exception:
   0xffffffff81132080 <+0>:     mov    %ecx,%ecx
   0xffffffff81132082 <+2>:     jmpq   0xffffffff811231f0 <bpf_trace_run3>

where

TRACE_EVENT(xdp_exception,
        TP_PROTO(const struct net_device *dev,
                 const struct bpf_prog *xdp, u32 act),

The above assembler snippet casts the 32-bit 'act' field to 'u64'
to pass into bpf_trace_run3(), while 'dev' and 'xdp' args are passed as-is.
All ~500 __bpf_trace_*() functions are only 5-10 bytes long,
and in total this approach adds 7k bytes to .text and 8k bytes
to .rodata since the probe funcs need to appear in kallsyms.
The alternative to making __bpf_trace_##call global in kallsyms
would have been to keep them static and add another pointer to these
static functions to 'struct trace_event_class' and 'struct trace_event_call',
but keeping them global simplifies the implementation and keeps it independent
from the tracing side.

This approach also gives the lowest possible overhead
when calling trace_xdp_exception() from kernel C code and
transitioning into bpf land.
Since tracepoint+bpf are used at speeds of 1M+ events per second,
this is a very valuable optimization.

Since the ftrace and perf sides are not involved, a new
BPF_RAW_TRACEPOINT_OPEN sys_bpf command is introduced
that returns an anon_inode FD of a 'bpf-raw-tracepoint' object.

The user space looks like:
// load bpf prog with BPF_PROG_TYPE_RAW_TRACEPOINT type
prog_fd = bpf_prog_load(...);
// receive anon_inode fd for given bpf_raw_tracepoint with prog attached
raw_tp_fd = bpf_raw_tracepoint_open("xdp_exception", prog_fd);

Ctrl-C of the tracing daemon or cmdline tool that uses this feature
will automatically detach the bpf program, unload it and
unregister the tracepoint probe.

On the kernel side for_each_kernel_tracepoint() is used
to find a tracepoint with "xdp_exception" name
(that would be __tracepoint_xdp_exception record)

Then kallsyms_lookup_name() is used to find the addr
of __bpf_trace_xdp_exception() probe function.

And finally tracepoint_probe_register() is used to connect probe
with tracepoint.

Addition of bpf_raw_tracepoint doesn't interfere with ftrace and perf
tracepoint mechanisms. perf_event_open() can be used in parallel
on the same tracepoint.
Multiple bpf_raw_tracepoint_open("xdp_exception", prog_fd) calls are permitted,
each with its own bpf program. The kernel will execute
all tracepoint probes and all attached bpf programs.

In the future bpf_raw_tracepoints can be extended with
query/introspection logic.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_types.h    |   1 +
 include/linux/trace_events.h |  57 +++++++++
 include/trace/bpf_probe.h    |  87 +++++++++++++
 include/trace/define_trace.h |   1 +
 include/uapi/linux/bpf.h     |  11 ++
 kernel/bpf/syscall.c         |  87 +++++++++++++
 kernel/trace/bpf_trace.c     | 283 +++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 527 insertions(+)
 create mode 100644 include/trace/bpf_probe.h

Comments

Daniel Borkmann March 22, 2018, 9:43 a.m. UTC | #1
On 03/21/2018 07:54 PM, Alexei Starovoitov wrote:
[...]
> @@ -546,6 +556,53 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
>  void perf_trace_buf_update(void *record, u16 type);
>  void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
>  
> +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
> +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
> +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3);
> +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3, u64 arg4);
> +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3, u64 arg4, u64 arg5);
> +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6);
> +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
> +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		    u64 arg8);
> +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		    u64 arg8, u64 arg9);
> +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		     u64 arg8, u64 arg9, u64 arg10);
> +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11);
> +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
> +void bpf_trace_run13(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
> +		     u64 arg13);
> +void bpf_trace_run14(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
> +		     u64 arg13, u64 arg14);
> +void bpf_trace_run15(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
> +		     u64 arg13, u64 arg14, u64 arg15);
> +void bpf_trace_run16(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
> +		     u64 arg13, u64 arg14, u64 arg15, u64 arg16);
> +void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
> +		     u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17);
>  void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
>  			       struct trace_event_call *call, u64 count,
>  			       struct pt_regs *regs, struct hlist_head *head,
[...]
> @@ -896,3 +976,206 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
>  
>  	return ret;
>  }
> +
> +static __always_inline
> +void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
> +{
> +	rcu_read_lock();
> +	preempt_disable();
> +	(void) BPF_PROG_RUN(prog, args);
> +	preempt_enable();
> +	rcu_read_unlock();
> +}
> +
> +#define EVAL1(FN, X) FN(X)
> +#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
> +#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
> +#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
> +#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
> +#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
> +
> +#define COPY(X) args[X - 1] = arg##X;
> +
> +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1)
> +{
> +	u64 args[1];
> +
> +	EVAL1(COPY, 1);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run1);
> +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2)
> +{
> +	u64 args[2];
> +
> +	EVAL2(COPY, 1, 2);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run2);
> +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3)
> +{
> +	u64 args[3];
> +
> +	EVAL3(COPY, 1, 2, 3);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run3);
> +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3, u64 arg4)
> +{
> +	u64 args[4];
> +
> +	EVAL4(COPY, 1, 2, 3, 4);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run4);
> +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3, u64 arg4, u64 arg5)
> +{
> +	u64 args[5];
> +
> +	EVAL5(COPY, 1, 2, 3, 4, 5);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run5);
> +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6)
> +{
> +	u64 args[6];
> +
> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run6);
> +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
> +{
> +	u64 args[7];
> +
> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
> +	EVAL1(COPY, 7);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run7);
> +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		    u64 arg8)
> +{
> +	u64 args[8];
> +
> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
> +	EVAL2(COPY, 7, 8);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run8);
> +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		    u64 arg8, u64 arg9)
> +{
> +	u64 args[9];
> +
> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
> +	EVAL3(COPY, 7, 8, 9);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run9);
> +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		     u64 arg8, u64 arg9, u64 arg10)
> +{
> +	u64 args[10];
> +
> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
> +	EVAL4(COPY, 7, 8, 9, 10);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run10);
> +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11)
> +{
> +	u64 args[11];
> +
> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
> +	EVAL5(COPY, 7, 8, 9, 10, 11);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run11);
> +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12)
> +{
> +	u64 args[12];
> +
> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
> +	EVAL6(COPY, 7, 8, 9, 10, 11, 12);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run12);
> +void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
> +		     u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17)
> +{
> +	u64 args[17];
> +
> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
> +	EVAL6(COPY, 7, 8, 9, 10, 11, 12);
> +	EVAL5(COPY, 13, 14, 15, 16, 17);
> +	__bpf_trace_run(prog, args);
> +}
> +EXPORT_SYMBOL_GPL(bpf_trace_run17);

It would be nice if we could generate all of the above via a macro, e.g. once we define
a hard upper limit for the max number of tracepoint args anyway, so that this gets adjusted
automatically as well. Maybe some of the logic from the BPF_CALL_*() macros could be borrowed
for this purpose.

Thanks,
Daniel
Alexei Starovoitov March 22, 2018, 3:41 p.m. UTC | #2
On 3/22/18 2:43 AM, Daniel Borkmann wrote:
> On 03/21/2018 07:54 PM, Alexei Starovoitov wrote:
> [...]
>> @@ -546,6 +556,53 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
>>  void perf_trace_buf_update(void *record, u16 type);
>>  void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
>>
>> +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
>> +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
>> +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3);
>> +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4);
>> +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5);
>> +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6);
>> +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
>> +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		    u64 arg8);
>> +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		    u64 arg8, u64 arg9);
>> +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10);
>> +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11);
>> +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
>> +void bpf_trace_run13(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>> +		     u64 arg13);
>> +void bpf_trace_run14(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>> +		     u64 arg13, u64 arg14);
>> +void bpf_trace_run15(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>> +		     u64 arg13, u64 arg14, u64 arg15);
>> +void bpf_trace_run16(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>> +		     u64 arg13, u64 arg14, u64 arg15, u64 arg16);
>> +void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>> +		     u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17);
>>  void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
>>  			       struct trace_event_call *call, u64 count,
>>  			       struct pt_regs *regs, struct hlist_head *head,
> [...]
>> @@ -896,3 +976,206 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
>>
>>  	return ret;
>>  }
>> +
>> +static __always_inline
>> +void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
>> +{
>> +	rcu_read_lock();
>> +	preempt_disable();
>> +	(void) BPF_PROG_RUN(prog, args);
>> +	preempt_enable();
>> +	rcu_read_unlock();
>> +}
>> +
>> +#define EVAL1(FN, X) FN(X)
>> +#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
>> +#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
>> +#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
>> +#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
>> +#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
>> +
>> +#define COPY(X) args[X - 1] = arg##X;
>> +
>> +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1)
>> +{
>> +	u64 args[1];
>> +
>> +	EVAL1(COPY, 1);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run1);
>> +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2)
>> +{
>> +	u64 args[2];
>> +
>> +	EVAL2(COPY, 1, 2);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run2);
>> +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3)
>> +{
>> +	u64 args[3];
>> +
>> +	EVAL3(COPY, 1, 2, 3);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run3);
>> +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4)
>> +{
>> +	u64 args[4];
>> +
>> +	EVAL4(COPY, 1, 2, 3, 4);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run4);
>> +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5)
>> +{
>> +	u64 args[5];
>> +
>> +	EVAL5(COPY, 1, 2, 3, 4, 5);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run5);
>> +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6)
>> +{
>> +	u64 args[6];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run6);
>> +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
>> +{
>> +	u64 args[7];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL1(COPY, 7);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run7);
>> +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		    u64 arg8)
>> +{
>> +	u64 args[8];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL2(COPY, 7, 8);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run8);
>> +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		    u64 arg8, u64 arg9)
>> +{
>> +	u64 args[9];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL3(COPY, 7, 8, 9);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run9);
>> +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10)
>> +{
>> +	u64 args[10];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL4(COPY, 7, 8, 9, 10);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run10);
>> +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11)
>> +{
>> +	u64 args[11];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL5(COPY, 7, 8, 9, 10, 11);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run11);
>> +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12)
>> +{
>> +	u64 args[12];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL6(COPY, 7, 8, 9, 10, 11, 12);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run12);
>> +void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>> +		     u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17)
>> +{
>> +	u64 args[17];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL6(COPY, 7, 8, 9, 10, 11, 12);
>> +	EVAL5(COPY, 13, 14, 15, 16, 17);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run17);
>
> Would be nice if we could generate all these above via macro, e.g. when we define
> a hard upper limit for max number of tracepoint args anyway, so this gets automatically
> adjusted as well. Maybe some of the logic from BPF_CALL_*() macros could be borrowed
> for this purpose.

I've thought about it, but couldn't figure out how to do it.
Suggestions are welcome.
The preprocessor cannot expand a constant N into N statements.
There has to be something like:
...
#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
for whatever maximum we pick.
I picked 6 as a good compromise and used it twice in bpf_trace_run1x().
A similar thing is possible for u64 arg1, u64 arg2, ...
but it will be harder to read.
Looking forward to what you can come up with.
Daniel Borkmann March 23, 2018, 11:13 p.m. UTC | #3
On 03/22/2018 04:41 PM, Alexei Starovoitov wrote:
> On 3/22/18 2:43 AM, Daniel Borkmann wrote:
>> On 03/21/2018 07:54 PM, Alexei Starovoitov wrote:
>> [...]
>>> @@ -546,6 +556,53 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
>>>  void perf_trace_buf_update(void *record, u16 type);
>>>  void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
>>>
>>> +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
>>> +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
>>> +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3);
>>> +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3, u64 arg4);
>>> +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3, u64 arg4, u64 arg5);
>>> +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6);
>>> +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
>>> +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +            u64 arg8);
>>> +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +            u64 arg8, u64 arg9);
>>> +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +             u64 arg8, u64 arg9, u64 arg10);
>>> +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11);
>>> +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
>>> +void bpf_trace_run13(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>> +             u64 arg13);
>>> +void bpf_trace_run14(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>> +             u64 arg13, u64 arg14);
>>> +void bpf_trace_run15(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>> +             u64 arg13, u64 arg14, u64 arg15);
>>> +void bpf_trace_run16(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>> +             u64 arg13, u64 arg14, u64 arg15, u64 arg16);
>>> +void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>> +             u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17);
>>>  void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
>>>                     struct trace_event_call *call, u64 count,
>>>                     struct pt_regs *regs, struct hlist_head *head,
>> [...]
>>> @@ -896,3 +976,206 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
>>>
>>>      return ret;
>>>  }
>>> +
>>> +static __always_inline
>>> +void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
>>> +{
>>> +    rcu_read_lock();
>>> +    preempt_disable();
>>> +    (void) BPF_PROG_RUN(prog, args);
>>> +    preempt_enable();
>>> +    rcu_read_unlock();
>>> +}
>>> +
>>> +#define EVAL1(FN, X) FN(X)
>>> +#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
>>> +#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
>>> +#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
>>> +#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
>>> +#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
>>> +
>>> +#define COPY(X) args[X - 1] = arg##X;
>>> +
>>> +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1)
>>> +{
>>> +    u64 args[1];
>>> +
>>> +    EVAL1(COPY, 1);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run1);
>>> +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2)
>>> +{
>>> +    u64 args[2];
>>> +
>>> +    EVAL2(COPY, 1, 2);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run2);
>>> +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3)
>>> +{
>>> +    u64 args[3];
>>> +
>>> +    EVAL3(COPY, 1, 2, 3);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run3);
>>> +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3, u64 arg4)
>>> +{
>>> +    u64 args[4];
>>> +
>>> +    EVAL4(COPY, 1, 2, 3, 4);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run4);
>>> +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3, u64 arg4, u64 arg5)
>>> +{
>>> +    u64 args[5];
>>> +
>>> +    EVAL5(COPY, 1, 2, 3, 4, 5);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run5);
>>> +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6)
>>> +{
>>> +    u64 args[6];
>>> +
>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run6);
>>> +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
>>> +{
>>> +    u64 args[7];
>>> +
>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>> +    EVAL1(COPY, 7);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run7);
>>> +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +            u64 arg8)
>>> +{
>>> +    u64 args[8];
>>> +
>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>> +    EVAL2(COPY, 7, 8);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run8);
>>> +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +            u64 arg8, u64 arg9)
>>> +{
>>> +    u64 args[9];
>>> +
>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>> +    EVAL3(COPY, 7, 8, 9);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run9);
>>> +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +             u64 arg8, u64 arg9, u64 arg10)
>>> +{
>>> +    u64 args[10];
>>> +
>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>> +    EVAL4(COPY, 7, 8, 9, 10);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run10);
>>> +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11)
>>> +{
>>> +    u64 args[11];
>>> +
>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>> +    EVAL5(COPY, 7, 8, 9, 10, 11);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run11);
>>> +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12)
>>> +{
>>> +    u64 args[12];
>>> +
>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>> +    EVAL6(COPY, 7, 8, 9, 10, 11, 12);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run12);
>>> +void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>> +             u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17)
>>> +{
>>> +    u64 args[17];
>>> +
>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>> +    EVAL6(COPY, 7, 8, 9, 10, 11, 12);
>>> +    EVAL5(COPY, 13, 14, 15, 16, 17);
>>> +    __bpf_trace_run(prog, args);
>>> +}
>>> +EXPORT_SYMBOL_GPL(bpf_trace_run17);
>>
>> Would be nice if we could generate all these above via macro, e.g. when we define
>> a hard upper limit for max number of tracepoint args anyway, so this gets automatically
>> adjusted as well. Maybe some of the logic from BPF_CALL_*() macros could be borrowed
>> for this purpose.
> 
> I've thought about it, but couldn't figure out how to do it.
> Suggestions are welcome.
> The preprocessor cannot expand a constant N into N statements.
> There gotta be something like:
> ...
> #define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
> #define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
> for whatever maximum we will pick.

Right.

> I picked 6 as a good compromise and used it twice in bpf_trace_run1x()
> Similar thing possible for u64 arg1, u64 arg2, ...
> but it will be harder to read.
> Looking forward what you can come up with.

Just took a quick look; the below would work for generating the
signature and function. I went up to 9 here:

#define UNPACK(...)			__VA_ARGS__
#define REPEAT_1(FN, DL, X, ...)	FN(X)
#define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
#define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
#define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
#define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
#define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
#define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
#define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
#define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
#define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)

#define SARG(X)		u64 arg##X
#define COPY(X)		args[X] = arg##X

#define __DL_COM	(,)
#define __DL_SEM	(;)

#define __SEQ		0, 1, 2, 3, 4, 5, 6, 7, 8, 9

#define BPF_TRACE_DECL_x(x)						\
	void bpf_trace_run##x(struct bpf_prog *prog,			\
			      REPEAT(x, SARG, __DL_COM, __SEQ))
#define BPF_TRACE_DEFN_x(x)						\
	void bpf_trace_run##x(struct bpf_prog *prog,			\
			      REPEAT(x, SARG, __DL_COM, __SEQ))		\
	{								\
		u64 args[x];						\
		REPEAT(x, COPY, __DL_SEM, __SEQ);			\
		__bpf_trace_run(prog, args);				\
	}								\
	EXPORT_SYMBOL_GPL(bpf_trace_run##x)

So doing a ...

BPF_TRACE_DECL_x(5);
BPF_TRACE_DEFN_x(5);

... will generate in kernel/trace/bpf_trace.i:

void bpf_trace_run5(struct bpf_prog *prog, u64 arg0 , u64 arg1 , u64 arg2 , u64 arg3 , u64 arg4);
void bpf_trace_run5(struct bpf_prog *prog, u64 arg0 , u64 arg1 , u64 arg2 , u64 arg3 , u64 arg4)
{
	u64 args[5];
	args[0] = arg0 ;
	args[1] = arg1 ;
	args[2] = arg2 ;
	args[3] = arg3 ;
	args[4] = arg4;
	__bpf_trace_run(prog, args);
} [...]

Meaning, the EVALx() macros could be removed from there, too. Potentially, the
REPEAT() macro could sit in its own include/linux/ header for others to reuse
or such.

Cheers,
Daniel
Alexei Starovoitov March 24, 2018, 12:58 a.m. UTC | #4
On 3/23/18 4:13 PM, Daniel Borkmann wrote:
> On 03/22/2018 04:41 PM, Alexei Starovoitov wrote:
>> On 3/22/18 2:43 AM, Daniel Borkmann wrote:
>>> On 03/21/2018 07:54 PM, Alexei Starovoitov wrote:
>>> [...]
>>>> @@ -546,6 +556,53 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
>>>>  void perf_trace_buf_update(void *record, u16 type);
>>>>  void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
>>>>
>>>> +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
>>>> +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
>>>> +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3);
>>>> +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3, u64 arg4);
>>>> +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3, u64 arg4, u64 arg5);
>>>> +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6);
>>>> +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
>>>> +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +            u64 arg8);
>>>> +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +            u64 arg8, u64 arg9);
>>>> +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +             u64 arg8, u64 arg9, u64 arg10);
>>>> +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11);
>>>> +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
>>>> +void bpf_trace_run13(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>>> +             u64 arg13);
>>>> +void bpf_trace_run14(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>>> +             u64 arg13, u64 arg14);
>>>> +void bpf_trace_run15(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>>> +             u64 arg13, u64 arg14, u64 arg15);
>>>> +void bpf_trace_run16(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>>> +             u64 arg13, u64 arg14, u64 arg15, u64 arg16);
>>>> +void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>>> +             u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17);
>>>>  void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
>>>>                     struct trace_event_call *call, u64 count,
>>>>                     struct pt_regs *regs, struct hlist_head *head,
>>> [...]
>>>> @@ -896,3 +976,206 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
>>>>
>>>>      return ret;
>>>>  }
>>>> +
>>>> +static __always_inline
>>>> +void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
>>>> +{
>>>> +    rcu_read_lock();
>>>> +    preempt_disable();
>>>> +    (void) BPF_PROG_RUN(prog, args);
>>>> +    preempt_enable();
>>>> +    rcu_read_unlock();
>>>> +}
>>>> +
>>>> +#define EVAL1(FN, X) FN(X)
>>>> +#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
>>>> +#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
>>>> +#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
>>>> +#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
>>>> +#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
>>>> +
>>>> +#define COPY(X) args[X - 1] = arg##X;
>>>> +
>>>> +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1)
>>>> +{
>>>> +    u64 args[1];
>>>> +
>>>> +    EVAL1(COPY, 1);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run1);
>>>> +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2)
>>>> +{
>>>> +    u64 args[2];
>>>> +
>>>> +    EVAL2(COPY, 1, 2);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run2);
>>>> +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3)
>>>> +{
>>>> +    u64 args[3];
>>>> +
>>>> +    EVAL3(COPY, 1, 2, 3);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run3);
>>>> +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3, u64 arg4)
>>>> +{
>>>> +    u64 args[4];
>>>> +
>>>> +    EVAL4(COPY, 1, 2, 3, 4);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run4);
>>>> +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3, u64 arg4, u64 arg5)
>>>> +{
>>>> +    u64 args[5];
>>>> +
>>>> +    EVAL5(COPY, 1, 2, 3, 4, 5);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run5);
>>>> +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6)
>>>> +{
>>>> +    u64 args[6];
>>>> +
>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run6);
>>>> +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
>>>> +{
>>>> +    u64 args[7];
>>>> +
>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>> +    EVAL1(COPY, 7);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run7);
>>>> +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +            u64 arg8)
>>>> +{
>>>> +    u64 args[8];
>>>> +
>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>> +    EVAL2(COPY, 7, 8);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run8);
>>>> +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +            u64 arg8, u64 arg9)
>>>> +{
>>>> +    u64 args[9];
>>>> +
>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>> +    EVAL3(COPY, 7, 8, 9);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run9);
>>>> +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +             u64 arg8, u64 arg9, u64 arg10)
>>>> +{
>>>> +    u64 args[10];
>>>> +
>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>> +    EVAL4(COPY, 7, 8, 9, 10);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run10);
>>>> +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11)
>>>> +{
>>>> +    u64 args[11];
>>>> +
>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>> +    EVAL5(COPY, 7, 8, 9, 10, 11);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run11);
>>>> +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12)
>>>> +{
>>>> +    u64 args[12];
>>>> +
>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>> +    EVAL6(COPY, 7, 8, 9, 10, 11, 12);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run12);
>>>> +void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>>> +             u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17)
>>>> +{
>>>> +    u64 args[17];
>>>> +
>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>> +    EVAL6(COPY, 7, 8, 9, 10, 11, 12);
>>>> +    EVAL5(COPY, 13, 14, 15, 16, 17);
>>>> +    __bpf_trace_run(prog, args);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run17);
>>>
>>> Would be nice if we could generate all these above via macro, e.g. when we define
>>> a hard upper limit for max number of tracepoint args anyway, so this gets automatically
>>> adjusted as well. Maybe some of the logic from BPF_CALL_*() macros could be borrowed
>>> for this purpose.
>>
>> I've thought about it, but couldn't figure out how to do it.
>> Suggestions are welcome.
>> The preprocessor cannot expand a constant N into N statements.
>> There gotta be something like:
>> ...
>> #define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
>> #define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
>> for whatever maximum we will pick.
>
> Right.
>
>> I picked 6 as a good compromise and used it twice in bpf_trace_run1x()
>> Similar thing possible for u64 arg1, u64 arg2, ...
>> but it will be harder to read.
>> Looking forward what you can come up with.
>
> Just took a quick look, so the below one would work for generating the
> signature and function. I did till 9 here:
>
> #define UNPACK(...)			__VA_ARGS__
> #define REPEAT_1(FN, DL, X, ...)	FN(X)
> #define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
> #define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
> #define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
> #define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
> #define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
> #define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
> #define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
> #define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
> #define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)
>
> #define SARG(X)		u64 arg##X
> #define COPY(X)		args[X] = arg##X
>
> #define __DL_COM	(,)
> #define __DL_SEM	(;)
>
> #define __SEQ		0, 1, 2, 3, 4, 5, 6, 7, 8, 9
>
> #define BPF_TRACE_DECL_x(x)						\
> 	void bpf_trace_run##x(struct bpf_prog *prog,			\
> 			      REPEAT(x, SARG, __DL_COM, __SEQ))
> #define BPF_TRACE_DEFN_x(x)						\
> 	void bpf_trace_run##x(struct bpf_prog *prog,			\
> 			      REPEAT(x, SARG, __DL_COM, __SEQ))		\
> 	{								\
> 		u64 args[x];						\
> 		REPEAT(x, COPY, __DL_SEM, __SEQ);			\
> 		__bpf_trace_run(prog, args);				\
> 	}								\
> 	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
>
> So doing a ...
>
> BPF_TRACE_DECL_x(5);
> BPF_TRACE_DEFN_x(5);

Interestingly, in addition to the above, defining
#define __REPEAT(X, FN, DL, ...) REPEAT_##X(FN, DL, __VA_ARGS__)
to allow recursive expansion and doing
__REPEAT(12, BPF_TRACE_DECL_x, __DL_SEM, __SEQ_1_12);
almost works...
I'm guessing it's hitting a preprocessor-internal limit on the
number of expressions to expand.
It expands 1-6 nicely, while 7-12 are only partially expanded :)
I guess I have to use
BPF_TRACE_DECL_x(1);
BPF_TRACE_DECL_x(2);
BPF_TRACE_DECL_x(3);
BPF_TRACE_DECL_x(4);
...
BPF_TRACE_DECL_x(12);
which doesn't look better than open coding them.
Only for BPF_TRACE_DEFN_x it's probably worth it.

> ... will generate in kernel/trace/bpf_trace.i:
>
> void bpf_foo_trace_run5(struct bpf_prog *prog, u64 arg0 , u64 arg1 , u64 arg2 , u64 arg3 , u64 arg4);
> void bpf_foo_trace_run5(struct bpf_prog *prog, u64 arg0 , u64 arg1 , u64 arg2 , u64 arg3 , u64 arg4)
> {
> 	u64 args[5];
> 	args[0] = arg0 ;
> 	args[1] = arg1 ;
> 	args[2] = arg2 ;
> 	args[3] = arg3 ;
> 	args[4] = arg4;
> 	__bpf_trace_run(prog, args);
> } [...]
>
> Meaning, the EVALx() macros could be removed from there, too. Potentially, the
> REPEAT() macro could sit in its own include/linux/ header for others to reuse
> or such.

That feels too specific to this use case. I'd wait for a second user before
moving it to include/linux/kernel.h
Steven Rostedt March 24, 2018, 1:39 a.m. UTC | #5
On Sat, 24 Mar 2018 00:13:28 +0100
Daniel Borkmann <daniel@iogearbox.net> wrote:

> #define UNPACK(...)			__VA_ARGS__
> #define REPEAT_1(FN, DL, X, ...)	FN(X)
> #define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
> #define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
> #define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
> #define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
> #define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
> #define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
> #define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
> #define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
> #define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)
> 
> #define SARG(X)		u64 arg##X
> #define COPY(X)		args[X] = arg##X
> 
> #define __DL_COM	(,)
> #define __DL_SEM	(;)
> 
> #define __SEQ		0, 1, 2, 3, 4, 5, 6, 7, 8, 9
> 
> #define BPF_TRACE_DECL_x(x)						\
> 	void bpf_trace_run##x(struct bpf_prog *prog,			\
> 			      REPEAT(x, SARG, __DL_COM, __SEQ))
> #define BPF_TRACE_DEFN_x(x)						\
> 	void bpf_trace_run##x(struct bpf_prog *prog,			\
> 			      REPEAT(x, SARG, __DL_COM, __SEQ))		\
> 	{								\
> 		u64 args[x];						\
> 		REPEAT(x, COPY, __DL_SEM, __SEQ);			\
> 		__bpf_trace_run(prog, args);				\
> 	}								\
> 	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
> 
> So doing a ...
> 
> BPF_TRACE_DECL_x(5);
> BPF_TRACE_DEFN_x(5);
> 
> ... will generate in kernel/trace/bpf_trace.i:
> 
> void bpf_foo_trace_run5(struct bpf_prog *prog, u64 arg0 , u64 arg1 , u64 arg2 , u64 arg3 , u64 arg4);
> void bpf_foo_trace_run5(struct bpf_prog *prog, u64 arg0 , u64 arg1 , u64 arg2 , u64 arg3 , u64 arg4)
> {
> 	u64 args[5];
> 	args[0] = arg0 ;
> 	args[1] = arg1 ;
> 	args[2] = arg2 ;
> 	args[3] = arg3 ;
> 	args[4] = arg4;
> 	__bpf_trace_run(prog, args);
> } [...]
> 
> Meaning, the EVALx() macros could be removed from there, too. Potentially, the
> REPEAT() macro could sit in its own include/linux/ header for others to reuse
> or such.

And people think my macro magic in include/trace/ftrace_event.h is
funky. Now I know who stole my MACRO MAGIC HAT.

-- Steve
Alexei Starovoitov March 24, 2018, 1:43 a.m. UTC | #6
On 3/23/18 5:58 PM, Alexei Starovoitov wrote:
> On 3/23/18 4:13 PM, Daniel Borkmann wrote:
>> On 03/22/2018 04:41 PM, Alexei Starovoitov wrote:
>>> On 3/22/18 2:43 AM, Daniel Borkmann wrote:
>>>> On 03/21/2018 07:54 PM, Alexei Starovoitov wrote:
>>>> [...]
>>>>> @@ -546,6 +556,53 @@ extern void ftrace_profile_free_filter(struct
>>>>> perf_event *event);
>>>>>  void perf_trace_buf_update(void *record, u16 type);
>>>>>  void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int
>>>>> *rctxp);
>>>>>
>>>>> +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
>>>>> +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
>>>>> +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3);
>>>>> +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3, u64 arg4);
>>>>> +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3, u64 arg4, u64 arg5);
>>>>> +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6);
>>>>> +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
>>>>> +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +            u64 arg8);
>>>>> +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +            u64 arg8, u64 arg9);
>>>>> +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +             u64 arg8, u64 arg9, u64 arg10);
>>>>> +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11);
>>>>> +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
>>>>> +void bpf_trace_run13(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>>>> +             u64 arg13);
>>>>> +void bpf_trace_run14(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>>>> +             u64 arg13, u64 arg14);
>>>>> +void bpf_trace_run15(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>>>> +             u64 arg13, u64 arg14, u64 arg15);
>>>>> +void bpf_trace_run16(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>>>> +             u64 arg13, u64 arg14, u64 arg15, u64 arg16);
>>>>> +void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>>>> +             u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17);
>>>>>  void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
>>>>>                     struct trace_event_call *call, u64 count,
>>>>>                     struct pt_regs *regs, struct hlist_head *head,
>>>> [...]
>>>>> @@ -896,3 +976,206 @@ int perf_event_query_prog_array(struct
>>>>> perf_event *event, void __user *info)
>>>>>
>>>>>      return ret;
>>>>>  }
>>>>> +
>>>>> +static __always_inline
>>>>> +void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
>>>>> +{
>>>>> +    rcu_read_lock();
>>>>> +    preempt_disable();
>>>>> +    (void) BPF_PROG_RUN(prog, args);
>>>>> +    preempt_enable();
>>>>> +    rcu_read_unlock();
>>>>> +}
>>>>> +
>>>>> +#define EVAL1(FN, X) FN(X)
>>>>> +#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
>>>>> +#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
>>>>> +#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
>>>>> +#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
>>>>> +#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
>>>>> +
>>>>> +#define COPY(X) args[X - 1] = arg##X;
>>>>> +
>>>>> +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1)
>>>>> +{
>>>>> +    u64 args[1];
>>>>> +
>>>>> +    EVAL1(COPY, 1);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run1);
>>>>> +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2)
>>>>> +{
>>>>> +    u64 args[2];
>>>>> +
>>>>> +    EVAL2(COPY, 1, 2);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run2);
>>>>> +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3)
>>>>> +{
>>>>> +    u64 args[3];
>>>>> +
>>>>> +    EVAL3(COPY, 1, 2, 3);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run3);
>>>>> +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3, u64 arg4)
>>>>> +{
>>>>> +    u64 args[4];
>>>>> +
>>>>> +    EVAL4(COPY, 1, 2, 3, 4);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run4);
>>>>> +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3, u64 arg4, u64 arg5)
>>>>> +{
>>>>> +    u64 args[5];
>>>>> +
>>>>> +    EVAL5(COPY, 1, 2, 3, 4, 5);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run5);
>>>>> +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6)
>>>>> +{
>>>>> +    u64 args[6];
>>>>> +
>>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run6);
>>>>> +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
>>>>> +{
>>>>> +    u64 args[7];
>>>>> +
>>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>>> +    EVAL1(COPY, 7);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run7);
>>>>> +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +            u64 arg8)
>>>>> +{
>>>>> +    u64 args[8];
>>>>> +
>>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>>> +    EVAL2(COPY, 7, 8);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run8);
>>>>> +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +            u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +            u64 arg8, u64 arg9)
>>>>> +{
>>>>> +    u64 args[9];
>>>>> +
>>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>>> +    EVAL3(COPY, 7, 8, 9);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run9);
>>>>> +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +             u64 arg8, u64 arg9, u64 arg10)
>>>>> +{
>>>>> +    u64 args[10];
>>>>> +
>>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>>> +    EVAL4(COPY, 7, 8, 9, 10);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run10);
>>>>> +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11)
>>>>> +{
>>>>> +    u64 args[11];
>>>>> +
>>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>>> +    EVAL5(COPY, 7, 8, 9, 10, 11);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run11);
>>>>> +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12)
>>>>> +{
>>>>> +    u64 args[12];
>>>>> +
>>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>>> +    EVAL6(COPY, 7, 8, 9, 10, 11, 12);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run12);
>>>>> +void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
>>>>> +             u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>>>>> +             u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>>>>> +             u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17)
>>>>> +{
>>>>> +    u64 args[17];
>>>>> +
>>>>> +    EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>>>>> +    EVAL6(COPY, 7, 8, 9, 10, 11, 12);
>>>>> +    EVAL5(COPY, 13, 14, 15, 16, 17);
>>>>> +    __bpf_trace_run(prog, args);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(bpf_trace_run17);
>>>>
>>>> Would be nice if we could generate all these above via macro, e.g.
>>>> when we define
>>>> a hard upper limit for max number of tracepoint args anyway, so this
>>>> gets automatically
>>>> adjusted as well. Maybe some of the logic from BPF_CALL_*() macros
>>>> could be borrowed
>>>> for this purpose.
>>>
>>> I've thought about it, but couldn't figure out how to do it.
>>> Suggestions are welcome.
>>> The preprocessor cannot expand a constant N into N statements.
>>> There gotta be something like:
>>> ...
>>> #define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
>>> #define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
>>> for whatever maximum we will pick.
>>
>> Right.
>>
>>> I picked 6 as a good compromise and used it twice in bpf_trace_run1x()
>>> Similar thing possible for u64 arg1, u64 arg2, ...
>>> but it will be harder to read.
>>> Looking forward what you can come up with.
>>
>> Just took a quick look, so the below one would work for generating the
>> signature and function. I did till 9 here:
>>
>> #define UNPACK(...)            __VA_ARGS__
>> #define REPEAT_1(FN, DL, X, ...)    FN(X)
>> #define REPEAT_2(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_1(FN, DL,
>> __VA_ARGS__)
>> #define REPEAT_3(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_2(FN, DL,
>> __VA_ARGS__)
>> #define REPEAT_4(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_3(FN, DL,
>> __VA_ARGS__)
>> #define REPEAT_5(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_4(FN, DL,
>> __VA_ARGS__)
>> #define REPEAT_6(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_5(FN, DL,
>> __VA_ARGS__)
>> #define REPEAT_7(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_6(FN, DL,
>> __VA_ARGS__)
>> #define REPEAT_8(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_7(FN, DL,
>> __VA_ARGS__)
>> #define REPEAT_9(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_8(FN, DL,
>> __VA_ARGS__)
>> #define REPEAT(X, FN, DL, ...)        REPEAT_##X(FN, DL, __VA_ARGS__)
>>
>> #define SARG(X)        u64 arg##X
>> #define COPY(X)        args[X] = arg##X
>>
>> #define __DL_COM    (,)
>> #define __DL_SEM    (;)
>>
>> #define __SEQ        0, 1, 2, 3, 4, 5, 6, 7, 8, 9
>>
>> #define BPF_TRACE_DECL_x(x)                        \
>>     void bpf_trace_run##x(struct bpf_prog *prog,            \
>>                   REPEAT(x, SARG, __DL_COM, __SEQ))
>> #define BPF_TRACE_DEFN_x(x)                        \
>>     void bpf_trace_run##x(struct bpf_prog *prog,            \
>>                   REPEAT(x, SARG, __DL_COM, __SEQ))        \
>>     {                                \
>>         u64 args[x];                        \
>>         REPEAT(x, COPY, __DL_SEM, __SEQ);            \
>>         __bpf_trace_run(prog, args);                \
>>     }                                \
>>     EXPORT_SYMBOL_GPL(bpf_trace_run##x)
>>
>> So doing a ...
>>
>> BPF_TRACE_DECL_x(5);
>> BPF_TRACE_DEFN_x(5);
>
> interestingly that in addition to above defining
> #define __REPEAT(X, FN, DL, ...) REPEAT_##X(FN, DL, __VA_ARGS__)
> to allow recursive expansion and doing
> __REPEAT(12, BPF_TRACE_DECL_x, __DL_SEM, __SEQ_1_12);
> almost works...
> I'm guessing it's hitting preprocessor internal limit on
> number of expressions to expand.
> It expands 1-6 nicely and 7-12 are partially expanded :)

It's not a limit I'm hitting, but a self-referential macro issue.
Exactly half gets expanded.
I don't think there is an easy workaround other
than duplicating the whole chain of REPEAT macros
under a slightly different name.
Linus Torvalds March 24, 2018, 2:01 a.m. UTC | #7
On Fri, Mar 23, 2018 at 6:43 PM, Alexei Starovoitov <ast@fb.com> wrote:
>
> it's not the limit I'm hitting, but self referential issue.
> Exactly half gets expanded.
> I don't think there is an easy workaround other
> than duplicating the whole chain of REPEAT macro twice
> with slightly different name.

Take a look at the __MAP() macro in include/linux/syscalls.h.

It basically takes a "transformation" as its argument, and does it <n>
times, where 'n' is the first argument (but could be self-counting).

Maybe it will give you some ideas.

... and maybe it will just drive you mad and make you gouge out your
eyes with a spoon. Don't blame the messenger.

             Linus
Daniel Borkmann March 26, 2018, 7:53 a.m. UTC | #8
On 03/24/2018 02:43 AM, Alexei Starovoitov wrote:
> On 3/23/18 5:58 PM, Alexei Starovoitov wrote:
>> On 3/23/18 4:13 PM, Daniel Borkmann wrote:
>>> On 03/22/2018 04:41 PM, Alexei Starovoitov wrote:
>>>> On 3/22/18 2:43 AM, Daniel Borkmann wrote:
>>>>> On 03/21/2018 07:54 PM, Alexei Starovoitov wrote:
[...]
>>>> I picked 6 as a good compromise and used it twice in bpf_trace_run1x()
>>>> Similar thing possible for u64 arg1, u64 arg2, ...
>>>> but it will be harder to read.
>>>> Looking forward what you can come up with.
>>>
>>> Just took a quick look, so the below one would work for generating the
>>> signature and function. I did till 9 here:
>>>
>>> #define UNPACK(...)            __VA_ARGS__
>>> #define REPEAT_1(FN, DL, X, ...)    FN(X)
>>> #define REPEAT_2(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_1(FN, DL,
>>> __VA_ARGS__)
>>> #define REPEAT_3(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_2(FN, DL,
>>> __VA_ARGS__)
>>> #define REPEAT_4(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_3(FN, DL,
>>> __VA_ARGS__)
>>> #define REPEAT_5(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_4(FN, DL,
>>> __VA_ARGS__)
>>> #define REPEAT_6(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_5(FN, DL,
>>> __VA_ARGS__)
>>> #define REPEAT_7(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_6(FN, DL,
>>> __VA_ARGS__)
>>> #define REPEAT_8(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_7(FN, DL,
>>> __VA_ARGS__)
>>> #define REPEAT_9(FN, DL, X, ...)    FN(X) UNPACK DL REPEAT_8(FN, DL,
>>> __VA_ARGS__)
>>> #define REPEAT(X, FN, DL, ...)        REPEAT_##X(FN, DL, __VA_ARGS__)
>>>
>>> #define SARG(X)        u64 arg##X
>>> #define COPY(X)        args[X] = arg##X
>>>
>>> #define __DL_COM    (,)
>>> #define __DL_SEM    (;)
>>>
>>> #define __SEQ        0, 1, 2, 3, 4, 5, 6, 7, 8, 9
>>>
>>> #define BPF_TRACE_DECL_x(x)                        \
>>>     void bpf_trace_run##x(struct bpf_prog *prog,            \
>>>                   REPEAT(x, SARG, __DL_COM, __SEQ))
>>> #define BPF_TRACE_DEFN_x(x)                        \
>>>     void bpf_trace_run##x(struct bpf_prog *prog,            \
>>>                   REPEAT(x, SARG, __DL_COM, __SEQ))        \
>>>     {                                \
>>>         u64 args[x];                        \
>>>         REPEAT(x, COPY, __DL_SEM, __SEQ);            \
>>>         __bpf_trace_run(prog, args);                \
>>>     }                                \
>>>     EXPORT_SYMBOL_GPL(bpf_trace_run##x)
>>>
>>> So doing a ...
>>>
>>> BPF_TRACE_DECL_x(5);
>>> BPF_TRACE_DEFN_x(5);
>>
>> interestingly that in addition to above defining
>> #define __REPEAT(X, FN, DL, ...) REPEAT_##X(FN, DL, __VA_ARGS__)
>> to allow recursive expansion and doing
>> __REPEAT(12, BPF_TRACE_DECL_x, __DL_SEM, __SEQ_1_12);
>> almost works...
>> I'm guessing it's hitting preprocessor internal limit on
>> number of expressions to expand.
>> It expands 1-6 nicely and 7-12 are partially expanded :)
> 
> it's not the limit I'm hitting, but self referential issue.
> Exactly half gets expanded.
> I don't think there is an easy workaround other
> than duplicating the whole chain of REPEAT macro twice
> with slightly different name.

Hmm, that is kind of annoying. It is probably worth filing a bug against gcc,
though we still won't be able to use it in the near term.

Given it expands just fine for 1-6 arguments, I think Steven made a good
choice with the upper limit of 6 args then (including the build error with the above). ;-)

Thanks,
Daniel
diff mbox series

Patch

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5e2e8a49fb21..6d7243bfb0ff 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -19,6 +19,7 @@  BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
 BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
 BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
 BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
+BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 #endif
 #ifdef CONFIG_CGROUP_BPF
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 8a1442c4e513..46d76bbd5668 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -468,6 +468,8 @@  unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
 int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
 void perf_event_detach_bpf_prog(struct perf_event *event);
 int perf_event_query_prog_array(struct perf_event *event, void __user *info);
+int bpf_probe_register(struct tracepoint *tp, struct bpf_prog *prog);
+int bpf_probe_unregister(struct tracepoint *tp, struct bpf_prog *prog);
 #else
 static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 {
@@ -487,6 +489,14 @@  perf_event_query_prog_array(struct perf_event *event, void __user *info)
 {
 	return -EOPNOTSUPP;
 }
+static inline int bpf_probe_register(struct tracepoint *tp, struct bpf_prog *p)
+{
+	return -EOPNOTSUPP;
+}
+static inline int bpf_probe_unregister(struct tracepoint *tp, struct bpf_prog *p)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 enum {
@@ -546,6 +556,53 @@  extern void ftrace_profile_free_filter(struct perf_event *event);
 void perf_trace_buf_update(void *record, u16 type);
 void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
 
+void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
+void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
+void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3);
+void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4);
+void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5);
+void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5, u64 arg6);
+void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
+void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		    u64 arg8);
+void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		    u64 arg8, u64 arg9);
+void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10);
+void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10, u64 arg11);
+void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
+void bpf_trace_run13(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
+		     u64 arg13);
+void bpf_trace_run14(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
+		     u64 arg13, u64 arg14);
+void bpf_trace_run15(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
+		     u64 arg13, u64 arg14, u64 arg15);
+void bpf_trace_run16(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
+		     u64 arg13, u64 arg14, u64 arg15, u64 arg16);
+void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
+		     u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17);
 void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
 			       struct trace_event_call *call, u64 count,
 			       struct pt_regs *regs, struct hlist_head *head,
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
new file mode 100644
index 000000000000..f67876794de8
--- /dev/null
+++ b/include/trace/bpf_probe.h
@@ -0,0 +1,87 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#undef TRACE_SYSTEM_VAR
+
+#ifdef CONFIG_BPF_EVENTS
+
+#undef __entry
+#define __entry entry
+
+#undef __get_dynamic_array
+#define __get_dynamic_array(field)	\
+		((void *)__entry + (__entry->__data_loc_##field & 0xffff))
+
+#undef __get_dynamic_array_len
+#define __get_dynamic_array_len(field)	\
+		((__entry->__data_loc_##field >> 16) & 0xffff)
+
+#undef __get_str
+#define __get_str(field) ((char *)__get_dynamic_array(field))
+
+#undef __get_bitmask
+#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
+#undef __perf_count
+#define __perf_count(c)	(c)
+
+#undef __perf_task
+#define __perf_task(t)	(t)
+
+/*
+ * Cast any integer or pointer type to u64 without warnings
+ * on both 32-bit and 64-bit archs.
+ */
+#define __CAST_TO_U64(expr) \
+	(u64) __builtin_choose_expr(sizeof(long) < sizeof(expr), \
+				    (expr), \
+				    (long) expr)
+#define __CAST1(a,...) __CAST_TO_U64(a)
+#define __CAST2(a,...) __CAST_TO_U64(a), __CAST1(__VA_ARGS__)
+#define __CAST3(a,...) __CAST_TO_U64(a), __CAST2(__VA_ARGS__)
+#define __CAST4(a,...) __CAST_TO_U64(a), __CAST3(__VA_ARGS__)
+#define __CAST5(a,...) __CAST_TO_U64(a), __CAST4(__VA_ARGS__)
+#define __CAST6(a,...) __CAST_TO_U64(a), __CAST5(__VA_ARGS__)
+#define __CAST7(a,...) __CAST_TO_U64(a), __CAST6(__VA_ARGS__)
+#define __CAST8(a,...) __CAST_TO_U64(a), __CAST7(__VA_ARGS__)
+#define __CAST9(a,...) __CAST_TO_U64(a), __CAST8(__VA_ARGS__)
+#define __CAST10(a,...) __CAST_TO_U64(a), __CAST9(__VA_ARGS__)
+#define __CAST11(a,...) __CAST_TO_U64(a), __CAST10(__VA_ARGS__)
+#define __CAST12(a,...) __CAST_TO_U64(a), __CAST11(__VA_ARGS__)
+#define __CAST13(a,...) __CAST_TO_U64(a), __CAST12(__VA_ARGS__)
+#define __CAST14(a,...) __CAST_TO_U64(a), __CAST13(__VA_ARGS__)
+#define __CAST15(a,...) __CAST_TO_U64(a), __CAST14(__VA_ARGS__)
+#define __CAST16(a,...) __CAST_TO_U64(a), __CAST15(__VA_ARGS__)
+#define __CAST17(a,...) __CAST_TO_U64(a), __CAST16(__VA_ARGS__)
+
+#define CAST_TO_U64(...) __FN_COUNT(__CAST,##__VA_ARGS__)(__VA_ARGS__)
+
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
+/* no 'static' here. The bpf probe functions are global */		\
+notrace void								\
+__bpf_trace_##call(void *__data, proto)					\
+{									\
+	struct bpf_prog *prog = __data;					\
+	\
+	__FN_COUNT(bpf_trace_run, args)(prog, CAST_TO_U64(args));	\
+}
+
+/*
+ * This part is compiled out; it is only here as a build-time check
+ * to make sure that if the tracepoint handling changes, the
+ * bpf probe will fail to compile unless it too is updated.
+ */
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)			\
+static inline void bpf_test_probe_##call(void)				\
+{									\
+	check_trace_callback_type_##call(__bpf_trace_##template);	\
+}
+
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+#endif /* CONFIG_BPF_EVENTS */
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index c040eda95d41..3bbd3b88177f 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -95,6 +95,7 @@ 
 #ifdef TRACEPOINTS_ENABLED
 #include <trace/trace_events.h>
 #include <trace/perf.h>
+#include <trace/bpf_probe.h>
 #endif
 
 #undef TRACE_EVENT
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 18b7c510c511..1878201c2d77 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -94,6 +94,7 @@  enum bpf_cmd {
 	BPF_MAP_GET_FD_BY_ID,
 	BPF_OBJ_GET_INFO_BY_FD,
 	BPF_PROG_QUERY,
+	BPF_RAW_TRACEPOINT_OPEN,
 };
 
 enum bpf_map_type {
@@ -134,6 +135,7 @@  enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_CGROUP_DEVICE,
 	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_RAW_TRACEPOINT,
 };
 
 enum bpf_attach_type {
@@ -344,6 +346,11 @@  union bpf_attr {
 		__aligned_u64	prog_ids;
 		__u32		prog_cnt;
 	} query;
+
+	struct {
+		__u64 name;
+		__u32 prog_fd;
+	} raw_tracepoint;
 } __attribute__((aligned(8)));
 
 /* BPF helper function descriptions:
@@ -1152,4 +1159,8 @@  struct bpf_cgroup_dev_ctx {
 	__u32 minor;
 };
 
+struct bpf_raw_tracepoint_args {
+	__u64 args[0];
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3aeb4ea2a93a..96bc45a6e7d6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1311,6 +1311,90 @@  static int bpf_obj_get(const union bpf_attr *attr)
 				attr->file_flags);
 }
 
+struct bpf_raw_tracepoint {
+	struct tracepoint *tp;
+	struct bpf_prog *prog;
+};
+
+static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
+{
+	struct bpf_raw_tracepoint *raw_tp = filp->private_data;
+
+	if (raw_tp->prog) {
+		bpf_probe_unregister(raw_tp->tp, raw_tp->prog);
+		bpf_prog_put(raw_tp->prog);
+	}
+	kfree(raw_tp);
+	return 0;
+}
+
+static const struct file_operations bpf_raw_tp_fops = {
+	.release	= bpf_raw_tracepoint_release,
+	.read		= bpf_dummy_read,
+	.write		= bpf_dummy_write,
+};
+
+static void *__find_tp(struct tracepoint *tp, void *priv)
+{
+	char *name = priv;
+
+	if (!strcmp(tp->name, name))
+		return tp;
+	return NULL;
+}
+
+#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
+
+static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
+{
+	struct bpf_raw_tracepoint *raw_tp;
+	struct tracepoint *tp;
+	struct bpf_prog *prog;
+	char tp_name[128];
+	int tp_fd, err;
+
+	if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
+			      sizeof(tp_name) - 1) < 0)
+		return -EFAULT;
+	tp_name[sizeof(tp_name) - 1] = 0;
+
+	tp = for_each_kernel_tracepoint(__find_tp, tp_name);
+	if (!tp)
+		return -ENOENT;
+
+	raw_tp = kmalloc(sizeof(*raw_tp), GFP_USER | __GFP_ZERO);
+	if (!raw_tp)
+		return -ENOMEM;
+	raw_tp->tp = tp;
+
+	prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
+				 BPF_PROG_TYPE_RAW_TRACEPOINT);
+	if (IS_ERR(prog)) {
+		err = PTR_ERR(prog);
+		goto out_free_tp;
+	}
+
+	err = bpf_probe_register(raw_tp->tp, prog);
+	if (err)
+		goto out_put_prog;
+
+	raw_tp->prog = prog;
+	tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
+				 O_CLOEXEC);
+	if (tp_fd < 0) {
+		bpf_probe_unregister(raw_tp->tp, prog);
+		err = tp_fd;
+		goto out_put_prog;
+	}
+	return tp_fd;
+
+out_put_prog:
+	bpf_prog_put(prog);
+out_free_tp:
+	kfree(raw_tp);
+	return err;
+}
+
 #ifdef CONFIG_CGROUP_BPF
 
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
@@ -1921,6 +2005,9 @@  SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_OBJ_GET_INFO_BY_FD:
 		err = bpf_obj_get_info_by_fd(&attr, uattr);
 		break;
+	case BPF_RAW_TRACEPOINT_OPEN:
+		err = bpf_raw_tracepoint_open(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c634e093951f..19576d216880 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -723,6 +723,86 @@  const struct bpf_verifier_ops tracepoint_verifier_ops = {
 const struct bpf_prog_ops tracepoint_prog_ops = {
 };
 
+/*
+ * bpf_raw_tp_regs are kept separate from the bpf_pt_regs used from skb/xdp
+ * to avoid a potential recursive-reuse issue when/if tracepoints are added
+ * inside bpf_*_event_output and/or bpf_get_stackid
+ */
+static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
+BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
+	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
+{
+	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+
+	perf_fetch_caller_regs(regs);
+	return ____bpf_perf_event_output(regs, map, flags, data, size);
+}
+
+static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
+	.func		= bpf_perf_event_output_raw_tp,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
+BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
+	   struct bpf_map *, map, u64, flags)
+{
+	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+
+	perf_fetch_caller_regs(regs);
+	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
+	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+			       flags, 0, 0);
+}
+
+static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
+	.func		= bpf_get_stackid_raw_tp,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *raw_tp_prog_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_perf_event_output:
+		return &bpf_perf_event_output_proto_raw_tp;
+	case BPF_FUNC_get_stackid:
+		return &bpf_get_stackid_proto_raw_tp;
+	default:
+		return tracing_func_proto(func_id);
+	}
+}
+
+static bool raw_tp_prog_is_valid_access(int off, int size,
+					enum bpf_access_type type,
+					struct bpf_insn_access_aux *info)
+{
+	/* largest tracepoint in the kernel has 17 args */
+	if (off < 0 || off >= sizeof(__u64) * 17)
+		return false;
+	if (type != BPF_READ)
+		return false;
+	if (off % size != 0)
+		return false;
+	return true;
+}
+
+const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
+	.get_func_proto  = raw_tp_prog_func_proto,
+	.is_valid_access = raw_tp_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops raw_tracepoint_prog_ops = {
+};
+
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
 				    struct bpf_insn_access_aux *info)
 {
@@ -896,3 +976,206 @@  int perf_event_query_prog_array(struct perf_event *event, void __user *info)
 
 	return ret;
 }
+
+static __always_inline
+void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
+{
+	rcu_read_lock();
+	preempt_disable();
+	(void) BPF_PROG_RUN(prog, args);
+	preempt_enable();
+	rcu_read_unlock();
+}
+
+#define EVAL1(FN, X) FN(X)
+#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
+#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
+#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
+#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
+#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
+
+#define COPY(X) args[X - 1] = arg##X;
+
+void bpf_trace_run1(struct bpf_prog *prog, u64 arg1)
+{
+	u64 args[1];
+
+	EVAL1(COPY, 1);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run1);
+void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2)
+{
+	u64 args[2];
+
+	EVAL2(COPY, 1, 2);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run2);
+void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3)
+{
+	u64 args[3];
+
+	EVAL3(COPY, 1, 2, 3);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run3);
+void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4)
+{
+	u64 args[4];
+
+	EVAL4(COPY, 1, 2, 3, 4);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run4);
+void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5)
+{
+	u64 args[5];
+
+	EVAL5(COPY, 1, 2, 3, 4, 5);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run5);
+void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5, u64 arg6)
+{
+	u64 args[6];
+
+	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run6);
+void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
+{
+	u64 args[7];
+
+	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
+	EVAL1(COPY, 7);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run7);
+void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		    u64 arg8)
+{
+	u64 args[8];
+
+	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
+	EVAL2(COPY, 7, 8);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run8);
+void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		    u64 arg8, u64 arg9)
+{
+	u64 args[9];
+
+	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
+	EVAL3(COPY, 7, 8, 9);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run9);
+void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10)
+{
+	u64 args[10];
+
+	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
+	EVAL4(COPY, 7, 8, 9, 10);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run10);
+void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10, u64 arg11)
+{
+	u64 args[11];
+
+	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
+	EVAL5(COPY, 7, 8, 9, 10, 11);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run11);
+void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12)
+{
+	u64 args[12];
+
+	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
+	EVAL6(COPY, 7, 8, 9, 10, 11, 12);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run12);
+void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
+		     u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17)
+{
+	u64 args[17];
+
+	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
+	EVAL6(COPY, 7, 8, 9, 10, 11, 12);
+	EVAL5(COPY, 13, 14, 15, 16, 17);
+	__bpf_trace_run(prog, args);
+}
+EXPORT_SYMBOL_GPL(bpf_trace_run17);
+
+static int __bpf_probe_register(struct tracepoint *tp, struct bpf_prog *prog)
+{
+	unsigned long addr;
+	char buf[128];
+
+	/*
+	 * check that program doesn't access arguments beyond what's
+	 * available in this tracepoint
+	 */
+	if (prog->aux->max_ctx_offset > tp->num_args * sizeof(u64))
+		return -EINVAL;
+
+	snprintf(buf, sizeof(buf), "__bpf_trace_%s", tp->name);
+	addr = kallsyms_lookup_name(buf);
+	if (!addr)
+		return -ENOENT;
+
+	return tracepoint_probe_register(tp, (void *)addr, prog);
+}
+
+int bpf_probe_register(struct tracepoint *tp, struct bpf_prog *prog)
+{
+	int err;
+
+	mutex_lock(&bpf_event_mutex);
+	err = __bpf_probe_register(tp, prog);
+	mutex_unlock(&bpf_event_mutex);
+	return err;
+}
+
+static int __bpf_probe_unregister(struct tracepoint *tp, struct bpf_prog *prog)
+{
+	unsigned long addr;
+	char buf[128];
+
+	snprintf(buf, sizeof(buf), "__bpf_trace_%s", tp->name);
+	addr = kallsyms_lookup_name(buf);
+	if (!addr)
+		return -ENOENT;
+
+	return tracepoint_probe_unregister(tp, (void *)addr, prog);
+}
+
+int bpf_probe_unregister(struct tracepoint *tp, struct bpf_prog *prog)
+{
+	int err;
+
+	mutex_lock(&bpf_event_mutex);
+	err = __bpf_probe_unregister(tp, prog);
+	mutex_unlock(&bpf_event_mutex);
+	return err;
+}