Message ID | 20200923165401.2284447-2-songliubraving@fb.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | enable BPF_PROG_TEST_RUN for raw_tp | expand |
On Wed, Sep 23, 2020 at 9:54 AM Song Liu <songliubraving@fb.com> wrote: > > Add .test_run for raw_tracepoint. Also, introduce a new feature that runs > the target program on a specific CPU. This is achieved by a new flag in > bpf_attr.test, cpu_plus. For compatibility, cpu_plus == 0 means run the > program on current cpu, cpu_plus > 0 means run the program on cpu with id > (cpu_plus - 1). This feature is needed for BPF programs that handle > perf_event and other percpu resources, as the program can access these > resources locally. > > Acked-by: John Fastabend <john.fastabend@gmail.com> > Signed-off-by: Song Liu <songliubraving@fb.com> > --- > include/linux/bpf.h | 3 ++ > include/uapi/linux/bpf.h | 5 ++ > kernel/bpf/syscall.c | 2 +- > kernel/trace/bpf_trace.c | 1 + > net/bpf/test_run.c | 88 ++++++++++++++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 5 ++ > 6 files changed, 103 insertions(+), 1 deletion(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index d7c5a6ed87e30..23758c282eb4b 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1376,6 +1376,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, > int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, > const union bpf_attr *kattr, > union bpf_attr __user *uattr); > +int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, > + const union bpf_attr *kattr, > + union bpf_attr __user *uattr); > bool btf_ctx_access(int off, int size, enum bpf_access_type type, > const struct bpf_prog *prog, > struct bpf_insn_access_aux *info); > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index a22812561064a..89acf41913e70 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -566,6 +566,11 @@ union bpf_attr { > */ > __aligned_u64 ctx_in; > __aligned_u64 ctx_out; > + __u32 cpu_plus; /* run this program on cpu > + * (cpu_plus - 1). > + * If cpu_plus == 0, run on > + * current cpu. 
> + */ The "_plus" part of the name is confusing, as are the off-by-one semantics. Why not do what we do with BPF_PROG_ATTACH? I.e., we have a flags field, and if the specific bit is set then we use the extra field's value. In this case, you'd have: __u32 flags; __u32 cpu; /* naturally 0-based */ cpu indexing will be natural without any offsets, and you'll have something like a BPF_PROG_TEST_CPU flag that needs to be specified. This will work well with backward/forward compatibility. If you need a special "current CPU" mode, you can achieve that by not specifying the BPF_PROG_TEST_CPU flag, or we can designate (__u32)-1 as a special "current CPU" value. WDYT? > } test; > > struct { /* anonymous struct used by BPF_*_GET_*_ID */ > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index ec68d3a23a2b7..4664531ff92ea 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -2975,7 +2975,7 @@ static int bpf_prog_query(const union bpf_attr *attr, > } > } > > -#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out > +#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu_plus > [...]
> On Sep 23, 2020, at 12:36 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: > > On Wed, Sep 23, 2020 at 9:54 AM Song Liu <songliubraving@fb.com> wrote: >> >> Add .test_run for raw_tracepoint. Also, introduce a new feature that runs >> the target program on a specific CPU. This is achieved by a new flag in >> bpf_attr.test, cpu_plus. For compatibility, cpu_plus == 0 means run the >> program on current cpu, cpu_plus > 0 means run the program on cpu with id >> (cpu_plus - 1). This feature is needed for BPF programs that handle >> perf_event and other percpu resources, as the program can access these >> resource locally. >> >> Acked-by: John Fastabend <john.fastabend@gmail.com> >> Signed-off-by: Song Liu <songliubraving@fb.com> >> --- >> include/linux/bpf.h | 3 ++ >> include/uapi/linux/bpf.h | 5 ++ >> kernel/bpf/syscall.c | 2 +- >> kernel/trace/bpf_trace.c | 1 + >> net/bpf/test_run.c | 88 ++++++++++++++++++++++++++++++++++ >> tools/include/uapi/linux/bpf.h | 5 ++ >> 6 files changed, 103 insertions(+), 1 deletion(-) >> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h >> index d7c5a6ed87e30..23758c282eb4b 100644 >> --- a/include/linux/bpf.h >> +++ b/include/linux/bpf.h >> @@ -1376,6 +1376,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, >> int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, >> const union bpf_attr *kattr, >> union bpf_attr __user *uattr); >> +int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, >> + const union bpf_attr *kattr, >> + union bpf_attr __user *uattr); >> bool btf_ctx_access(int off, int size, enum bpf_access_type type, >> const struct bpf_prog *prog, >> struct bpf_insn_access_aux *info); >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >> index a22812561064a..89acf41913e70 100644 >> --- a/include/uapi/linux/bpf.h >> +++ b/include/uapi/linux/bpf.h >> @@ -566,6 +566,11 @@ union bpf_attr { >> */ >> __aligned_u64 ctx_in; >> __aligned_u64 ctx_out; >> + __u32 cpu_plus; /* run this program on cpu >> 
+ * (cpu_plus - 1). >> + * If cpu_plus == 0, run on >> + * current cpu. >> + */ > > the "_plus" part of the name is so confusing, just as off-by-one > semantics.. Why not do what we do with BPF_PROG_ATTACH? I.e., we have > flags field, and if the specific bit is set then we use extra field's > value. In this case, you'd have: > > __u32 flags; > __u32 cpu; /* naturally 0-based */ > > cpu indexing will be natural without any offsets, and you'll have > something like BPF_PROG_TEST_CPU flag, that needs to be specified. > This will work well with backward/forward compatibility. If you need a > special "current CPU" mode, you can achieve that by not specifying > BPF_PROG_TEST_CPU flag, or we can designate (__u32)-1 as a special > "current CPU" value. > > WDYT? Yes, we can add a flag here. If there was already a flags field in bpf_attr.test, I would have gone that way in the first place. Thanks, Song
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d7c5a6ed87e30..23758c282eb4b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1376,6 +1376,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a22812561064a..89acf41913e70 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -566,6 +566,11 @@ union bpf_attr { */ __aligned_u64 ctx_in; __aligned_u64 ctx_out; + __u32 cpu_plus; /* run this program on cpu + * (cpu_plus - 1). + * If cpu_plus == 0, run on + * current cpu. + */ } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ec68d3a23a2b7..4664531ff92ea 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2975,7 +2975,7 @@ static int bpf_prog_query(const union bpf_attr *attr, } } -#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out +#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu_plus static int bpf_prog_test_run(const union bpf_attr *attr, union bpf_attr __user *uattr) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b2a5380eb1871..4553aebf53862 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1675,6 +1675,7 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { }; const struct bpf_prog_ops raw_tracepoint_prog_ops = { + .test_run = bpf_prog_test_run_raw_tp, }; const struct bpf_verifier_ops tracing_verifier_ops = { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 99eb8c6c0fbcc..b32e22f0ee16f 100644 --- a/net/bpf/test_run.c +++ 
b/net/bpf/test_run.c @@ -11,6 +11,7 @@ #include <net/sock.h> #include <net/tcp.h> #include <linux/error-injection.h> +#include <linux/smp.h> #define CREATE_TRACE_POINTS #include <trace/events/bpf_test_run.h> @@ -204,6 +205,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, int b = 2, err = -EFAULT; u32 retval = 0; + if (kattr->test.cpu_plus) + return -EINVAL; + switch (prog->expected_attach_type) { case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: @@ -236,6 +240,84 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, return err; } +struct bpf_raw_tp_test_run_info { + struct bpf_prog *prog; + void *ctx; + u32 retval; +}; + +static void +__bpf_prog_test_run_raw_tp(void *data) +{ + struct bpf_raw_tp_test_run_info *info = data; + + rcu_read_lock(); + migrate_disable(); + info->retval = BPF_PROG_RUN(info->prog, info->ctx); + migrate_enable(); + rcu_read_unlock(); +} + +int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in); + __u32 ctx_size_in = kattr->test.ctx_size_in; + struct bpf_raw_tp_test_run_info info; + int cpu, err = 0; + + /* doesn't support data_in/out, ctx_out, duration, or repeat */ + if (kattr->test.data_in || kattr->test.data_out || + kattr->test.ctx_out || kattr->test.duration || + kattr->test.repeat) + return -EINVAL; + + if (ctx_size_in < prog->aux->max_ctx_offset) + return -EINVAL; + + if (ctx_size_in) { + info.ctx = kzalloc(ctx_size_in, GFP_USER); + if (!info.ctx) + return -ENOMEM; + if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) { + err = -EFAULT; + goto out; + } + } else { + info.ctx = NULL; + } + + info.prog = prog; + cpu = kattr->test.cpu_plus - 1; + + if (!kattr->test.cpu_plus || cpu == smp_processor_id()) { + __bpf_prog_test_run_raw_tp(&info); + } else { + /* smp_call_function_single() also checks cpu_online() + * after csd_lock(). 
However, since cpu_plus is from user + * space, let's do an extra quick check to filter out + * invalid value before smp_call_function_single(). + */ + if (!cpu_online(cpu)) { + err = -ENXIO; + goto out; + } + + err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp, + &info, 1); + if (err) + goto out; + } + + if (copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32))) + err = -EFAULT; + +out: + kfree(info.ctx); + return err; +} + static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) { void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); @@ -410,6 +492,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, void *data; int ret; + if (kattr->test.cpu_plus) + return -EINVAL; + data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); if (IS_ERR(data)) @@ -607,6 +692,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) return -EINVAL; + if (kattr->test.cpu_plus) + return -EINVAL; + if (size < ETH_HLEN) return -EINVAL; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a22812561064a..89acf41913e70 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -566,6 +566,11 @@ union bpf_attr { */ __aligned_u64 ctx_in; __aligned_u64 ctx_out; + __u32 cpu_plus; /* run this program on cpu + * (cpu_plus - 1). + * If cpu_plus == 0, run on + * current cpu. + */ } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */