diff mbox series

[v2,bpf-next,1/3] bpf: enable BPF_PROG_TEST_RUN for raw_tracepoint

Message ID 20200923165401.2284447-2-songliubraving@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series enable BPF_PROG_TEST_RUN for raw_tp | expand

Commit Message

Song Liu Sept. 23, 2020, 4:53 p.m. UTC
Add .test_run for raw_tracepoint. Also, introduce a new feature that runs
the target program on a specific CPU. This is achieved by a new field in
bpf_attr.test, cpu_plus. For compatibility, cpu_plus == 0 means run the
program on the current cpu; cpu_plus > 0 means run the program on the cpu
with id (cpu_plus - 1). This feature is needed for BPF programs that handle
perf_event and other percpu resources, as the program can access these
resources locally.

Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/linux/bpf.h            |  3 ++
 include/uapi/linux/bpf.h       |  5 ++
 kernel/bpf/syscall.c           |  2 +-
 kernel/trace/bpf_trace.c       |  1 +
 net/bpf/test_run.c             | 88 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  5 ++
 6 files changed, 103 insertions(+), 1 deletion(-)

Comments

Andrii Nakryiko Sept. 23, 2020, 7:36 p.m. UTC | #1
On Wed, Sep 23, 2020 at 9:54 AM Song Liu <songliubraving@fb.com> wrote:
>
> Add .test_run for raw_tracepoint. Also, introduce a new feature that runs
> the target program on a specific CPU. This is achieved by a new flag in
> bpf_attr.test, cpu_plus. For compatibility, cpu_plus == 0 means run the
> program on current cpu, cpu_plus > 0 means run the program on cpu with id
> (cpu_plus - 1). This feature is needed for BPF programs that handle
> perf_event and other percpu resources, as the program can access these
> resource locally.
>
> Acked-by: John Fastabend <john.fastabend@gmail.com>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  include/linux/bpf.h            |  3 ++
>  include/uapi/linux/bpf.h       |  5 ++
>  kernel/bpf/syscall.c           |  2 +-
>  kernel/trace/bpf_trace.c       |  1 +
>  net/bpf/test_run.c             | 88 ++++++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h |  5 ++
>  6 files changed, 103 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index d7c5a6ed87e30..23758c282eb4b 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1376,6 +1376,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
>  int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
>                                      const union bpf_attr *kattr,
>                                      union bpf_attr __user *uattr);
> +int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
> +                            const union bpf_attr *kattr,
> +                            union bpf_attr __user *uattr);
>  bool btf_ctx_access(int off, int size, enum bpf_access_type type,
>                     const struct bpf_prog *prog,
>                     struct bpf_insn_access_aux *info);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index a22812561064a..89acf41913e70 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -566,6 +566,11 @@ union bpf_attr {
>                                                  */
>                 __aligned_u64   ctx_in;
>                 __aligned_u64   ctx_out;
> +               __u32           cpu_plus;       /* run this program on cpu
> +                                                * (cpu_plus - 1).
> +                                                * If cpu_plus == 0, run on
> +                                                * current cpu.
> +                                                */

The "_plus" part of the name is confusing, as are the off-by-one
semantics. Why not do what we do with BPF_PROG_ATTACH? I.e., we have a
flags field, and if a specific bit is set then we use the extra field's
value. In this case, you'd have:

__u32 flags;
__u32 cpu; /* naturally 0-based */

CPU indexing will be natural without any offsets, and you'll have
something like a BPF_PROG_TEST_CPU flag that needs to be specified.
This will work well with backward/forward compatibility. If you need a
special "current CPU" mode, you can achieve that by not specifying the
BPF_PROG_TEST_CPU flag, or we can designate (__u32)-1 as a special
"current CPU" value.

WDYT?


>         } test;
>
>         struct { /* anonymous struct used by BPF_*_GET_*_ID */
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index ec68d3a23a2b7..4664531ff92ea 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -2975,7 +2975,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
>         }
>  }
>
> -#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
> +#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu_plus
>

[...]
Song Liu Sept. 23, 2020, 9:59 p.m. UTC | #2
> On Sep 23, 2020, at 12:36 PM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> 
> On Wed, Sep 23, 2020 at 9:54 AM Song Liu <songliubraving@fb.com> wrote:
>> 
>> Add .test_run for raw_tracepoint. Also, introduce a new feature that runs
>> the target program on a specific CPU. This is achieved by a new flag in
>> bpf_attr.test, cpu_plus. For compatibility, cpu_plus == 0 means run the
>> program on current cpu, cpu_plus > 0 means run the program on cpu with id
>> (cpu_plus - 1). This feature is needed for BPF programs that handle
>> perf_event and other percpu resources, as the program can access these
>> resource locally.
>> 
>> Acked-by: John Fastabend <john.fastabend@gmail.com>
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>> include/linux/bpf.h            |  3 ++
>> include/uapi/linux/bpf.h       |  5 ++
>> kernel/bpf/syscall.c           |  2 +-
>> kernel/trace/bpf_trace.c       |  1 +
>> net/bpf/test_run.c             | 88 ++++++++++++++++++++++++++++++++++
>> tools/include/uapi/linux/bpf.h |  5 ++
>> 6 files changed, 103 insertions(+), 1 deletion(-)
>> 
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index d7c5a6ed87e30..23758c282eb4b 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1376,6 +1376,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
>> int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
>>                                     const union bpf_attr *kattr,
>>                                     union bpf_attr __user *uattr);
>> +int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
>> +                            const union bpf_attr *kattr,
>> +                            union bpf_attr __user *uattr);
>> bool btf_ctx_access(int off, int size, enum bpf_access_type type,
>>                    const struct bpf_prog *prog,
>>                    struct bpf_insn_access_aux *info);
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index a22812561064a..89acf41913e70 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -566,6 +566,11 @@ union bpf_attr {
>>                                                 */
>>                __aligned_u64   ctx_in;
>>                __aligned_u64   ctx_out;
>> +               __u32           cpu_plus;       /* run this program on cpu
>> +                                                * (cpu_plus - 1).
>> +                                                * If cpu_plus == 0, run on
>> +                                                * current cpu.
>> +                                                */
> 
> the "_plus" part of the name is so confusing, just as off-by-one
> semantics.. Why not do what we do with BPF_PROG_ATTACH? I.e., we have
> flags field, and if the specific bit is set then we use extra field's
> value. In this case, you'd have:
> 
> __u32 flags;
> __u32 cpu; /* naturally 0-based */
> 
> cpu indexing will be natural without any offsets, and you'll have
> something like BPF_PROG_TEST_CPU flag, that needs to be specified.
> This will work well with backward/forward compatibility. If you need a
> special "current CPU" mode, you can achieve that by not specifying
> BPF_PROG_TEST_CPU flag, or we can designate (__u32)-1 as a special
> "current CPU" value.
> 
> WDYT?

Yes, we can add a flag here. If there had already been a flags field in
bpf_attr.test, I would have gone that way in the first place.

Thanks,
Song
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d7c5a6ed87e30..23758c282eb4b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1376,6 +1376,9 @@  int bpf_prog_test_run_tracing(struct bpf_prog *prog,
 int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 				     const union bpf_attr *kattr,
 				     union bpf_attr __user *uattr);
+int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
+			     const union bpf_attr *kattr,
+			     union bpf_attr __user *uattr);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a22812561064a..89acf41913e70 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -566,6 +566,11 @@  union bpf_attr {
 						 */
 		__aligned_u64	ctx_in;
 		__aligned_u64	ctx_out;
+		__u32		cpu_plus;	/* run this program on cpu
+						 * (cpu_plus - 1).
+						 * If cpu_plus == 0, run on
+						 * current cpu.
+						 */
 	} test;
 
 	struct { /* anonymous struct used by BPF_*_GET_*_ID */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ec68d3a23a2b7..4664531ff92ea 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2975,7 +2975,7 @@  static int bpf_prog_query(const union bpf_attr *attr,
 	}
 }
 
-#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu_plus
 
 static int bpf_prog_test_run(const union bpf_attr *attr,
 			     union bpf_attr __user *uattr)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b2a5380eb1871..4553aebf53862 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1675,6 +1675,7 @@  const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 };
 
 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
+	.test_run = bpf_prog_test_run_raw_tp,
 };
 
 const struct bpf_verifier_ops tracing_verifier_ops = {
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 99eb8c6c0fbcc..b32e22f0ee16f 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -11,6 +11,7 @@ 
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <linux/error-injection.h>
+#include <linux/smp.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/bpf_test_run.h>
@@ -204,6 +205,9 @@  int bpf_prog_test_run_tracing(struct bpf_prog *prog,
 	int b = 2, err = -EFAULT;
 	u32 retval = 0;
 
+	if (kattr->test.cpu_plus)
+		return -EINVAL;
+
 	switch (prog->expected_attach_type) {
 	case BPF_TRACE_FENTRY:
 	case BPF_TRACE_FEXIT:
@@ -236,6 +240,84 @@  int bpf_prog_test_run_tracing(struct bpf_prog *prog,
 	return err;
 }
 
+struct bpf_raw_tp_test_run_info {
+	struct bpf_prog *prog;
+	void *ctx;
+	u32 retval;
+};
+
+static void
+__bpf_prog_test_run_raw_tp(void *data)
+{
+	struct bpf_raw_tp_test_run_info *info = data;
+
+	rcu_read_lock();
+	migrate_disable();
+	info->retval = BPF_PROG_RUN(info->prog, info->ctx);
+	migrate_enable();
+	rcu_read_unlock();
+}
+
+int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
+			     const union bpf_attr *kattr,
+			     union bpf_attr __user *uattr)
+{
+	void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
+	__u32 ctx_size_in = kattr->test.ctx_size_in;
+	struct bpf_raw_tp_test_run_info info;
+	int cpu, err = 0;
+
+	/* doesn't support data_in/out, ctx_out, duration, or repeat */
+	if (kattr->test.data_in || kattr->test.data_out ||
+	    kattr->test.ctx_out || kattr->test.duration ||
+	    kattr->test.repeat)
+		return -EINVAL;
+
+	if (ctx_size_in < prog->aux->max_ctx_offset)
+		return -EINVAL;
+
+	if (ctx_size_in) {
+		info.ctx = kzalloc(ctx_size_in, GFP_USER);
+		if (!info.ctx)
+			return -ENOMEM;
+		if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) {
+			err = -EFAULT;
+			goto out;
+		}
+	} else {
+		info.ctx = NULL;
+	}
+
+	info.prog = prog;
+	cpu = kattr->test.cpu_plus - 1;
+
+	if (!kattr->test.cpu_plus || cpu == smp_processor_id()) {
+		__bpf_prog_test_run_raw_tp(&info);
+	} else {
+		/* smp_call_function_single() also checks cpu_online()
+		 * after csd_lock(). However, since cpu_plus is from user
+		 * space, let's do an extra quick check to filter out
+		 * invalid value before smp_call_function_single().
+		 */
+		if (!cpu_online(cpu)) {
+			err = -ENXIO;
+			goto out;
+		}
+
+		err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp,
+					       &info, 1);
+		if (err)
+			goto out;
+	}
+
+	if (copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
+		err = -EFAULT;
+
+out:
+	kfree(info.ctx);
+	return err;
+}
+
 static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
 {
 	void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
@@ -410,6 +492,9 @@  int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 	void *data;
 	int ret;
 
+	if (kattr->test.cpu_plus)
+		return -EINVAL;
+
 	data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
 			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
 	if (IS_ERR(data))
@@ -607,6 +692,9 @@  int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
 		return -EINVAL;
 
+	if (kattr->test.cpu_plus)
+		return -EINVAL;
+
 	if (size < ETH_HLEN)
 		return -EINVAL;
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a22812561064a..89acf41913e70 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -566,6 +566,11 @@  union bpf_attr {
 						 */
 		__aligned_u64	ctx_in;
 		__aligned_u64	ctx_out;
+		__u32		cpu_plus;	/* run this program on cpu
+						 * (cpu_plus - 1).
+						 * If cpu_plus == 0, run on
+						 * current cpu.
+						 */
 	} test;
 
 	struct { /* anonymous struct used by BPF_*_GET_*_ID */