diff mbox series

[bpf-next,v1,05/19] bpf: support bpf tracing/iter programs for BPF_LINK_CREATE

Message ID 20200427201240.2994985-1-yhs@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf iterator for kernel data | expand

Commit Message

Yonghong Song April 27, 2020, 8:12 p.m. UTC
Given a bpf program, the step to create an anonymous bpf iterator is:
  - create a bpf_iter_link, which combines bpf program and the target.
    In the future, there could be more information recorded in the link.
    A link_fd will be returned to the user space.
  - create an anonymous bpf iterator with the given link_fd.

The anonymous bpf iterator (and its underlying bpf_link) will be
used to create file based bpf iterator as well.

The benefits of using bpf_iter_link:
  - for file based bpf iterator, bpf_iter_link provides a standard
    way to replace underlying bpf programs.
  - for both anonymous and file based iterators, bpf link query
    capability can be leveraged.

This patch adds support for tracing/iter programs to BPF_LINK_CREATE.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h   |  2 ++
 kernel/bpf/bpf_iter.c | 54 +++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c  | 15 ++++++++++++
 3 files changed, 71 insertions(+)

Comments

Martin KaFai Lau April 29, 2020, 1:17 a.m. UTC | #1
On Mon, Apr 27, 2020 at 01:12:40PM -0700, Yonghong Song wrote:
> Given a bpf program, the step to create an anonymous bpf iterator is:
>   - create a bpf_iter_link, which combines bpf program and the target.
>     In the future, there could be more information recorded in the link.
>     A link_fd will be returned to the user space.
>   - create an anonymous bpf iterator with the given link_fd.
> 
> The anonymous bpf iterator (and its underlying bpf_link) will be
> used to create file based bpf iterator as well.
> 
> The benefit to use of bpf_iter_link:
>   - for file based bpf iterator, bpf_iter_link provides a standard
>     way to replace underlying bpf programs.
>   - for both anonymous and free based iterators, bpf link query
>     capability can be leveraged.
> 
> The patch added support of tracing/iter programs for  BPF_LINK_CREATE.
> 
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  include/linux/bpf.h   |  2 ++
>  kernel/bpf/bpf_iter.c | 54 +++++++++++++++++++++++++++++++++++++++++++
>  kernel/bpf/syscall.c  | 15 ++++++++++++
>  3 files changed, 71 insertions(+)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 4ac8d61f7c3e..60ecb73d8f6d 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1034,6 +1034,7 @@ extern const struct file_operations bpf_prog_fops;
>  extern const struct bpf_prog_ops bpf_offload_prog_ops;
>  extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops;
>  extern const struct bpf_verifier_ops xdp_analyzer_ops;
> +extern const struct bpf_link_ops bpf_iter_link_lops;
>  
>  struct bpf_prog *bpf_prog_get(u32 ufd);
>  struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
> @@ -1129,6 +1130,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
>  struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size,
>  				   u64 *session_id, u64 *seq_num, bool is_last);
>  int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
> +int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
>  
>  int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
>  int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> index 284c95587803..9532e7bcb8e1 100644
> --- a/kernel/bpf/bpf_iter.c
> +++ b/kernel/bpf/bpf_iter.c
> @@ -14,6 +14,11 @@ struct bpf_iter_target_info {
>  	u32 target_feature;
>  };
>  
> +struct bpf_iter_link {
> +	struct bpf_link link;
> +	struct bpf_iter_target_info *tinfo;
> +};
> +
>  static struct list_head targets;
>  static struct mutex targets_mutex;
>  static bool bpf_iter_inited = false;
> @@ -67,3 +72,52 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
>  
>  	return ret;
>  }
> +
> +static void bpf_iter_link_release(struct bpf_link *link)
> +{
> +}
> +
> +static void bpf_iter_link_dealloc(struct bpf_link *link)
> +{
> +}
> +
> +const struct bpf_link_ops bpf_iter_link_lops = {
> +	.release = bpf_iter_link_release,
> +	.dealloc = bpf_iter_link_dealloc,
> +};
> +
> +int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> +{
> +	struct bpf_iter_target_info *tinfo;
> +	struct bpf_iter_link *link;
> +	const char *func_name;
> +	bool existed = false;
> +	int err;
> +
> +	if (attr->link_create.target_fd || attr->link_create.flags)
> +		return -EINVAL;
> +
> +	func_name = prog->aux->attach_func_name;
> +	mutex_lock(&targets_mutex);
> +	list_for_each_entry(tinfo, &targets, list) {
> +		if (!strcmp(tinfo->target_func_name, func_name)) {
This can be done in prog load time.

Also, is it better to store a btf_id at tinfo instead of doing strcmp here?

> +			existed = true;
> +			break;
> +		}
> +	}
> +	mutex_unlock(&targets_mutex);
> +	if (!existed)
> +		return -ENOENT;
> +
> +	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
> +	if (!link)
> +		return -ENOMEM;
> +
> +	bpf_link_init(&link->link, &bpf_iter_link_lops, prog);
> +	link->tinfo = tinfo;
> +
> +	err = bpf_link_new_fd(&link->link);
> +	if (err < 0)
> +		kfree(link);
> +	return err;
> +}
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 022187640943..8741b5e11c85 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -2269,6 +2269,8 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
>  	else if (link->ops == &bpf_cgroup_link_lops)
>  		link_type = "cgroup";
>  #endif
> +	else if (link->ops == &bpf_iter_link_lops)
> +		link_type = "iter";
>  	else
>  		link_type = "unknown";
>  
> @@ -2597,6 +2599,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
>  	case BPF_CGROUP_GETSOCKOPT:
>  	case BPF_CGROUP_SETSOCKOPT:
>  		return BPF_PROG_TYPE_CGROUP_SOCKOPT;
> +	case BPF_TRACE_ITER:
> +		return BPF_PROG_TYPE_TRACING;
>  	default:
>  		return BPF_PROG_TYPE_UNSPEC;
>  	}
> @@ -3571,6 +3575,14 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
>  	return err;
>  }
>  
> +static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> +{
> +	if (attr->link_create.attach_type == BPF_TRACE_ITER)
Has prog->expected_attach_type been checked also?

> +		return bpf_iter_link_attach(attr, prog);
> +
> +	return -EINVAL;
> +}
> +
>  #define BPF_LINK_CREATE_LAST_FIELD link_create.flags
>  static int link_create(union bpf_attr *attr)
>  {
> @@ -3607,6 +3619,9 @@ static int link_create(union bpf_attr *attr)
>  	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
>  		ret = cgroup_bpf_link_attach(attr, prog);
>  		break;
> +	case BPF_PROG_TYPE_TRACING:
> +		ret = tracing_bpf_link_attach(attr, prog);
> +		break;
>  	default:
>  		ret = -EINVAL;
>  	}
> -- 
> 2.24.1
>
Andrii Nakryiko April 29, 2020, 6:25 a.m. UTC | #2
On Mon, Apr 27, 2020 at 1:13 PM Yonghong Song <yhs@fb.com> wrote:
>
> Given a bpf program, the step to create an anonymous bpf iterator is:
>   - create a bpf_iter_link, which combines bpf program and the target.
>     In the future, there could be more information recorded in the link.
>     A link_fd will be returned to the user space.
>   - create an anonymous bpf iterator with the given link_fd.
>
> The anonymous bpf iterator (and its underlying bpf_link) will be
> used to create file based bpf iterator as well.
>
> The benefit to use of bpf_iter_link:
>   - for file based bpf iterator, bpf_iter_link provides a standard
>     way to replace underlying bpf programs.
>   - for both anonymous and free based iterators, bpf link query
>     capability can be leveraged.
>
> The patch added support of tracing/iter programs for  BPF_LINK_CREATE.
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  include/linux/bpf.h   |  2 ++
>  kernel/bpf/bpf_iter.c | 54 +++++++++++++++++++++++++++++++++++++++++++
>  kernel/bpf/syscall.c  | 15 ++++++++++++
>  3 files changed, 71 insertions(+)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 4ac8d61f7c3e..60ecb73d8f6d 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1034,6 +1034,7 @@ extern const struct file_operations bpf_prog_fops;
>  extern const struct bpf_prog_ops bpf_offload_prog_ops;
>  extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops;
>  extern const struct bpf_verifier_ops xdp_analyzer_ops;
> +extern const struct bpf_link_ops bpf_iter_link_lops;

show_fdinfo implementation for bpf_link has changed, so thankfully
this won't be necessary after you rebase on latest master :)

>
>  struct bpf_prog *bpf_prog_get(u32 ufd);
>  struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
> @@ -1129,6 +1130,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
>  struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size,
>                                    u64 *session_id, u64 *seq_num, bool is_last);
>  int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
> +int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
>
>  int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
>  int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> index 284c95587803..9532e7bcb8e1 100644
> --- a/kernel/bpf/bpf_iter.c
> +++ b/kernel/bpf/bpf_iter.c
> @@ -14,6 +14,11 @@ struct bpf_iter_target_info {
>         u32 target_feature;
>  };
>
> +struct bpf_iter_link {
> +       struct bpf_link link;
> +       struct bpf_iter_target_info *tinfo;
> +};
> +
>  static struct list_head targets;
>  static struct mutex targets_mutex;
>  static bool bpf_iter_inited = false;
> @@ -67,3 +72,52 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
>
>         return ret;
>  }
> +
> +static void bpf_iter_link_release(struct bpf_link *link)
> +{
> +}
> +
> +static void bpf_iter_link_dealloc(struct bpf_link *link)
> +{

Here you need to kfree() link struct. See bpf_raw_tp_link_dealloc() for example.


> +}
> +
> +const struct bpf_link_ops bpf_iter_link_lops = {
> +       .release = bpf_iter_link_release,
> +       .dealloc = bpf_iter_link_dealloc,
> +};
> +

[...]
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4ac8d61f7c3e..60ecb73d8f6d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1034,6 +1034,7 @@  extern const struct file_operations bpf_prog_fops;
 extern const struct bpf_prog_ops bpf_offload_prog_ops;
 extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops;
 extern const struct bpf_verifier_ops xdp_analyzer_ops;
+extern const struct bpf_link_ops bpf_iter_link_lops;
 
 struct bpf_prog *bpf_prog_get(u32 ufd);
 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
@@ -1129,6 +1130,7 @@  int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
 struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size,
 				   u64 *session_id, u64 *seq_num, bool is_last);
 int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
+int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 284c95587803..9532e7bcb8e1 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -14,6 +14,11 @@  struct bpf_iter_target_info {
 	u32 target_feature;
 };
 
+struct bpf_iter_link {
+	struct bpf_link link;
+	struct bpf_iter_target_info *tinfo;
+};
+
 static struct list_head targets;
 static struct mutex targets_mutex;
 static bool bpf_iter_inited = false;
@@ -67,3 +72,52 @@  int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 
 	return ret;
 }
+
+static void bpf_iter_link_release(struct bpf_link *link)
+{
+}
+
+static void bpf_iter_link_dealloc(struct bpf_link *link)
+{
+}
+
+const struct bpf_link_ops bpf_iter_link_lops = {
+	.release = bpf_iter_link_release,
+	.dealloc = bpf_iter_link_dealloc,
+};
+
+int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	struct bpf_iter_target_info *tinfo;
+	struct bpf_iter_link *link;
+	const char *func_name;
+	bool existed = false;
+	int err;
+
+	if (attr->link_create.target_fd || attr->link_create.flags)
+		return -EINVAL;
+
+	func_name = prog->aux->attach_func_name;
+	mutex_lock(&targets_mutex);
+	list_for_each_entry(tinfo, &targets, list) {
+		if (!strcmp(tinfo->target_func_name, func_name)) {
+			existed = true;
+			break;
+		}
+	}
+	mutex_unlock(&targets_mutex);
+	if (!existed)
+		return -ENOENT;
+
+	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
+	if (!link)
+		return -ENOMEM;
+
+	bpf_link_init(&link->link, &bpf_iter_link_lops, prog);
+	link->tinfo = tinfo;
+
+	err = bpf_link_new_fd(&link->link);
+	if (err < 0)
+		kfree(link);
+	return err;
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 022187640943..8741b5e11c85 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2269,6 +2269,8 @@  static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
 	else if (link->ops == &bpf_cgroup_link_lops)
 		link_type = "cgroup";
 #endif
+	else if (link->ops == &bpf_iter_link_lops)
+		link_type = "iter";
 	else
 		link_type = "unknown";
 
@@ -2597,6 +2599,8 @@  attach_type_to_prog_type(enum bpf_attach_type attach_type)
 	case BPF_CGROUP_GETSOCKOPT:
 	case BPF_CGROUP_SETSOCKOPT:
 		return BPF_PROG_TYPE_CGROUP_SOCKOPT;
+	case BPF_TRACE_ITER:
+		return BPF_PROG_TYPE_TRACING;
 	default:
 		return BPF_PROG_TYPE_UNSPEC;
 	}
@@ -3571,6 +3575,14 @@  static int bpf_map_do_batch(const union bpf_attr *attr,
 	return err;
 }
 
+static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	if (attr->link_create.attach_type == BPF_TRACE_ITER)
+		return bpf_iter_link_attach(attr, prog);
+
+	return -EINVAL;
+}
+
 #define BPF_LINK_CREATE_LAST_FIELD link_create.flags
 static int link_create(union bpf_attr *attr)
 {
@@ -3607,6 +3619,9 @@  static int link_create(union bpf_attr *attr)
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 		ret = cgroup_bpf_link_attach(attr, prog);
 		break;
+	case BPF_PROG_TYPE_TRACING:
+		ret = tracing_bpf_link_attach(attr, prog);
+		break;
 	default:
 		ret = -EINVAL;
 	}