diff mbox series

[bpf-next,v3,03/21] bpf: support bpf tracing/iter programs for BPF_LINK_CREATE

Message ID 20200507053918.1542509-1-yhs@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf iterator for kernel data | expand

Commit Message

Yonghong Song May 7, 2020, 5:39 a.m. UTC
Given a bpf program, the step to create an anonymous bpf iterator is:
  - create a bpf_iter_link, which combines bpf program and the target.
    In the future, there could be more information recorded in the link.
    A link_fd will be returned to the user space.
  - create an anonymous bpf iterator with the given link_fd.

The bpf_iter_link can be pinned to bpffs mount file system to
create a file based bpf iterator as well.

The benefit to use of bpf_iter_link:
  - using bpf link simplifies design and implementation as bpf link
    is used for other tracing bpf programs.
  - for file based bpf iterator, bpf_iter_link provides a standard
    way to replace underlying bpf programs.
  - for both anonymous and free based iterators, bpf link query
    capability can be leveraged.

The patch added support of tracing/iter programs for BPF_LINK_CREATE.
A new link type BPF_LINK_TYPE_ITER is added to facilitate link
querying. Currently, only prog_id is needed, so there is no
additional in-kernel show_fdinfo() and fill_link_info() hook
is needed for BPF_LINK_TYPE_ITER link.

Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h            |  1 +
 include/linux/bpf_types.h      |  1 +
 include/uapi/linux/bpf.h       |  1 +
 kernel/bpf/bpf_iter.c          | 62 ++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           | 14 ++++++++
 tools/include/uapi/linux/bpf.h |  1 +
 6 files changed, 80 insertions(+)

Comments

Andrii Nakryiko May 8, 2020, 6:24 p.m. UTC | #1
On Wed, May 6, 2020 at 10:41 PM Yonghong Song <yhs@fb.com> wrote:
>
> Given a bpf program, the step to create an anonymous bpf iterator is:
>   - create a bpf_iter_link, which combines bpf program and the target.
>     In the future, there could be more information recorded in the link.
>     A link_fd will be returned to the user space.
>   - create an anonymous bpf iterator with the given link_fd.
>
> The bpf_iter_link can be pinned to bpffs mount file system to
> create a file based bpf iterator as well.
>
> The benefit to use of bpf_iter_link:
>   - using bpf link simplifies design and implementation as bpf link
>     is used for other tracing bpf programs.
>   - for file based bpf iterator, bpf_iter_link provides a standard
>     way to replace underlying bpf programs.
>   - for both anonymous and free based iterators, bpf link query
>     capability can be leveraged.
>
> The patch added support of tracing/iter programs for BPF_LINK_CREATE.
> A new link type BPF_LINK_TYPE_ITER is added to facilitate link
> querying. Currently, only prog_id is needed, so there is no
> additional in-kernel show_fdinfo() and fill_link_info() hook
> is needed for BPF_LINK_TYPE_ITER link.
>
> Acked-by: Andrii Nakryiko <andriin@fb.com>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---

still looks good, but I realized show_fdinfo and fill_link_info is
missing, see request for a follow-up below :)


>  include/linux/bpf.h            |  1 +
>  include/linux/bpf_types.h      |  1 +
>  include/uapi/linux/bpf.h       |  1 +
>  kernel/bpf/bpf_iter.c          | 62 ++++++++++++++++++++++++++++++++++
>  kernel/bpf/syscall.c           | 14 ++++++++
>  tools/include/uapi/linux/bpf.h |  1 +
>  6 files changed, 80 insertions(+)
>

[...]

> +static const struct bpf_link_ops bpf_iter_link_lops = {
> +       .release = bpf_iter_link_release,
> +       .dealloc = bpf_iter_link_dealloc,
> +};

Link infra supports .show_fdinfo and .fill_link_info methods, there is
no need to block on this, but it would be great to implement them from
BPF_LINK_TYPE_ITER as well in the same release as a follow-up. Thanks!


[...]
Yonghong Song May 9, 2020, 1:36 a.m. UTC | #2
On 5/8/20 11:24 AM, Andrii Nakryiko wrote:
> On Wed, May 6, 2020 at 10:41 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> Given a bpf program, the step to create an anonymous bpf iterator is:
>>    - create a bpf_iter_link, which combines bpf program and the target.
>>      In the future, there could be more information recorded in the link.
>>      A link_fd will be returned to the user space.
>>    - create an anonymous bpf iterator with the given link_fd.
>>
>> The bpf_iter_link can be pinned to bpffs mount file system to
>> create a file based bpf iterator as well.
>>
>> The benefit to use of bpf_iter_link:
>>    - using bpf link simplifies design and implementation as bpf link
>>      is used for other tracing bpf programs.
>>    - for file based bpf iterator, bpf_iter_link provides a standard
>>      way to replace underlying bpf programs.
>>    - for both anonymous and free based iterators, bpf link query
>>      capability can be leveraged.
>>
>> The patch added support of tracing/iter programs for BPF_LINK_CREATE.
>> A new link type BPF_LINK_TYPE_ITER is added to facilitate link
>> querying. Currently, only prog_id is needed, so there is no
>> additional in-kernel show_fdinfo() and fill_link_info() hook
>> is needed for BPF_LINK_TYPE_ITER link.
>>
>> Acked-by: Andrii Nakryiko <andriin@fb.com>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
> 
> still looks good, but I realized show_fdinfo and fill_link_info is
> missing, see request for a follow-up below :)
> 
> 
>>   include/linux/bpf.h            |  1 +
>>   include/linux/bpf_types.h      |  1 +
>>   include/uapi/linux/bpf.h       |  1 +
>>   kernel/bpf/bpf_iter.c          | 62 ++++++++++++++++++++++++++++++++++
>>   kernel/bpf/syscall.c           | 14 ++++++++
>>   tools/include/uapi/linux/bpf.h |  1 +
>>   6 files changed, 80 insertions(+)
>>
> 
> [...]
> 
>> +static const struct bpf_link_ops bpf_iter_link_lops = {
>> +       .release = bpf_iter_link_release,
>> +       .dealloc = bpf_iter_link_dealloc,
>> +};
> 
> Link infra supports .show_fdinfo and .fill_link_info methods, there is
> no need to block on this, but it would be great to implement them from
> BPF_LINK_TYPE_ITER as well in the same release as a follow-up. Thanks!

The reason I did not implement is due to we do not have additional 
information beyond prog_id to present. The prog_id itself gives all
information about this link. I looked at tracing program 
show_fdinfo/fill_link_info, the additional attach_type is printed.
But attach_type is obvious for BPF_LINK_TYPE_ITER which does not
need print.

In the future when we add more stuff to parameterize the bpf_iter,
will need to implement these two callbacks as well as bpftool.

> 
> 
> [...]
>
Andrii Nakryiko May 12, 2020, 3:15 a.m. UTC | #3
On Fri, May 8, 2020 at 6:36 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 5/8/20 11:24 AM, Andrii Nakryiko wrote:
> > On Wed, May 6, 2020 at 10:41 PM Yonghong Song <yhs@fb.com> wrote:
> >>
> >> Given a bpf program, the step to create an anonymous bpf iterator is:
> >>    - create a bpf_iter_link, which combines bpf program and the target.
> >>      In the future, there could be more information recorded in the link.
> >>      A link_fd will be returned to the user space.
> >>    - create an anonymous bpf iterator with the given link_fd.
> >>
> >> The bpf_iter_link can be pinned to bpffs mount file system to
> >> create a file based bpf iterator as well.
> >>
> >> The benefit to use of bpf_iter_link:
> >>    - using bpf link simplifies design and implementation as bpf link
> >>      is used for other tracing bpf programs.
> >>    - for file based bpf iterator, bpf_iter_link provides a standard
> >>      way to replace underlying bpf programs.
> >>    - for both anonymous and free based iterators, bpf link query
> >>      capability can be leveraged.
> >>
> >> The patch added support of tracing/iter programs for BPF_LINK_CREATE.
> >> A new link type BPF_LINK_TYPE_ITER is added to facilitate link
> >> querying. Currently, only prog_id is needed, so there is no
> >> additional in-kernel show_fdinfo() and fill_link_info() hook
> >> is needed for BPF_LINK_TYPE_ITER link.
> >>
> >> Acked-by: Andrii Nakryiko <andriin@fb.com>
> >> Signed-off-by: Yonghong Song <yhs@fb.com>
> >> ---
> >
> > still looks good, but I realized show_fdinfo and fill_link_info is
> > missing, see request for a follow-up below :)
> >
> >
> >>   include/linux/bpf.h            |  1 +
> >>   include/linux/bpf_types.h      |  1 +
> >>   include/uapi/linux/bpf.h       |  1 +
> >>   kernel/bpf/bpf_iter.c          | 62 ++++++++++++++++++++++++++++++++++
> >>   kernel/bpf/syscall.c           | 14 ++++++++
> >>   tools/include/uapi/linux/bpf.h |  1 +
> >>   6 files changed, 80 insertions(+)
> >>
> >
> > [...]
> >
> >> +static const struct bpf_link_ops bpf_iter_link_lops = {
> >> +       .release = bpf_iter_link_release,
> >> +       .dealloc = bpf_iter_link_dealloc,
> >> +};
> >
> > Link infra supports .show_fdinfo and .fill_link_info methods, there is
> > no need to block on this, but it would be great to implement them from
> > BPF_LINK_TYPE_ITER as well in the same release as a follow-up. Thanks!
>
> The reason I did not implement is due to we do not have additional
> information beyond prog_id to present. The prog_id itself gives all
> information about this link. I looked at tracing program

Not all, e.g., bpf_iter target is invisible right now. It's good to
have this added in a follow up, but certainly not a blocker.


> show_fdinfo/fill_link_info, the additional attach_type is printed.
> But attach_type is obvious for BPF_LINK_TYPE_ITER which does not
> need print.
>
> In the future when we add more stuff to parameterize the bpf_iter,
> will need to implement these two callbacks as well as bpftool.

yep

>
> >
> >
> > [...]
> >
Yonghong Song May 13, 2020, 4:57 p.m. UTC | #4
On 5/11/20 8:15 PM, Andrii Nakryiko wrote:
> On Fri, May 8, 2020 at 6:36 PM Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 5/8/20 11:24 AM, Andrii Nakryiko wrote:
>>> On Wed, May 6, 2020 at 10:41 PM Yonghong Song <yhs@fb.com> wrote:
>>>>
>>>> Given a bpf program, the step to create an anonymous bpf iterator is:
>>>>     - create a bpf_iter_link, which combines bpf program and the target.
>>>>       In the future, there could be more information recorded in the link.
>>>>       A link_fd will be returned to the user space.
>>>>     - create an anonymous bpf iterator with the given link_fd.
>>>>
>>>> The bpf_iter_link can be pinned to bpffs mount file system to
>>>> create a file based bpf iterator as well.
>>>>
>>>> The benefit to use of bpf_iter_link:
>>>>     - using bpf link simplifies design and implementation as bpf link
>>>>       is used for other tracing bpf programs.
>>>>     - for file based bpf iterator, bpf_iter_link provides a standard
>>>>       way to replace underlying bpf programs.
>>>>     - for both anonymous and free based iterators, bpf link query
>>>>       capability can be leveraged.
>>>>
>>>> The patch added support of tracing/iter programs for BPF_LINK_CREATE.
>>>> A new link type BPF_LINK_TYPE_ITER is added to facilitate link
>>>> querying. Currently, only prog_id is needed, so there is no
>>>> additional in-kernel show_fdinfo() and fill_link_info() hook
>>>> is needed for BPF_LINK_TYPE_ITER link.
>>>>
>>>> Acked-by: Andrii Nakryiko <andriin@fb.com>
>>>> Signed-off-by: Yonghong Song <yhs@fb.com>
>>>> ---
>>>
>>> still looks good, but I realized show_fdinfo and fill_link_info is
>>> missing, see request for a follow-up below :)
>>>
>>>
>>>>    include/linux/bpf.h            |  1 +
>>>>    include/linux/bpf_types.h      |  1 +
>>>>    include/uapi/linux/bpf.h       |  1 +
>>>>    kernel/bpf/bpf_iter.c          | 62 ++++++++++++++++++++++++++++++++++
>>>>    kernel/bpf/syscall.c           | 14 ++++++++
>>>>    tools/include/uapi/linux/bpf.h |  1 +
>>>>    6 files changed, 80 insertions(+)
>>>>
>>>
>>> [...]
>>>
>>>> +static const struct bpf_link_ops bpf_iter_link_lops = {
>>>> +       .release = bpf_iter_link_release,
>>>> +       .dealloc = bpf_iter_link_dealloc,
>>>> +};
>>>
>>> Link infra supports .show_fdinfo and .fill_link_info methods, there is
>>> no need to block on this, but it would be great to implement them from
>>> BPF_LINK_TYPE_ITER as well in the same release as a follow-up. Thanks!
>>
>> The reason I did not implement is due to we do not have additional
>> information beyond prog_id to present. The prog_id itself gives all
>> information about this link. I looked at tracing program
> 
> Not all, e.g., bpf_iter target is invisible right now. It's good to
> have this added in a follow up, but certainly not a blocker.

Indeed, bpf_iter target is not visible now. We can add it to program
or to link. Adding to link is a reasonable idea as link itself is
the concept to merge program and target.

Will have a follow up patch for this later.

> 
> 
>> show_fdinfo/fill_link_info, the additional attach_type is printed.
>> But attach_type is obvious for BPF_LINK_TYPE_ITER which does not
>> need print.
>>
>> In the future when we add more stuff to parameterize the bpf_iter,
>> will need to implement these two callbacks as well as bpftool.
> 
> yep
> 
>>
>>>
>>>
>>> [...]
>>>
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f28bdd714754..e93d2d33c82c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1143,6 +1143,7 @@  struct bpf_iter_reg {
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const char *target);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
+int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 8345cdf553b8..29d22752fc87 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -124,3 +124,4 @@  BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
 #ifdef CONFIG_CGROUP_BPF
 BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
 #endif
+BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 047b19fe716e..2bf33979f9ae 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -229,6 +229,7 @@  enum bpf_link_type {
 	BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
 	BPF_LINK_TYPE_TRACING = 2,
 	BPF_LINK_TYPE_CGROUP = 3,
+	BPF_LINK_TYPE_ITER = 4,
 
 	MAX_BPF_LINK_TYPE,
 };
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index dec182d8395a..03f5832909db 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -15,6 +15,11 @@  struct bpf_iter_target_info {
 	u32 btf_id;	/* cached value */
 };
 
+struct bpf_iter_link {
+	struct bpf_link link;
+	struct bpf_iter_target_info *tinfo;
+};
+
 static struct list_head targets = LIST_HEAD_INIT(targets);
 static DEFINE_MUTEX(targets_mutex);
 
@@ -93,3 +98,60 @@  bool bpf_iter_prog_supported(struct bpf_prog *prog)
 
 	return supported;
 }
+
+static void bpf_iter_link_release(struct bpf_link *link)
+{
+}
+
+static void bpf_iter_link_dealloc(struct bpf_link *link)
+{
+	struct bpf_iter_link *iter_link =
+		container_of(link, struct bpf_iter_link, link);
+
+	kfree(iter_link);
+}
+
+static const struct bpf_link_ops bpf_iter_link_lops = {
+	.release = bpf_iter_link_release,
+	.dealloc = bpf_iter_link_dealloc,
+};
+
+int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	struct bpf_link_primer link_primer;
+	struct bpf_iter_target_info *tinfo;
+	struct bpf_iter_link *link;
+	bool existed = false;
+	u32 prog_btf_id;
+	int err;
+
+	if (attr->link_create.target_fd || attr->link_create.flags)
+		return -EINVAL;
+
+	prog_btf_id = prog->aux->attach_btf_id;
+	mutex_lock(&targets_mutex);
+	list_for_each_entry(tinfo, &targets, list) {
+		if (tinfo->btf_id == prog_btf_id) {
+			existed = true;
+			break;
+		}
+	}
+	mutex_unlock(&targets_mutex);
+	if (!existed)
+		return -ENOENT;
+
+	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
+	if (!link)
+		return -ENOMEM;
+
+	bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
+	link->tinfo = tinfo;
+
+	err  = bpf_link_prime(&link->link, &link_primer);
+	if (err) {
+		kfree(link);
+		return err;
+	}
+
+	return bpf_link_settle(&link_primer);
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bb1ab7da6103..6ffe2d8fb6c7 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2729,6 +2729,8 @@  attach_type_to_prog_type(enum bpf_attach_type attach_type)
 	case BPF_CGROUP_GETSOCKOPT:
 	case BPF_CGROUP_SETSOCKOPT:
 		return BPF_PROG_TYPE_CGROUP_SOCKOPT;
+	case BPF_TRACE_ITER:
+		return BPF_PROG_TYPE_TRACING;
 	default:
 		return BPF_PROG_TYPE_UNSPEC;
 	}
@@ -3729,6 +3731,15 @@  static int bpf_map_do_batch(const union bpf_attr *attr,
 	return err;
 }
 
+static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	if (attr->link_create.attach_type == BPF_TRACE_ITER &&
+	    prog->expected_attach_type == BPF_TRACE_ITER)
+		return bpf_iter_link_attach(attr, prog);
+
+	return -EINVAL;
+}
+
 #define BPF_LINK_CREATE_LAST_FIELD link_create.flags
 static int link_create(union bpf_attr *attr)
 {
@@ -3765,6 +3776,9 @@  static int link_create(union bpf_attr *attr)
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 		ret = cgroup_bpf_link_attach(attr, prog);
 		break;
+	case BPF_PROG_TYPE_TRACING:
+		ret = tracing_bpf_link_attach(attr, prog);
+		break;
 	default:
 		ret = -EINVAL;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 047b19fe716e..2bf33979f9ae 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -229,6 +229,7 @@  enum bpf_link_type {
 	BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
 	BPF_LINK_TYPE_TRACING = 2,
 	BPF_LINK_TYPE_CGROUP = 3,
+	BPF_LINK_TYPE_ITER = 4,
 
 	MAX_BPF_LINK_TYPE,
 };