diff mbox series

[bpf-next,v1,02/19] bpf: implement an interface to register bpf_iter targets

Message ID 20200427201236.2994722-1-yhs@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf iterator for kernel data | expand

Commit Message

Yonghong Song April 27, 2020, 8:12 p.m. UTC
The target can call bpf_iter_reg_target() to register itself.
The needed information:
  target:           target name, represented as a directory hierarchy
  target_func_name: the kernel func name used by verifier to
                    verify bpf programs
  seq_ops:          the seq_file operations for the target
  seq_priv_size:    the private_data size needed by the seq_file
                    operations
  target_feature:   certain feature requested by the target for
                    bpf_iter to prepare for seq_file operations.

A little more explanation of the target name and target_feature.
For example, the target name can be "bpf_map", "task", or "task/file",
which represent iterating over all bpf_maps, all tasks, or all files
of all tasks, respectively.

The target feature is mostly for reusing existing seq_file operations.
For example, /proc/net/{tcp6, ipv6_route, netlink, ...} seq_file private
data contains a reference to the net namespace. When bpf_iter tries to
reuse the same seq_ops, its seq_file private data needs the net namespace
set up properly too. In this case, the bpf_iter infrastructure can help
set it up properly before doing seq_file operations.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h   | 11 ++++++++++
 kernel/bpf/Makefile   |  2 +-
 kernel/bpf/bpf_iter.c | 50 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 kernel/bpf/bpf_iter.c

Comments

Martin KaFai Lau April 28, 2020, 4:20 p.m. UTC | #1
On Mon, Apr 27, 2020 at 01:12:36PM -0700, Yonghong Song wrote:
> The target can call bpf_iter_reg_target() to register itself.
> The needed information:
>   target:           target name, represented as a directory hierarchy
>   target_func_name: the kernel func name used by verifier to
>                     verify bpf programs
>   seq_ops:          the seq_file operations for the target
>   seq_priv_size:    the private_data size needed by the seq_file
>                     operations
>   target_feature:   certain feature requested by the target for
>                     bpf_iter to prepare for seq_file operations.
> 
> A little bit more explanations on the target name and target_feature.
> For example, the target name can be "bpf_map", "task", "task/file",
> which represents iterating all bpf_map's, all tasks, or all files
> of all tasks.
> 
> The target feature is mostly for reusing existing seq_file operations.
> For example, /proc/net/{tcp6, ipv6_route, netlink, ...} seq_file private
> data contains a reference to net namespace. When bpf_iter tries to
> reuse the same seq_ops, its seq_file private data need the net namespace
> setup properly too. In this case, the bpf_iter infrastructure can help
> set up properly before doing seq_file operations.
> 
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  include/linux/bpf.h   | 11 ++++++++++
>  kernel/bpf/Makefile   |  2 +-
>  kernel/bpf/bpf_iter.c | 50 +++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 62 insertions(+), 1 deletion(-)
>  create mode 100644 kernel/bpf/bpf_iter.c
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 10960cfabea4..5e56abc1e2f1 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -31,6 +31,7 @@ struct seq_file;
>  struct btf;
>  struct btf_type;
>  struct exception_table_entry;
> +struct seq_operations;
>  
>  extern struct idr btf_idr;
>  extern spinlock_t btf_idr_lock;
> @@ -1109,6 +1110,16 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
>  int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
>  int bpf_obj_get_user(const char __user *pathname, int flags);
>  
> +struct bpf_iter_reg {
> +	const char *target;
> +	const char *target_func_name;
> +	const struct seq_operations *seq_ops;
> +	u32 seq_priv_size;
> +	u32 target_feature;
> +};
> +
> +int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
> +
>  int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
>  int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
>  int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index f2d7be596966..6a8b0febd3f6 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile
> @@ -2,7 +2,7 @@
>  obj-y := core.o
>  CFLAGS_core.o += $(call cc-disable-warning, override-init)
>  
> -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
> +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o
>  obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
>  obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
>  obj-$(CONFIG_BPF_SYSCALL) += disasm.o
> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> new file mode 100644
> index 000000000000..1115b978607a
> --- /dev/null
> +++ b/kernel/bpf/bpf_iter.c
> @@ -0,0 +1,50 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright (c) 2020 Facebook */
> +
> +#include <linux/fs.h>
> +#include <linux/filter.h>
> +#include <linux/bpf.h>
> +
> +struct bpf_iter_target_info {
> +	struct list_head list;
> +	const char *target;
> +	const char *target_func_name;
> +	const struct seq_operations *seq_ops;
> +	u32 seq_priv_size;
> +	u32 target_feature;
> +};
> +
> +static struct list_head targets;
> +static struct mutex targets_mutex;
> +static bool bpf_iter_inited = false;
The "!bpf_iter_inited" test below is racy.

LIST_HEAD_INIT and DEFINE_MUTEX can be used instead.

> +
> +int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
> +{
> +	struct bpf_iter_target_info *tinfo;
> +
> +	/* The earliest bpf_iter_reg_target() is called at init time
> +	 * where the bpf_iter registration is serialized.
> +	 */
> +	if (!bpf_iter_inited) {
> +		INIT_LIST_HEAD(&targets);
> +		mutex_init(&targets_mutex);
> +		bpf_iter_inited = true;
> +	}
Yonghong Song April 28, 2020, 4:50 p.m. UTC | #2
On 4/28/20 9:20 AM, Martin KaFai Lau wrote:
> On Mon, Apr 27, 2020 at 01:12:36PM -0700, Yonghong Song wrote:
>> The target can call bpf_iter_reg_target() to register itself.
>> The needed information:
>>    target:           target name, represented as a directory hierarchy
>>    target_func_name: the kernel func name used by verifier to
>>                      verify bpf programs
>>    seq_ops:          the seq_file operations for the target
>>    seq_priv_size:    the private_data size needed by the seq_file
>>                      operations
>>    target_feature:   certain feature requested by the target for
>>                      bpf_iter to prepare for seq_file operations.
>>
>> A little bit more explanations on the target name and target_feature.
>> For example, the target name can be "bpf_map", "task", "task/file",
>> which represents iterating all bpf_map's, all tasks, or all files
>> of all tasks.
>>
>> The target feature is mostly for reusing existing seq_file operations.
>> For example, /proc/net/{tcp6, ipv6_route, netlink, ...} seq_file private
>> data contains a reference to net namespace. When bpf_iter tries to
>> reuse the same seq_ops, its seq_file private data need the net namespace
>> setup properly too. In this case, the bpf_iter infrastructure can help
>> set up properly before doing seq_file operations.
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   include/linux/bpf.h   | 11 ++++++++++
>>   kernel/bpf/Makefile   |  2 +-
>>   kernel/bpf/bpf_iter.c | 50 +++++++++++++++++++++++++++++++++++++++++++
>>   3 files changed, 62 insertions(+), 1 deletion(-)
>>   create mode 100644 kernel/bpf/bpf_iter.c
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 10960cfabea4..5e56abc1e2f1 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -31,6 +31,7 @@ struct seq_file;
>>   struct btf;
>>   struct btf_type;
>>   struct exception_table_entry;
>> +struct seq_operations;
>>   
>>   extern struct idr btf_idr;
>>   extern spinlock_t btf_idr_lock;
>> @@ -1109,6 +1110,16 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
>>   int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
>>   int bpf_obj_get_user(const char __user *pathname, int flags);
>>   
>> +struct bpf_iter_reg {
>> +	const char *target;
>> +	const char *target_func_name;
>> +	const struct seq_operations *seq_ops;
>> +	u32 seq_priv_size;
>> +	u32 target_feature;
>> +};
>> +
>> +int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
>> +
>>   int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
>>   int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
>>   int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
>> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
>> index f2d7be596966..6a8b0febd3f6 100644
>> --- a/kernel/bpf/Makefile
>> +++ b/kernel/bpf/Makefile
>> @@ -2,7 +2,7 @@
>>   obj-y := core.o
>>   CFLAGS_core.o += $(call cc-disable-warning, override-init)
>>   
>> -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
>> +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o
>>   obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
>>   obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
>>   obj-$(CONFIG_BPF_SYSCALL) += disasm.o
>> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
>> new file mode 100644
>> index 000000000000..1115b978607a
>> --- /dev/null
>> +++ b/kernel/bpf/bpf_iter.c
>> @@ -0,0 +1,50 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/* Copyright (c) 2020 Facebook */
>> +
>> +#include <linux/fs.h>
>> +#include <linux/filter.h>
>> +#include <linux/bpf.h>
>> +
>> +struct bpf_iter_target_info {
>> +	struct list_head list;
>> +	const char *target;
>> +	const char *target_func_name;
>> +	const struct seq_operations *seq_ops;
>> +	u32 seq_priv_size;
>> +	u32 target_feature;
>> +};
>> +
>> +static struct list_head targets;
>> +static struct mutex targets_mutex;
>> +static bool bpf_iter_inited = false;
> The "!bpf_iter_inited" test below is racy.

Yes. As mentioned in the code comment, all currently implemented
targets are registered at the __init stage
(do_basic_setup()->do_initcalls()), so I assumed there was no race
here. But looking at the code again, I am not so sure about my
assumption any more.

> 
> LIST_HEAD_INIT and DEFINE_MUTEX can be used instead.

Will use these macros instead. Thanks!

> 
>> +
>> +int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
>> +{
>> +	struct bpf_iter_target_info *tinfo;
>> +
>> +	/* The earliest bpf_iter_reg_target() is called at init time
>> +	 * where the bpf_iter registration is serialized.
>> +	 */
>> +	if (!bpf_iter_inited) {
>> +		INIT_LIST_HEAD(&targets);
>> +		mutex_init(&targets_mutex);
>> +		bpf_iter_inited = true;
>> +	}
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 10960cfabea4..5e56abc1e2f1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -31,6 +31,7 @@  struct seq_file;
 struct btf;
 struct btf_type;
 struct exception_table_entry;
+struct seq_operations;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -1109,6 +1110,16 @@  struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
+struct bpf_iter_reg {
+	const char *target;
+	const char *target_func_name;
+	const struct seq_operations *seq_ops;
+	u32 seq_priv_size;
+	u32 target_feature;
+};
+
+int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
+
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index f2d7be596966..6a8b0febd3f6 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -2,7 +2,7 @@ 
 obj-y := core.o
 CFLAGS_core.o += $(call cc-disable-warning, override-init)
 
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
new file mode 100644
index 000000000000..1115b978607a
--- /dev/null
+++ b/kernel/bpf/bpf_iter.c
@@ -0,0 +1,50 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+
+#include <linux/fs.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+
+struct bpf_iter_target_info {
+	struct list_head list;
+	const char *target;
+	const char *target_func_name;
+	const struct seq_operations *seq_ops;
+	u32 seq_priv_size;
+	u32 target_feature;
+};
+
+static struct list_head targets;
+static struct mutex targets_mutex;
+static bool bpf_iter_inited = false;
+
+int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
+{
+	struct bpf_iter_target_info *tinfo;
+
+	/* The earliest bpf_iter_reg_target() is called at init time
+	 * where the bpf_iter registration is serialized.
+	 */
+	if (!bpf_iter_inited) {
+		INIT_LIST_HEAD(&targets);
+		mutex_init(&targets_mutex);
+		bpf_iter_inited = true;
+	}
+
+	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+	if (!tinfo)
+		return -ENOMEM;
+
+	tinfo->target = reg_info->target;
+	tinfo->target_func_name = reg_info->target_func_name;
+	tinfo->seq_ops = reg_info->seq_ops;
+	tinfo->seq_priv_size = reg_info->seq_priv_size;
+	tinfo->target_feature = reg_info->target_feature;
+	INIT_LIST_HEAD(&tinfo->list);
+
+	mutex_lock(&targets_mutex);
+	list_add(&tinfo->list, &targets);
+	mutex_unlock(&targets_mutex);
+
+	return 0;
+}