diff mbox series

[bpf-next,v2,10/20] net: bpf: add netlink and ipv6_route bpf_iter targets

Message ID 20200504062558.2048168-1-yhs@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf iterator for kernel data | expand

Commit Message

Yonghong Song May 4, 2020, 6:25 a.m. UTC
This patch added netlink and ipv6_route targets, using
the same seq_ops (except show() and minor changes for stop())
for /proc/net/{netlink,ipv6_route}.

The net namespace for these targets are the current net
namespace at file open stage, similar to
/proc/net/{netlink,ipv6_route} reference counting
the net namespace at seq_file open stage.

Since module is not supported for now, ipv6_route is
supported only if the IPV6 is built-in, i.e., not compiled
as a module. The restriction can be lifted once module
is properly supported for bpf_iter.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 fs/proc/proc_net.c       | 19 +++++++++
 include/linux/proc_fs.h  |  3 ++
 net/ipv6/ip6_fib.c       | 65 +++++++++++++++++++++++++++++-
 net/ipv6/route.c         | 27 +++++++++++++
 net/netlink/af_netlink.c | 87 +++++++++++++++++++++++++++++++++++++++-
 5 files changed, 197 insertions(+), 4 deletions(-)

Comments

Andrii Nakryiko May 6, 2020, 5:21 a.m. UTC | #1
On Sun, May 3, 2020 at 11:29 PM Yonghong Song <yhs@fb.com> wrote:
>
> This patch added netlink and ipv6_route targets, using
> the same seq_ops (except show() and minor changes for stop())
> for /proc/net/{netlink,ipv6_route}.
>
> The net namespace for these targets are the current net
> namespace at file open stage, similar to
> /proc/net/{netlink,ipv6_route} reference counting
> the net namespace at seq_file open stage.
>
> Since module is not supported for now, ipv6_route is
> supported only if the IPV6 is built-in, i.e., not compiled
> as a module. The restriction can be lifted once module
> is properly supported for bpf_iter.
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  fs/proc/proc_net.c       | 19 +++++++++
>  include/linux/proc_fs.h  |  3 ++
>  net/ipv6/ip6_fib.c       | 65 +++++++++++++++++++++++++++++-
>  net/ipv6/route.c         | 27 +++++++++++++
>  net/netlink/af_netlink.c | 87 +++++++++++++++++++++++++++++++++++++++-
>  5 files changed, 197 insertions(+), 4 deletions(-)
>

[...]

>  int __init ip6_route_init(void)
>  {
>         int ret;
> @@ -6455,6 +6474,14 @@ int __init ip6_route_init(void)
>         if (ret)
>                 goto out_register_late_subsys;
>
> +#if IS_BUILTIN(CONFIG_IPV6)
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
> +       ret = bpf_iter_register();
> +       if (ret)
> +               goto out_register_late_subsys;

Seems like bpf_iter infra is missing unregistering API.
ip6_route_init(), if fails, undoes all the registrations, so probably
should also unregister ipv6_route target as well?

> +#endif
> +#endif
> +
>         for_each_possible_cpu(cpu) {
>                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
>

[...]

> +static void netlink_seq_stop(struct seq_file *seq, void *v)
> +{
> +       struct bpf_iter_meta meta;
> +       struct bpf_prog *prog;
> +
> +       if (!v) {
> +               meta.seq = seq;
> +               prog = bpf_iter_get_info(&meta, true);
> +               if (prog)
> +                       netlink_prog_seq_show(prog, &meta, v);

nit: netlink_prog_seq_show() can return failure (from BPF program),
but you are not returning it. Given seq_file's stop is not supposed to
fail, you can explicitly cast result to (void)? I think it's done in
few other places in BPF code, when return result is explicitly
ignored.


> +       }
> +
> +       netlink_native_seq_stop(seq, v);
> +}
> +#else

[...]
Yonghong Song May 6, 2020, 5:32 p.m. UTC | #2
On 5/5/20 10:21 PM, Andrii Nakryiko wrote:
> On Sun, May 3, 2020 at 11:29 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> This patch added netlink and ipv6_route targets, using
>> the same seq_ops (except show() and minor changes for stop())
>> for /proc/net/{netlink,ipv6_route}.
>>
>> The net namespace for these targets are the current net
>> namespace at file open stage, similar to
>> /proc/net/{netlink,ipv6_route} reference counting
>> the net namespace at seq_file open stage.
>>
>> Since module is not supported for now, ipv6_route is
>> supported only if the IPV6 is built-in, i.e., not compiled
>> as a module. The restriction can be lifted once module
>> is properly supported for bpf_iter.
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   fs/proc/proc_net.c       | 19 +++++++++
>>   include/linux/proc_fs.h  |  3 ++
>>   net/ipv6/ip6_fib.c       | 65 +++++++++++++++++++++++++++++-
>>   net/ipv6/route.c         | 27 +++++++++++++
>>   net/netlink/af_netlink.c | 87 +++++++++++++++++++++++++++++++++++++++-
>>   5 files changed, 197 insertions(+), 4 deletions(-)
>>
> 
> [...]
> 
>>   int __init ip6_route_init(void)
>>   {
>>          int ret;
>> @@ -6455,6 +6474,14 @@ int __init ip6_route_init(void)
>>          if (ret)
>>                  goto out_register_late_subsys;
>>
>> +#if IS_BUILTIN(CONFIG_IPV6)
>> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
>> +       ret = bpf_iter_register();
>> +       if (ret)
>> +               goto out_register_late_subsys;
> 
> Seems like bpf_iter infra is missing unregistering API.
> ip6_route_init(), if fails, undoes all the registrations, so probably
> should also unregister ipv6_route target as well?

Yes, it is. But not in this function. In this function, 
bpf_iter_register() is the last one possibly causing error,
so there is no need to unregister here.

But there is another cleanup funciton called outside of this
function, I need to do proper unregister there.

Thanks for catching this.

> 
>> +#endif
>> +#endif
>> +
>>          for_each_possible_cpu(cpu) {
>>                  struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
>>
> 
> [...]
> 
>> +static void netlink_seq_stop(struct seq_file *seq, void *v)
>> +{
>> +       struct bpf_iter_meta meta;
>> +       struct bpf_prog *prog;
>> +
>> +       if (!v) {
>> +               meta.seq = seq;
>> +               prog = bpf_iter_get_info(&meta, true);
>> +               if (prog)
>> +                       netlink_prog_seq_show(prog, &meta, v);
> 
> nit: netlink_prog_seq_show() can return failure (from BPF program),
> but you are not returning it. Given seq_file's stop is not supposed to
> fail, you can explicitly cast result to (void)? I think it's done in

Yes, we can do this. An explicit casting expressed the intention.

> few other places in BPF code, when return result is explicitly
> ignored.
> 
> 
>> +       }
>> +
>> +       netlink_native_seq_stop(seq, v);
>> +}
>> +#else
> 
> [...]
>
diff mbox series

Patch

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 4888c5224442..dba63b2429f0 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -98,6 +98,25 @@  static const struct proc_ops proc_net_seq_ops = {
 	.proc_release	= seq_release_net,
 };
 
+int bpf_iter_init_seq_net(void *priv_data)
+{
+#ifdef CONFIG_NET_NS
+	struct seq_net_private *p = priv_data;
+
+	p->net = get_net(current->nsproxy->net_ns);
+#endif
+	return 0;
+}
+
+void bpf_iter_fini_seq_net(void *priv_data)
+{
+#ifdef CONFIG_NET_NS
+	struct seq_net_private *p = priv_data;
+
+	put_net(p->net);
+#endif
+}
+
 struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
 		struct proc_dir_entry *parent, const struct seq_operations *ops,
 		unsigned int state_size, void *data)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 45c05fd9c99d..03953c59807d 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -105,6 +105,9 @@  struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
 						    void *data);
 extern struct pid *tgid_pidfd_to_pid(const struct file *file);
 
+extern int bpf_iter_init_seq_net(void *priv_data);
+extern void bpf_iter_fini_seq_net(void *priv_data);
+
 #ifdef CONFIG_PROC_PID_ARCH_STATUS
 /*
  * The architecture which selects CONFIG_PROC_PID_ARCH_STATUS must
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 46ed56719476..0ba2dc46a44c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2467,7 +2467,7 @@  void fib6_gc_cleanup(void)
 }
 
 #ifdef CONFIG_PROC_FS
-static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+static int ipv6_route_native_seq_show(struct seq_file *seq, void *v)
 {
 	struct fib6_info *rt = v;
 	struct ipv6_route_iter *iter = seq->private;
@@ -2625,7 +2625,7 @@  static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
 	return w->node && !(w->state == FWS_U && w->node == w->root);
 }
 
-static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
 	__releases(RCU_BH)
 {
 	struct net *net = seq_file_net(seq);
@@ -2637,6 +2637,67 @@  static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
 	rcu_read_unlock_bh();
 }
 
+#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_iter__ipv6_route {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct fib6_info *, rt);
+};
+
+static int ipv6_route_prog_seq_show(struct bpf_prog *prog,
+				    struct bpf_iter_meta *meta,
+				    void *v)
+{
+	struct bpf_iter__ipv6_route ctx;
+
+	ctx.meta = meta;
+	ctx.rt = v;
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+{
+	struct ipv6_route_iter *iter = seq->private;
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+	int ret;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, false);
+	if (!prog)
+		return ipv6_route_native_seq_show(seq, v);
+
+	ret = ipv6_route_prog_seq_show(prog, &meta, v);
+	iter->w.leaf = NULL;
+
+	return ret;
+}
+
+static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	if (!v) {
+		meta.seq = seq;
+		prog = bpf_iter_get_info(&meta, true);
+		if (prog)
+			ipv6_route_prog_seq_show(prog, &meta, v);
+	}
+
+	ipv6_route_native_seq_stop(seq, v);
+}
+#else
+static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+{
+	return ipv6_route_native_seq_show(seq, v);
+}
+
+static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+{
+	ipv6_route_native_seq_stop(seq, v);
+}
+#endif
+
 const struct seq_operations ipv6_route_seq_ops = {
 	.start	= ipv6_route_seq_start,
 	.next	= ipv6_route_seq_next,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3912aac7854d..aa2d3afc8d8b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6393,6 +6393,25 @@  void __init ip6_route_init_special_entries(void)
   #endif
 }
 
+#if IS_BUILTIN(CONFIG_IPV6)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
+
+static int __init bpf_iter_register(void)
+{
+	struct bpf_iter_reg reg_info = {
+		.target			= "ipv6_route",
+		.seq_ops		= &ipv6_route_seq_ops,
+		.init_seq_private	= bpf_iter_init_seq_net,
+		.fini_seq_private	= bpf_iter_fini_seq_net,
+		.seq_priv_size		= sizeof(struct ipv6_route_iter),
+	};
+
+	return bpf_iter_reg_target(&reg_info);
+}
+#endif
+#endif
+
 int __init ip6_route_init(void)
 {
 	int ret;
@@ -6455,6 +6474,14 @@  int __init ip6_route_init(void)
 	if (ret)
 		goto out_register_late_subsys;
 
+#if IS_BUILTIN(CONFIG_IPV6)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	ret = bpf_iter_register();
+	if (ret)
+		goto out_register_late_subsys;
+#endif
+#endif
+
 	for_each_possible_cpu(cpu) {
 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5ded01ca8b20..f0e4599c613c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2596,7 +2596,7 @@  static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	return __netlink_seq_next(seq);
 }
 
-static void netlink_seq_stop(struct seq_file *seq, void *v)
+static void netlink_native_seq_stop(struct seq_file *seq, void *v)
 {
 	struct nl_seq_iter *iter = seq->private;
 
@@ -2607,7 +2607,7 @@  static void netlink_seq_stop(struct seq_file *seq, void *v)
 }
 
 
-static int netlink_seq_show(struct seq_file *seq, void *v)
+static int netlink_native_seq_show(struct seq_file *seq, void *v)
 {
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -2634,6 +2634,68 @@  static int netlink_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__netlink {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct netlink_sock *, sk);
+};
+
+DEFINE_BPF_ITER_FUNC(netlink, struct bpf_iter_meta *meta, struct netlink_sock *sk)
+
+static int netlink_prog_seq_show(struct bpf_prog *prog,
+				  struct bpf_iter_meta *meta,
+				  void *v)
+{
+	struct bpf_iter__netlink ctx;
+
+	meta->seq_num--;  /* skip SEQ_START_TOKEN */
+	ctx.meta = meta;
+	ctx.sk = nlk_sk((struct sock *)v);
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int netlink_seq_show(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, false);
+	if (!prog)
+		return netlink_native_seq_show(seq, v);
+
+	if (v != SEQ_START_TOKEN)
+		return netlink_prog_seq_show(prog, &meta, v);
+
+	return 0;
+}
+
+static void netlink_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	if (!v) {
+		meta.seq = seq;
+		prog = bpf_iter_get_info(&meta, true);
+		if (prog)
+			netlink_prog_seq_show(prog, &meta, v);
+	}
+
+	netlink_native_seq_stop(seq, v);
+}
+#else
+static int netlink_seq_show(struct seq_file *seq, void *v)
+{
+	return netlink_native_seq_show(seq, v);
+}
+
+static void netlink_seq_stop(struct seq_file *seq, void *v)
+{
+	netlink_native_seq_stop(seq, v);
+}
+#endif
+
 static const struct seq_operations netlink_seq_ops = {
 	.start  = netlink_seq_start,
 	.next   = netlink_seq_next,
@@ -2740,6 +2802,21 @@  static const struct rhashtable_params netlink_rhashtable_params = {
 	.automatic_shrinking = true,
 };
 
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+static int __init bpf_iter_register(void)
+{
+	struct bpf_iter_reg reg_info = {
+		.target			= "netlink",
+		.seq_ops		= &netlink_seq_ops,
+		.init_seq_private	= bpf_iter_init_seq_net,
+		.fini_seq_private	= bpf_iter_fini_seq_net,
+		.seq_priv_size		= sizeof(struct nl_seq_iter),
+	};
+
+	return bpf_iter_reg_target(&reg_info);
+}
+#endif
+
 static int __init netlink_proto_init(void)
 {
 	int i;
@@ -2748,6 +2825,12 @@  static int __init netlink_proto_init(void)
 	if (err != 0)
 		goto out;
 
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	err = bpf_iter_register();
+	if (err)
+		goto out;
+#endif
+
 	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb));
 
 	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);