diff mbox series

[RFC] bpf: offload: report device information for offloaded programs

Message ID 20171130002251.30498-1-jakub.kicinski@netronome.com
State RFC, archived
Delegated to: BPF Maintainers
Headers show
Series [RFC] bpf: offload: report device information for offloaded programs | expand

Commit Message

Jakub Kicinski Nov. 30, 2017, 12:22 a.m. UTC
Report the ifindex and namespace information of offloaded programs
to the user.  dev_bound is always set to true, even if the device
the program was loaded for has since been removed.  The namespace
is identified by its dev/inode combination.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
 fs/nsfs.c                      |  2 +-
 include/linux/bpf.h            |  2 ++
 include/linux/proc_ns.h        |  1 +
 include/uapi/linux/bpf.h       |  5 +++++
 kernel/bpf/offload.c           | 34 ++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |  6 ++++++
 tools/include/uapi/linux/bpf.h |  5 +++++
 7 files changed, 54 insertions(+), 1 deletion(-)

Comments

Kirill Tkhai Nov. 30, 2017, 1:19 p.m. UTC | #1
Hi Jakub,

Please see my comments below.

On 30.11.2017 03:22, Jakub Kicinski wrote:
> Report to the user ifindex and namespace information of offloaded
> programs.  Always set dev_bound to true if program was loaded for
> a device which has been since removed.  Specify the namespace
> using dev/inode combination.
> 
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Reviewed-by: Simon Horman <simon.horman@netronome.com>
> Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
> ---
>  fs/nsfs.c                      |  2 +-
>  include/linux/bpf.h            |  2 ++
>  include/linux/proc_ns.h        |  1 +
>  include/uapi/linux/bpf.h       |  5 +++++
>  kernel/bpf/offload.c           | 34 ++++++++++++++++++++++++++++++++++
>  kernel/bpf/syscall.c           |  6 ++++++
>  tools/include/uapi/linux/bpf.h |  5 +++++
>  7 files changed, 54 insertions(+), 1 deletion(-)

[snip]

> diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> index 8455b89d1bbf..da98349c647d 100644
> --- a/kernel/bpf/offload.c
> +++ b/kernel/bpf/offload.c
> @@ -16,9 +16,11 @@
>  #include <linux/bpf.h>
>  #include <linux/bpf_verifier.h>
>  #include <linux/bug.h>
> +#include <linux/kdev_t.h>
>  #include <linux/list.h>
>  #include <linux/netdevice.h>
>  #include <linux/printk.h>
> +#include <linux/proc_ns.h>
>  #include <linux/rtnetlink.h>
>  
>  /* protected by RTNL */
> @@ -164,6 +166,38 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
>  	return bpf_prog_offload_translate(prog);
>  }
>  
> +int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
> +			       struct bpf_prog *prog)
> +{
> +	struct bpf_dev_offload *offload = prog->aux->offload;
> +	struct inode *ns_inode;
> +	struct path ns_path;
> +	struct net *net;
> +	int ret = 0;
> +	void *ptr;
> +
> +	info->dev_bound = 1;
> +
> +	rtnl_lock();

rtnl_lock() is too big a lock, and it is already overused in the kernel.
Can't we use a smaller lock in this driver to protect bpf_prog_offload_devs?
I suppose an rwlock would be appropriate for that.

(Then we could completely remove rtnl_lock() from bpf_prog_offload_init(),
using a read-locked dev_base_lock for __dev_get_by_index() instead, and
the new small_rwlock for linking into the list.

I am not sure about bpf_prog_offload_verifier_prep() and bpf_prog_offload_translate(),
or about which context the net_device_ops->ndo_bpf users expect, i.e. whether
they need rtnl or not.)

Then the below hunk:

> +	if (!offload->netdev)
> +		goto out;
> +
> +	net = dev_net(offload->netdev);
> +	get_net(net); /* __ns_get_path() drops the reference */

will be:

	read_lock(&small_rwlock);
	if (!offload->netdev)
		goto out;

	net = dev_net(offload->netdev);
	get_net(net); /* __ns_get_path() drops the reference */
	read_unlock(&small_rwlock);

and rtnl_lock() won't be touched.

> +	ptr = __ns_get_path(&ns_path, &net->ns);
> +	ret = PTR_ERR_OR_ZERO(ptr);
> +	if (ret)
> +		goto out;
> +	ns_inode = ns_path.dentry->d_inode;
> +
> +	info->ns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
> +	info->ns_inode = ns_inode->i_ino;
> +	info->ifindex = offload->netdev->ifindex;
> +out:
> +	rtnl_unlock();
> +	return ret;
> +}
> +

[snip]

Kirill
Jakub Kicinski Dec. 1, 2017, 2:05 a.m. UTC | #2
Hi Kirill,

On Thu, 30 Nov 2017 16:19:13 +0300, Kirill Tkhai wrote:
> > @@ -164,6 +166,38 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
> >  	return bpf_prog_offload_translate(prog);
> >  }
> >  
> > +int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
> > +			       struct bpf_prog *prog)
> > +{
> > +	struct bpf_dev_offload *offload = prog->aux->offload;
> > +	struct inode *ns_inode;
> > +	struct path ns_path;
> > +	struct net *net;
> > +	int ret = 0;
> > +	void *ptr;
> > +
> > +	info->dev_bound = 1;
> > +
> > +	rtnl_lock();  
> 
> rtnl_lock() is too big lock and it is already overused in kernel.
> Can't we use smaller lock in this driver to protect bpf_prog_offload_devs?
> I suppose rwlock would be appropriate for that.
> 
> (Then, we may completely remove rtnl_lock() from bpf_prog_offload_init()
> and use readlocked dev_base_lock for __dev_get_by_index() instead and
> the new small_rwlock to link in the list.
> 
> Not sure about bpf_prog_offload_verifier_prep() and bpf_prog_offload_translate()
> and which context expect net_device_ops->ndo_bpf users. Either they need rtnl
> or not).

Thanks for the comments, removing the use of rtnl_lock is definitely on
my todo list!
diff mbox series

Patch

diff --git a/fs/nsfs.c b/fs/nsfs.c
index ef243e14b6eb..d2b89372544a 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -51,7 +51,7 @@  static void nsfs_evict(struct inode *inode)
 	ns->ops->put(ns);
 }
 
-static void *__ns_get_path(struct path *path, struct ns_common *ns)
+void *__ns_get_path(struct path *path, struct ns_common *ns)
 {
 	struct vfsmount *mnt = nsfs_mnt;
 	struct dentry *dentry;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e55e4255a210..fc7ab26e10bf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -516,6 +516,8 @@  static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
 
 int bpf_prog_offload_compile(struct bpf_prog *prog);
 void bpf_prog_offload_destroy(struct bpf_prog *prog);
+int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+			       struct bpf_prog *prog);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 2ff18c9840a7..1733359cf713 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -76,6 +76,7 @@  static inline int ns_alloc_inum(struct ns_common *ns)
 
 extern struct file *proc_ns_fget(int fd);
 #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
+extern void *__ns_get_path(struct path *path, struct ns_common *ns);
 extern void *ns_get_path(struct path *path, struct task_struct *task,
 			const struct proc_ns_operations *ns_ops);
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4c223ab30293..3183674496a2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -910,6 +910,11 @@  struct bpf_prog_info {
 	__u32 nr_map_ids;
 	__aligned_u64 map_ids;
 	char name[BPF_OBJ_NAME_LEN];
+	__u32 dev_bound:1;
+	__u32 reserved:31;
+	__u32 ifindex;
+	__u64 ns_dev;
+	__u64 ns_inode;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 8455b89d1bbf..da98349c647d 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -16,9 +16,11 @@ 
 #include <linux/bpf.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bug.h>
+#include <linux/kdev_t.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
+#include <linux/proc_ns.h>
 #include <linux/rtnetlink.h>
 
 /* protected by RTNL */
@@ -164,6 +166,38 @@  int bpf_prog_offload_compile(struct bpf_prog *prog)
 	return bpf_prog_offload_translate(prog);
 }
 
+int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+			       struct bpf_prog *prog)
+{
+	struct bpf_dev_offload *offload = prog->aux->offload;
+	struct inode *ns_inode;
+	struct path ns_path;
+	struct net *net;
+	int ret = 0;
+	void *ptr;
+
+	info->dev_bound = 1;
+
+	rtnl_lock();
+	if (!offload->netdev)
+		goto out;
+
+	net = dev_net(offload->netdev);
+	get_net(net); /* __ns_get_path() drops the reference */
+	ptr = __ns_get_path(&ns_path, &net->ns);
+	ret = PTR_ERR_OR_ZERO(ptr);
+	if (ret)
+		goto out;
+	ns_inode = ns_path.dentry->d_inode;
+
+	info->ns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
+	info->ns_inode = ns_inode->i_ino;
+	info->ifindex = offload->netdev->ifindex;
+out:
+	rtnl_unlock();
+	return ret;
+}
+
 const struct bpf_prog_ops bpf_offload_prog_ops = {
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2c4cfeaa8d5e..101ee3a3e80e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1616,6 +1616,12 @@  static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			return -EFAULT;
 	}
 
+	if (bpf_prog_is_dev_bound(prog->aux)) {
+		err = bpf_prog_offload_info_fill(&info, prog);
+		if (err)
+			return err;
+	}
+
 done:
 	if (copy_to_user(uinfo, &info, info_len) ||
 	    put_user(info_len, &uattr->info.info_len))
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4c223ab30293..3183674496a2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -910,6 +910,11 @@  struct bpf_prog_info {
 	__u32 nr_map_ids;
 	__aligned_u64 map_ids;
 	char name[BPF_OBJ_NAME_LEN];
+	__u32 dev_bound:1;
+	__u32 reserved:31;
+	__u32 ifindex;
+	__u64 ns_dev;
+	__u64 ns_inode;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {