Message ID | 20171130002251.30498-1-jakub.kicinski@netronome.com |
---|---|
State | RFC, archived |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [RFC] bpf: offload: report device information for offloaded programs | expand |
Hi Jakub, please read the comments below. On 30.11.2017 03:22, Jakub Kicinski wrote: > Report to the user ifindex and namespace information of offloaded > programs. Always set dev_bound to true if program was loaded for > a device which has been since removed. Specify the namespace > using dev/inode combination. > > Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> > Reviewed-by: Simon Horman <simon.horman@netronome.com> > Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com> > --- > fs/nsfs.c | 2 +- > include/linux/bpf.h | 2 ++ > include/linux/proc_ns.h | 1 + > include/uapi/linux/bpf.h | 5 +++++ > kernel/bpf/offload.c | 34 ++++++++++++++++++++++++++++++++++ > kernel/bpf/syscall.c | 6 ++++++ > tools/include/uapi/linux/bpf.h | 5 +++++ > 7 files changed, 54 insertions(+), 1 deletion(-) [snip] > diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c > index 8455b89d1bbf..da98349c647d 100644 > --- a/kernel/bpf/offload.c > +++ b/kernel/bpf/offload.c > @@ -16,9 +16,11 @@ > #include <linux/bpf.h> > #include <linux/bpf_verifier.h> > #include <linux/bug.h> > +#include <linux/kdev_t.h> > #include <linux/list.h> > #include <linux/netdevice.h> > #include <linux/printk.h> > +#include <linux/proc_ns.h> > #include <linux/rtnetlink.h> > > /* protected by RTNL */ > @@ -164,6 +166,38 @@ int bpf_prog_offload_compile(struct bpf_prog *prog) > return bpf_prog_offload_translate(prog); > } > > +int bpf_prog_offload_info_fill(struct bpf_prog_info *info, > + struct bpf_prog *prog) > +{ > + struct bpf_dev_offload *offload = prog->aux->offload; > + struct inode *ns_inode; > + struct path ns_path; > + struct net *net; > + int ret = 0; > + void *ptr; > + > + info->dev_bound = 1; > + > + rtnl_lock(); rtnl_lock() is too big a lock, and it is already overused in the kernel. Can't we use a smaller lock in this driver to protect bpf_prog_offload_devs? I suppose an rwlock would be appropriate for that. 
(Then we could completely remove rtnl_lock() from bpf_prog_offload_init(), using the read-locked dev_base_lock for __dev_get_by_index() instead, and the new small_rwlock to link into the list. I am not sure about bpf_prog_offload_verifier_prep() and bpf_prog_offload_translate(), or which context the net_device_ops->ndo_bpf users expect, i.e. whether they need rtnl or not.) Then the hunk below: > + if (!offload->netdev) > + goto out; > + > + net = dev_net(offload->netdev); > + get_net(net); /* __ns_get_path() drops the reference */ would become: read_lock(&small_rwlock); if (!offload->netdev) goto out; net = dev_net(offload->netdev); get_net(net); /* __ns_get_path() drops the reference */ read_unlock(&small_rwlock); and rtnl_lock() won't be touched. > + ptr = __ns_get_path(&ns_path, &net->ns); > + ret = PTR_ERR_OR_ZERO(ptr); > + if (ret) > + goto out; > + ns_inode = ns_path.dentry->d_inode; > + > + info->ns_dev = new_encode_dev(ns_inode->i_sb->s_dev); > + info->ns_inode = ns_inode->i_ino; > + info->ifindex = offload->netdev->ifindex; > +out: > + rtnl_unlock(); > + return ret; > +} > + [snip] Kirill
Hi Kirill, On Thu, 30 Nov 2017 16:19:13 +0300, Kirill Tkhai wrote: > > @@ -164,6 +166,38 @@ int bpf_prog_offload_compile(struct bpf_prog *prog) > > return bpf_prog_offload_translate(prog); > > } > > > > +int bpf_prog_offload_info_fill(struct bpf_prog_info *info, > > + struct bpf_prog *prog) > > +{ > > + struct bpf_dev_offload *offload = prog->aux->offload; > > + struct inode *ns_inode; > > + struct path ns_path; > > + struct net *net; > > + int ret = 0; > > + void *ptr; > > + > > + info->dev_bound = 1; > > + > > + rtnl_lock(); > > rtnl_lock() is too big a lock, and it is already overused in the kernel. > Can't we use a smaller lock in this driver to protect bpf_prog_offload_devs? > I suppose an rwlock would be appropriate for that. > > (Then we could completely remove rtnl_lock() from bpf_prog_offload_init(), > using the read-locked dev_base_lock for __dev_get_by_index() instead, and > the new small_rwlock to link into the list. > > I am not sure about bpf_prog_offload_verifier_prep() and bpf_prog_offload_translate(), > or which context the net_device_ops->ndo_bpf users expect, i.e. whether they need rtnl > or not.) Thanks for the comments; removing the use of rtnl_lock is definitely on my to-do list!
diff --git a/fs/nsfs.c b/fs/nsfs.c index ef243e14b6eb..d2b89372544a 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -51,7 +51,7 @@ static void nsfs_evict(struct inode *inode) ns->ops->put(ns); } -static void *__ns_get_path(struct path *path, struct ns_common *ns) +void *__ns_get_path(struct path *path, struct ns_common *ns) { struct vfsmount *mnt = nsfs_mnt; struct dentry *dentry; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e55e4255a210..fc7ab26e10bf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -516,6 +516,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); +int bpf_prog_offload_info_fill(struct bpf_prog_info *info, + struct bpf_prog *prog); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 2ff18c9840a7..1733359cf713 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -76,6 +76,7 @@ static inline int ns_alloc_inum(struct ns_common *ns) extern struct file *proc_ns_fget(int fd); #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) +extern void *__ns_get_path(struct path *path, struct ns_common *ns); extern void *ns_get_path(struct path *path, struct task_struct *task, const struct proc_ns_operations *ns_ops); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4c223ab30293..3183674496a2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -910,6 +910,11 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 dev_bound:1; + __u32 reserved:31; + __u32 ifindex; + __u64 ns_dev; + __u64 ns_inode; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 8455b89d1bbf..da98349c647d 100644 --- 
a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -16,9 +16,11 @@ #include <linux/bpf.h> #include <linux/bpf_verifier.h> #include <linux/bug.h> +#include <linux/kdev_t.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/printk.h> +#include <linux/proc_ns.h> #include <linux/rtnetlink.h> /* protected by RTNL */ @@ -164,6 +166,38 @@ int bpf_prog_offload_compile(struct bpf_prog *prog) return bpf_prog_offload_translate(prog); } +int bpf_prog_offload_info_fill(struct bpf_prog_info *info, + struct bpf_prog *prog) +{ + struct bpf_dev_offload *offload = prog->aux->offload; + struct inode *ns_inode; + struct path ns_path; + struct net *net; + int ret = 0; + void *ptr; + + info->dev_bound = 1; + + rtnl_lock(); + if (!offload->netdev) + goto out; + + net = dev_net(offload->netdev); + get_net(net); /* __ns_get_path() drops the reference */ + ptr = __ns_get_path(&ns_path, &net->ns); + ret = PTR_ERR_OR_ZERO(ptr); + if (ret) + goto out; + ns_inode = ns_path.dentry->d_inode; + + info->ns_dev = new_encode_dev(ns_inode->i_sb->s_dev); + info->ns_inode = ns_inode->i_ino; + info->ifindex = offload->netdev->ifindex; +out: + rtnl_unlock(); + return ret; +} + const struct bpf_prog_ops bpf_offload_prog_ops = { }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2c4cfeaa8d5e..101ee3a3e80e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1616,6 +1616,12 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, return -EFAULT; } + if (bpf_prog_is_dev_bound(prog->aux)) { + err = bpf_prog_offload_info_fill(&info, prog); + if (err) + return err; + } + done: if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4c223ab30293..3183674496a2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -910,6 +910,11 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; 
+ __u32 dev_bound:1; + __u32 reserved:31; + __u32 ifindex; + __u64 ns_dev; + __u64 ns_inode; } __attribute__((aligned(8))); struct bpf_map_info {