diff mbox series

[RFC,bpf-next,1/4] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH

Message ID 20200522010526.14649-2-dsahern@kernel.org
State RFC
Delegated to: BPF Maintainers
Headers show
Series bpf: Add support for XDP programs in DEVMAPs | expand

Commit Message

David Ahern May 22, 2020, 1:05 a.m. UTC
Extend DEVMAP and DEVMAP_HASH to support 8-byte values as a
<device index, program id> pair. To do this, a new struct is needed in
bpf_dtab_netdev to hold the values to return on lookup.

Signed-off-by: David Ahern <dsahern@kernel.org>
---
 kernel/bpf/devmap.c | 53 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 13 deletions(-)

Comments

Jesper Dangaard Brouer May 22, 2020, 12:08 p.m. UTC | #1
On Thu, 21 May 2020 19:05:23 -0600
David Ahern <dsahern@kernel.org> wrote:

> Add support to DEVMAP and DEVMAP_HASH to support 8-byte values as a
> <device index, program id> pair. To do this, a new struct is needed in
> bpf_dtab_netdev to hold the values to return on lookup.

I would like to see us leverage BTF instead of checking the size in
attr->value_size, e.g. do the sanity check based on BTF.
Given I don't know the exact details on how this should be done, I will
look into it... I already promised Lorenzo, as we have already
discussed this on IRC.

So, you and Lorenzo can go ahead with this approach, and test the
use-case. And I'll try to figure out if-and-how we can leverage BTF
here.  Input from BTF experts will be much appreciated.


> Signed-off-by: David Ahern <dsahern@kernel.org>
> ---
>  kernel/bpf/devmap.c | 53 ++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 40 insertions(+), 13 deletions(-)
> 
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index a51d9fb7a359..2c01ce434306 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -60,12 +60,19 @@ struct xdp_dev_bulk_queue {
>  	unsigned int count;
>  };
>  
> +/* devmap value can be dev index or dev index + prog id */
> +struct dev_map_ext_val {
> +	u32 ifindex;	/* must be first for compat with 4-byte values */
> +	u32 prog_id;
> +};
> +
>  struct bpf_dtab_netdev {
>  	struct net_device *dev; /* must be first member, due to tracepoint */
>  	struct hlist_node index_hlist;
>  	struct bpf_dtab *dtab;
>  	struct rcu_head rcu;
>  	unsigned int idx;
> +	struct dev_map_ext_val val;
>  };
>  
>  struct bpf_dtab {
> @@ -108,9 +115,13 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
>  	u64 cost = 0;
>  	int err;
>  
> -	/* check sanity of attributes */
> +	/* check sanity of attributes. 2 value sizes supported:
> +	 * 4 bytes: ifindex
> +	 * 8 bytes: ifindex + prog id
> +	 */
>  	if (attr->max_entries == 0 || attr->key_size != 4 ||
> -	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
> +	    (attr->value_size != 4 && attr->value_size != 8) ||
> +	    attr->map_flags & ~DEV_CREATE_FLAG_MASK)
>  		return -EINVAL;
>  
>  	/* Lookup returns a pointer straight to dev->ifindex, so make sure the
[...]

>  static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
> @@ -568,8 +579,16 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
>  {
>  	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
>  	struct bpf_dtab_netdev *dev, *old_dev;
> -	u32 ifindex = *(u32 *)value;
>  	u32 i = *(u32 *)key;
> +	u32 ifindex;
> +
> +	if (map->value_size == 4) {
> +		ifindex = *(u32 *)value;
> +	} else {
> +		struct dev_map_ext_val *val = value;
> +
> +		ifindex = val->ifindex;
> +	}
>  
>  	if (unlikely(map_flags > BPF_EXIST))
>  		return -EINVAL;
> @@ -609,10 +628,18 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
>  {
>  	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
>  	struct bpf_dtab_netdev *dev, *old_dev;
> -	u32 ifindex = *(u32 *)value;
>  	u32 idx = *(u32 *)key;
>  	unsigned long flags;
>  	int err = -EEXIST;
> +	u32 ifindex;
> +
> +	if (map->value_size == 4) {
> +		ifindex = *(u32 *)value;
> +	} else {
> +		struct dev_map_ext_val *val = value;
> +
> +		ifindex = val->ifindex;
> +	}
>  
>  	if (unlikely(map_flags > BPF_EXIST || !ifindex))
>  		return -EINVAL;
Jesper Dangaard Brouer May 22, 2020, 4:04 p.m. UTC | #2
On Fri, 22 May 2020 14:08:05 +0200
Jesper Dangaard Brouer <brouer@redhat.com> wrote:

> On Thu, 21 May 2020 19:05:23 -0600
> David Ahern <dsahern@kernel.org> wrote:
> 
> > Add support to DEVMAP and DEVMAP_HASH to support 8-byte values as a
> > <device index, program id> pair. To do this, a new struct is needed in
> > bpf_dtab_netdev to hold the values to return on lookup.  
> 
> I would like to see us leverage BTF instead of checking on the size
> attr->value_size. E.g do the sanity check based on BTF.
> Given I don't know the exact details on how this should be done, I will
> look into it... I already promised Lorenzo, as we have already
> discussed this on IRC.
> 
> So, you and Lorenzo can go ahead with this approach, and test the
> use-case. And I'll try to figure out if-and-how we can leverage BTF
> here.  Input from BTF experts will be much appreciated.

Published my current notes here:
 https://github.com/xdp-project/xdp-project/blob/BTF01-notes.public/areas/core/BTF_01_notes.org

I also created a PR that people can "subscribe" to on GitHub, if you are interested:
 https://github.com/xdp-project/xdp-project/pull/36
David Ahern May 22, 2020, 6:11 p.m. UTC | #3
On 5/22/20 10:04 AM, Jesper Dangaard Brouer wrote:
> On Fri, 22 May 2020 14:08:05 +0200
> Jesper Dangaard Brouer <brouer@redhat.com> wrote:
> 
>> On Thu, 21 May 2020 19:05:23 -0600
>> David Ahern <dsahern@kernel.org> wrote:
>>
>>> Add support to DEVMAP and DEVMAP_HASH to support 8-byte values as a
>>> <device index, program id> pair. To do this, a new struct is needed in
>>> bpf_dtab_netdev to hold the values to return on lookup.  
>>
>> I would like to see us leverage BTF instead of checking on the size
>> attr->value_size. E.g do the sanity check based on BTF.
>> Given I don't know the exact details on how this should be done, I will
>> look into it... I already promised Lorenzo, as we have already
>> discussed this on IRC.
>>
>> So, you and Lorenzo can go ahead with this approach, and test the
>> use-case. And I'll try to figure out if-and-how we can leverage BTF
>> here.  Input from BTF experts will be much appreciated.
> 
> Published my current notes here:
>  https://github.com/xdp-project/xdp-project/blob/BTF01-notes.public/areas/core/BTF_01_notes.org
> 
> And created PR that people can GitHub "subscribe" to, if you are interested:
>  https://github.com/xdp-project/xdp-project/pull/36
> 

thanks for compiling some notes.

Fundamentally, I do not see how this can work for something like the
program id where the kernel needs to know not just the type (u32) but
that it should take the id and do a lookup - convert the id into a
bpf_prog reference.
diff mbox series

Patch

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a51d9fb7a359..2c01ce434306 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -60,12 +60,19 @@  struct xdp_dev_bulk_queue {
 	unsigned int count;
 };
 
+/* devmap value can be dev index or dev index + prog id */
+struct dev_map_ext_val {
+	u32 ifindex;	/* must be first for compat with 4-byte values */
+	u32 prog_id;
+};
+
 struct bpf_dtab_netdev {
 	struct net_device *dev; /* must be first member, due to tracepoint */
 	struct hlist_node index_hlist;
 	struct bpf_dtab *dtab;
 	struct rcu_head rcu;
 	unsigned int idx;
+	struct dev_map_ext_val val;
 };
 
 struct bpf_dtab {
@@ -108,9 +115,13 @@  static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 	u64 cost = 0;
 	int err;
 
-	/* check sanity of attributes */
+	/* check sanity of attributes. 2 value sizes supported:
+	 * 4 bytes: ifindex
+	 * 8 bytes: ifindex + prog id
+	 */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
+	    (attr->value_size != 4 && attr->value_size != 8) ||
+	    attr->map_flags & ~DEV_CREATE_FLAG_MASK)
 		return -EINVAL;
 
 	/* Lookup returns a pointer straight to dev->ifindex, so make sure the
@@ -472,18 +483,15 @@  int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
-	struct net_device *dev = obj ? obj->dev : NULL;
 
-	return dev ? &dev->ifindex : NULL;
+	return obj ? &obj->val : NULL;
 }
 
 static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
 								*(u32 *)key);
-	struct net_device *dev = obj ? obj->dev : NULL;
-
-	return dev ? &dev->ifindex : NULL;
+	return obj ? &obj->val : NULL;
 }
 
 static void __dev_map_entry_free(struct rcu_head *rcu)
@@ -552,15 +560,18 @@  static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 		return ERR_PTR(-ENOMEM);
 
 	dev->dev = dev_get_by_index(net, ifindex);
-	if (!dev->dev) {
-		kfree(dev);
-		return ERR_PTR(-EINVAL);
-	}
+	if (!dev->dev)
+		goto err_out;
 
 	dev->idx = idx;
 	dev->dtab = dtab;
 
+	dev->val.ifindex = ifindex;
+
 	return dev;
+err_out:
+	kfree(dev);
+	return ERR_PTR(-EINVAL);
 }
 
 static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
@@ -568,8 +579,16 @@  static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dev, *old_dev;
-	u32 ifindex = *(u32 *)value;
 	u32 i = *(u32 *)key;
+	u32 ifindex;
+
+	if (map->value_size == 4) {
+		ifindex = *(u32 *)value;
+	} else {
+		struct dev_map_ext_val *val = value;
+
+		ifindex = val->ifindex;
+	}
 
 	if (unlikely(map_flags > BPF_EXIST))
 		return -EINVAL;
@@ -609,10 +628,18 @@  static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dev, *old_dev;
-	u32 ifindex = *(u32 *)value;
 	u32 idx = *(u32 *)key;
 	unsigned long flags;
 	int err = -EEXIST;
+	u32 ifindex;
+
+	if (map->value_size == 4) {
+		ifindex = *(u32 *)value;
+	} else {
+		struct dev_map_ext_val *val = value;
+
+		ifindex = val->ifindex;
+	}
 
 	if (unlikely(map_flags > BPF_EXIST || !ifindex))
 		return -EINVAL;