diff mbox series

[bpf-next] bpf: add support for other map types to bpf_map_lookup_and_delete_elem

Message ID 20200917135700.649909-1-luka.oreskovic@sartura.hr
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series [bpf-next] bpf: add support for other map types to bpf_map_lookup_and_delete_elem | expand

Commit Message

Luka Oreskovic Sept. 17, 2020, 1:57 p.m. UTC
Since this function already exists, it made sense to implement it for
map types other than stack and queue. This patch adds the necessary parts
from bpf_map_lookup_elem and bpf_map_delete_elem so it works as expected
for all map types.

Signed-off-by: Luka Oreskovic <luka.oreskovic@sartura.hr>
CC: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
CC: Luka Perkov <luka.perkov@sartura.hr>
---
 kernel/bpf/syscall.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

Comments

Song Liu Sept. 17, 2020, 11:21 p.m. UTC | #1
On Thu, Sep 17, 2020 at 7:16 AM Luka Oreskovic
<luka.oreskovic@sartura.hr> wrote:
>
[...]

> +++ b/kernel/bpf/syscall.c
> @@ -1475,6 +1475,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>         if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
>                 return -EINVAL;
>
> +       if (attr->flags & ~BPF_F_LOCK)
> +               return -EINVAL;
> +

Please explain (in comments for commit log) the use of BPF_F_LOCK in
the commit log,
as it is new for BPF_MAP_LOOKUP_AND_DELETE_ELEM.

>         f = fdget(ufd);
>         map = __bpf_map_get(f);
>         if (IS_ERR(map))
> @@ -1485,13 +1488,19 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>                 goto err_put;
>         }
>
> +       if ((attr->flags & BPF_F_LOCK) &&
> +           !map_value_has_spin_lock(map)) {
> +               err = -EINVAL;
> +               goto err_put;
> +       }
> +
>         key = __bpf_copy_key(ukey, map->key_size);
>         if (IS_ERR(key)) {
>                 err = PTR_ERR(key);
>                 goto err_put;
>         }
>
> -       value_size = map->value_size;
> +       value_size = bpf_map_value_size(map);
>
>         err = -ENOMEM;
>         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
> @@ -1502,7 +1511,24 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>             map->map_type == BPF_MAP_TYPE_STACK) {
>                 err = map->ops->map_pop_elem(map, value);
>         } else {
> -               err = -ENOTSUPP;
> +               err = bpf_map_copy_value(map, key, value, attr->flags);
> +               if (err)
> +                       goto free_value;

IIUC, we cannot guarantee the value returned is the same as the value we
deleted. If this is true, I guess this may confuse the user with some
concurrency
bug.

Thanks,
Song

[...]
Luka Oreskovic Sept. 21, 2020, 11:12 a.m. UTC | #2
On Fri, Sep 18, 2020 at 1:21 AM Song Liu <song@kernel.org> wrote:
>
> On Thu, Sep 17, 2020 at 7:16 AM Luka Oreskovic
> <luka.oreskovic@sartura.hr> wrote:
> >
> [...]
>
> > +++ b/kernel/bpf/syscall.c
> > @@ -1475,6 +1475,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
> >         if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
> >                 return -EINVAL;
> >
> > +       if (attr->flags & ~BPF_F_LOCK)
> > +               return -EINVAL;
> > +
>
> Please explain (in comments for commit log) the use of BPF_F_LOCK in
> the commit log,
> as it is new for BPF_MAP_LOOKUP_AND_DELETE_ELEM.
>
> >         f = fdget(ufd);
> >         map = __bpf_map_get(f);
> >         if (IS_ERR(map))
> > @@ -1485,13 +1488,19 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
> >                 goto err_put;
> >         }
> >
> > +       if ((attr->flags & BPF_F_LOCK) &&
> > +           !map_value_has_spin_lock(map)) {
> > +               err = -EINVAL;
> > +               goto err_put;
> > +       }
> > +
> >         key = __bpf_copy_key(ukey, map->key_size);
> >         if (IS_ERR(key)) {
> >                 err = PTR_ERR(key);
> >                 goto err_put;
> >         }
> >
> > -       value_size = map->value_size;
> > +       value_size = bpf_map_value_size(map);
> >
> >         err = -ENOMEM;
> >         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
> > @@ -1502,7 +1511,24 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
> >             map->map_type == BPF_MAP_TYPE_STACK) {
> >                 err = map->ops->map_pop_elem(map, value);
> >         } else {
> > -               err = -ENOTSUPP;
> > +               err = bpf_map_copy_value(map, key, value, attr->flags);
> > +               if (err)
> > +                       goto free_value;
>
> IIUC, we cannot guarantee the value returned is the same as the value we
> deleted. If this is true, I guess this may confuse the user with some
> concurrency
> bug.
>
> Thanks,
> Song
>
> [...]

Thank you very much for your review. This is my first time contributing
to the linux community, so I am very grateful for any input.

For the first point, you are correct, the commit message should
have been more detailed. As for the second point, I see the problem,
but I'm not sure how to resolve it. Maybe moving the
bpf_disable_instrumentation call could work, but I'm not sure if
that could create different problems. I'll try to find and acceptable
solution and resubmit the patch.

Best wishes,
Luka Oreskovic
Andrii Nakryiko Sept. 22, 2020, 3:54 a.m. UTC | #3
On Thu, Sep 17, 2020 at 7:16 AM Luka Oreskovic
<luka.oreskovic@sartura.hr> wrote:
>
> Since this function already exists, it made sense to implement it for
> map types other than stack and queue. This patch adds the necessary parts
> from bpf_map_lookup_elem and bpf_map_delete_elem so it works as expected
> for all map types.
>
> Signed-off-by: Luka Oreskovic <luka.oreskovic@sartura.hr>
> CC: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
> CC: Luka Perkov <luka.perkov@sartura.hr>
> ---
>  kernel/bpf/syscall.c | 30 ++++++++++++++++++++++++++++--
>  1 file changed, 28 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 2ce32cad5c8e..955de6ca8c45 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1475,6 +1475,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>         if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
>                 return -EINVAL;
>
> +       if (attr->flags & ~BPF_F_LOCK)

If you want to use attr->flags, you need to update
BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD few lines above. And every
new feature needs to come with selftests, so please check
tools/testing/selftests/bpf and latest patch sets adding new selftests
to see how it's done.

> +               return -EINVAL;
> +
>         f = fdget(ufd);
>         map = __bpf_map_get(f);
>         if (IS_ERR(map))
> @@ -1485,13 +1488,19 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>                 goto err_put;
>         }
>
> +       if ((attr->flags & BPF_F_LOCK) &&
> +           !map_value_has_spin_lock(map)) {
> +               err = -EINVAL;
> +               goto err_put;
> +       }
> +
>         key = __bpf_copy_key(ukey, map->key_size);
>         if (IS_ERR(key)) {
>                 err = PTR_ERR(key);
>                 goto err_put;
>         }
>
> -       value_size = map->value_size;
> +       value_size = bpf_map_value_size(map);
>
>         err = -ENOMEM;
>         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
> @@ -1502,7 +1511,24 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>             map->map_type == BPF_MAP_TYPE_STACK) {
>                 err = map->ops->map_pop_elem(map, value);
>         } else {
> -               err = -ENOTSUPP;
> +               err = bpf_map_copy_value(map, key, value, attr->flags);
> +               if (err)
> +                       goto free_value;
> +
> +               if (bpf_map_is_dev_bound(map)) {
> +                       err = bpf_map_offload_delete_elem(map, key);
> +               } else if (IS_FD_PROG_ARRAY(map) ||
> +                          map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
> +                       /* These maps require sleepable context */
> +                       err = map->ops->map_delete_elem(map, key);
> +               } else {
> +                       bpf_disable_instrumentation();
> +                       rcu_read_lock();
> +                       err = map->ops->map_delete_elem(map, key);
> +                       rcu_read_unlock();
> +                       bpf_enable_instrumentation();
> +                       maybe_wait_bpf_programs(map);
> +               }

The whole point of this operation is to do lookup and deletion of
elements atomically. You can't do it with a separate lookup, followed
by a separate delete operation. Those two have to be implemented by
each type of map specifically. E.g., for hashmap, you'd have a
separate function implementation that takes a bucket lock, copies
data, and deletes entry, while still holding the lock. Of course
internally you'd want to reuse as much code as possible, but it will
be a separate bpf_map_ops operation.

>         }
>
>         if (err)
> --
> 2.26.2
>
diff mbox series

Patch

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2ce32cad5c8e..955de6ca8c45 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1475,6 +1475,9 @@  static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
 		return -EINVAL;
 
+	if (attr->flags & ~BPF_F_LOCK)
+		return -EINVAL;
+
 	f = fdget(ufd);
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
@@ -1485,13 +1488,19 @@  static int map_lookup_and_delete_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
+	if ((attr->flags & BPF_F_LOCK) &&
+	    !map_value_has_spin_lock(map)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
 		goto err_put;
 	}
 
-	value_size = map->value_size;
+	value_size = bpf_map_value_size(map);
 
 	err = -ENOMEM;
 	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
@@ -1502,7 +1511,24 @@  static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	    map->map_type == BPF_MAP_TYPE_STACK) {
 		err = map->ops->map_pop_elem(map, value);
 	} else {
-		err = -ENOTSUPP;
+		err = bpf_map_copy_value(map, key, value, attr->flags);
+		if (err)
+			goto free_value;
+
+		if (bpf_map_is_dev_bound(map)) {
+			err = bpf_map_offload_delete_elem(map, key);
+		} else if (IS_FD_PROG_ARRAY(map) ||
+			   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+			/* These maps require sleepable context */
+			err = map->ops->map_delete_elem(map, key);
+		} else {
+			bpf_disable_instrumentation();
+			rcu_read_lock();
+			err = map->ops->map_delete_elem(map, key);
+			rcu_read_unlock();
+			bpf_enable_instrumentation();
+			maybe_wait_bpf_programs(map);
+		}
 	}
 
 	if (err)