diff mbox

[RFC,v2,net-next,07/16] bpf: add lookup/update/delete/iterate methods to BPF maps

Message ID 1405657206-12060-8-git-send-email-ast@plumgrid.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Alexei Starovoitov July 18, 2014, 4:19 a.m. UTC
'maps' is a generic storage of different types for sharing data between kernel
and userspace.

The maps are accessed from user space via BPF syscall, which has commands:

- create a map with given type and attributes
  fd = bpf_map_create(map_type, struct nlattr *attr, int len)
  returns fd or negative error

- lookup key in a given map referenced by fd
  err = bpf_map_lookup_elem(int fd, void *key, void *value)
  returns zero and stores found elem into value or negative error

- create or update key/value pair in a given map
  err = bpf_map_update_elem(int fd, void *key, void *value)
  returns zero or negative error

- find and delete element by key in a given map
  err = bpf_map_delete_elem(int fd, void *key)

- iterate map elements (based on input key return next_key)
  err = bpf_map_get_next_key(int fd, void *key, void *next_key)

- close(fd) deletes the map

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
---
 include/linux/bpf.h      |    6 ++
 include/uapi/linux/bpf.h |   25 ++++++
 kernel/bpf/syscall.c     |  209 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 240 insertions(+)

Comments

Kees Cook July 23, 2014, 6:25 p.m. UTC | #1
On Thu, Jul 17, 2014 at 9:19 PM, Alexei Starovoitov <ast@plumgrid.com> wrote:
> 'maps' is a generic storage of different types for sharing data between kernel
> and userspace.
>
> The maps are accessed from user space via BPF syscall, which has commands:
>
> - create a map with given type and attributes
>   fd = bpf_map_create(map_type, struct nlattr *attr, int len)
>   returns fd or negative error
>
> - lookup key in a given map referenced by fd
>   err = bpf_map_lookup_elem(int fd, void *key, void *value)
>   returns zero and stores found elem into value or negative error
>
> - create or update key/value pair in a given map
>   err = bpf_map_update_elem(int fd, void *key, void *value)
>   returns zero or negative error
>
> - find and delete element by key in a given map
>   err = bpf_map_delete_elem(int fd, void *key)
>
> - iterate map elements (based on input key return next_key)
>   err = bpf_map_get_next_key(int fd, void *key, void *next_key)
>
> - close(fd) deletes the map
>
> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
> ---
>  include/linux/bpf.h      |    6 ++
>  include/uapi/linux/bpf.h |   25 ++++++
>  kernel/bpf/syscall.c     |  209 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 240 insertions(+)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 57af236a0eb4..91e2caf8edf9 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -18,6 +18,12 @@ struct bpf_map_ops {
>         /* funcs callable from userspace (via syscall) */
>         struct bpf_map *(*map_alloc)(struct nlattr *attrs[BPF_MAP_ATTR_MAX + 1]);
>         void (*map_free)(struct bpf_map *);
> +       int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
> +
> +       /* funcs callable from userspace and from eBPF programs */
> +       void *(*map_lookup_elem)(struct bpf_map *map, void *key);
> +       int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
> +       int (*map_delete_elem)(struct bpf_map *map, void *key);
>  };
>
>  struct bpf_map {
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index dcc7eb97a64a..5e1bfbc9cdc7 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -308,6 +308,31 @@ enum bpf_cmd {
>          * map is deleted when fd is closed
>          */
>         BPF_MAP_CREATE,
> +
> +       /* lookup key in a given map referenced by map_id
> +        * err = bpf_map_lookup_elem(int map_id, void *key, void *value)

This needs map_id documentation updates too?

> +        * returns zero and stores found elem into value
> +        * or negative error
> +        */
> +       BPF_MAP_LOOKUP_ELEM,
> +
> +       /* create or update key/value pair in a given map
> +        * err = bpf_map_update_elem(int map_id, void *key, void *value)
> +        * returns zero or negative error
> +        */
> +       BPF_MAP_UPDATE_ELEM,
> +
> +       /* find and delete elem by key in a given map
> +        * err = bpf_map_delete_elem(int map_id, void *key)
> +        * returns zero or negative error
> +        */
> +       BPF_MAP_DELETE_ELEM,
> +
> +       /* lookup key in a given map and return next key
> +        * err = bpf_map_get_elem(int map_id, void *key, void *next_key)
> +        * returns zero and stores next key or negative error
> +        */
> +       BPF_MAP_GET_NEXT_KEY,
>  };
>
>  enum bpf_map_attributes {
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index c4a330642653..ca2be66845b3 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -13,6 +13,7 @@
>  #include <linux/syscalls.h>
>  #include <net/netlink.h>
>  #include <linux/anon_inodes.h>
> +#include <linux/file.h>
>
>  /* mutex to protect insertion/deletion of map_id in IDR */
>  static DEFINE_MUTEX(bpf_map_lock);
> @@ -209,6 +210,202 @@ free_attr:
>         return err;
>  }
>
> +static int get_map_id(struct fd f)
> +{
> +       struct bpf_map *map;
> +
> +       if (!f.file)
> +               return -EBADF;
> +
> +       if (f.file->f_op != &bpf_map_fops) {
> +               fdput(f);

It feels weird to me to do the fdput inside this function. Instead,
should map_lookup_elem get a "err_put" label, instead?

> +               return -EINVAL;
> +       }
> +
> +       map = f.file->private_data;
> +
> +       return map->map_id;
> +}
> +
> +static int map_lookup_elem(int ufd, void __user *ukey, void __user *uvalue)
> +{
> +       struct fd f = fdget(ufd);
> +       struct bpf_map *map;
> +       void *key, *value;
> +       int err;
> +
> +       err = get_map_id(f);
> +       if (err < 0)
> +               return err;

for example:

    if (err < 0)
        goto fail_put;

> +
> +       rcu_read_lock();
> +       map = idr_find(&bpf_map_id_idr, err);
> +       err = -EINVAL;
> +       if (!map)
> +               goto err_unlock;
> +
> +       err = -ENOMEM;
> +       key = kmalloc(map->key_size, GFP_ATOMIC);
> +       if (!key)
> +               goto err_unlock;
> +
> +       err = -EFAULT;
> +       if (copy_from_user(key, ukey, map->key_size) != 0)
> +               goto free_key;
> +
> +       err = -ESRCH;
> +       value = map->ops->map_lookup_elem(map, key);
> +       if (!value)
> +               goto free_key;
> +
> +       err = -EFAULT;
> +       if (copy_to_user(uvalue, value, map->value_size) != 0)
> +               goto free_key;

I'm made uncomfortable with memory copying where explicit lengths from
userspace aren't being used. It does look like it would be redundant,
though. Are there other syscalls where the kernel may stomp on user
memory based on internal kernel sizes? I think this is fine as-is, but
it makes me want to think harder about it. :)

> +
> +       err = 0;
> +
> +free_key:
> +       kfree(key);
> +err_unlock:
> +       rcu_read_unlock();

fail_put:

> +       fdput(f);
> +       return err;
> +}
> +
> +static int map_update_elem(int ufd, void __user *ukey, void __user *uvalue)
> +{
> +       struct fd f = fdget(ufd);
> +       struct bpf_map *map;
> +       void *key, *value;
> +       int err;
> +
> +       err = get_map_id(f);
> +       if (err < 0)
> +               return err;

Same thing?

> +
> +       rcu_read_lock();
> +       map = idr_find(&bpf_map_id_idr, err);
> +       err = -EINVAL;
> +       if (!map)
> +               goto err_unlock;
> +
> +       err = -ENOMEM;
> +       key = kmalloc(map->key_size, GFP_ATOMIC);
> +       if (!key)
> +               goto err_unlock;
> +
> +       err = -EFAULT;
> +       if (copy_from_user(key, ukey, map->key_size) != 0)
> +               goto free_key;
> +
> +       err = -ENOMEM;
> +       value = kmalloc(map->value_size, GFP_ATOMIC);
> +       if (!value)
> +               goto free_key;
> +
> +       err = -EFAULT;
> +       if (copy_from_user(value, uvalue, map->value_size) != 0)
> +               goto free_value;
> +
> +       err = map->ops->map_update_elem(map, key, value);
> +
> +free_value:
> +       kfree(value);
> +free_key:
> +       kfree(key);
> +err_unlock:
> +       rcu_read_unlock();
> +       fdput(f);
> +       return err;
> +}
> +
> +static int map_delete_elem(int ufd, void __user *ukey)
> +{
> +       struct fd f = fdget(ufd);
> +       struct bpf_map *map;
> +       void *key;
> +       int err;
> +
> +       err = get_map_id(f);
> +       if (err < 0)
> +               return err;
> +
> +       rcu_read_lock();
> +       map = idr_find(&bpf_map_id_idr, err);
> +       err = -EINVAL;
> +       if (!map)
> +               goto err_unlock;
> +
> +       err = -ENOMEM;
> +       key = kmalloc(map->key_size, GFP_ATOMIC);
> +       if (!key)
> +               goto err_unlock;
> +
> +       err = -EFAULT;
> +       if (copy_from_user(key, ukey, map->key_size) != 0)
> +               goto free_key;
> +
> +       err = map->ops->map_delete_elem(map, key);
> +
> +free_key:
> +       kfree(key);
> +err_unlock:
> +       rcu_read_unlock();
> +       fdput(f);
> +       return err;
> +}
> +
> +static int map_get_next_key(int ufd, void __user *ukey, void __user *unext_key)
> +{
> +       struct fd f = fdget(ufd);
> +       struct bpf_map *map;
> +       void *key, *next_key;
> +       int err;
> +
> +       err = get_map_id(f);
> +       if (err < 0)
> +               return err;
> +
> +       rcu_read_lock();
> +       map = idr_find(&bpf_map_id_idr, err);
> +       err = -EINVAL;
> +       if (!map)
> +               goto err_unlock;
> +
> +       err = -ENOMEM;
> +       key = kmalloc(map->key_size, GFP_ATOMIC);
> +       if (!key)
> +               goto err_unlock;
> +
> +       err = -EFAULT;
> +       if (copy_from_user(key, ukey, map->key_size) != 0)
> +               goto free_key;
> +
> +       err = -ENOMEM;
> +       next_key = kmalloc(map->key_size, GFP_ATOMIC);

In the interests of defensiveness, I'd use kzalloc here.

> +       if (!next_key)
> +               goto free_key;
> +
> +       err = map->ops->map_get_next_key(map, key, next_key);
> +       if (err)
> +               goto free_next_key;
> +
> +       err = -EFAULT;
> +       if (copy_to_user(unext_key, next_key, map->key_size) != 0)
> +               goto free_next_key;
> +
> +       err = 0;
> +
> +free_next_key:
> +       kfree(next_key);
> +free_key:
> +       kfree(key);
> +err_unlock:
> +       rcu_read_unlock();
> +       fdput(f);
> +       return err;
> +}
> +
>  SYSCALL_DEFINE5(bpf, int, cmd, unsigned long, arg2, unsigned long, arg3,
>                 unsigned long, arg4, unsigned long, arg5)
>  {
> @@ -219,6 +416,18 @@ SYSCALL_DEFINE5(bpf, int, cmd, unsigned long, arg2, unsigned long, arg3,
>         case BPF_MAP_CREATE:
>                 return map_create((enum bpf_map_type) arg2,
>                                   (struct nlattr __user *) arg3, (int) arg4);
> +       case BPF_MAP_LOOKUP_ELEM:
> +               return map_lookup_elem((int) arg2, (void __user *) arg3,
> +                                      (void __user *) arg4);
> +       case BPF_MAP_UPDATE_ELEM:
> +               return map_update_elem((int) arg2, (void __user *) arg3,
> +                                      (void __user *) arg4);
> +       case BPF_MAP_DELETE_ELEM:
> +               return map_delete_elem((int) arg2, (void __user *) arg3);
> +
> +       case BPF_MAP_GET_NEXT_KEY:
> +               return map_get_next_key((int) arg2, (void __user *) arg3,
> +                                       (void __user *) arg4);

Same observation as the other syscall cmd: perhaps arg5 == 0 should be
checked? Also, since each of these functions looks up the fd and
builds the key, maybe those should be added to a common helper instead
of copy/pasting into each demuxed function?

-Kees

>         default:
>                 return -EINVAL;
>         }
> --
> 1.7.9.5
>
Alexei Starovoitov July 23, 2014, 7:49 p.m. UTC | #2
On Wed, Jul 23, 2014 at 11:25 AM, Kees Cook <keescook@chromium.org> wrote:
>> +
>> +       /* lookup key in a given map referenced by map_id
>> +        * err = bpf_map_lookup_elem(int map_id, void *key, void *value)
>
> This needs map_id documentation updates too?

yes. will grep for it just to make sure.

>> +static int get_map_id(struct fd f)
>> +{
>> +       struct bpf_map *map;
>> +
>> +       if (!f.file)
>> +               return -EBADF;
>> +
>> +       if (f.file->f_op != &bpf_map_fops) {
>> +               fdput(f);
>
> It feels weird to me to do the fdput inside this function. Instead,
> should map_lookup_elem get a "err_put" label, instead?

I don't think it will work, since I'm not sure that fd.flags will be zero
when fd.file == NULL. It looks so by analyzing return code path
in fs/file.c, but I wasn't sure that I followed all code paths,
so I just picked this style from fs/timerfd.c assuming it was
done this away on purpose and there can be the case where
fd.file == null and fd.flags !=0. In such case we cannot call fdput().

>> +       err = -EFAULT;
>> +       if (copy_to_user(uvalue, value, map->value_size) != 0)
>> +               goto free_key;
>
> I'm made uncomfortable with memory copying where explicit lengths from
> userspace aren't being used. It does look like it would be redundant,
> though. Are there other syscalls where the kernel may stomp on user
> memory based on internal kernel sizes? I think this is fine as-is, but
> it makes me want to think harder about it. :)

good question :)
key_size and value_size are passed initially from user space.
Kernel only verifies and allocates internal map elements with given
sizes. Then it copies the value back with the size it remembered.
If user space said at map creation time that value_size is 100,
it should be using it consistently in user space program.

>> +       err = -ENOMEM;
>> +       next_key = kmalloc(map->key_size, GFP_ATOMIC);
>
> In the interests of defensiveness, I'd use kzalloc here.

I think it would be an overkill. Map implementation must consume
all bytes of incoming 'key' and return exactly the same number
of bytes in 'next_key'. Otherwise the whole iteration over map
with 'get_next_key' won't work. So if map implementation is
broken, it will be seen right away. No security leak here :)

>> +       case BPF_MAP_GET_NEXT_KEY:
>> +               return map_get_next_key((int) arg2, (void __user *) arg3,
>> +                                       (void __user *) arg4);
>
> Same observation as the other syscall cmd: perhaps arg5 == 0 should be
> checked? Also, since each of these functions looks up the fd and

yes. will do.

> builds the key, maybe those should be added to a common helper instead
> of copy/pasting into each demuxed function?

well, get_map_id() is a common helper. I didn't move fdget() all
the way to switch statement, since it looks less readable.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kees Cook July 23, 2014, 8:25 p.m. UTC | #3
On Wed, Jul 23, 2014 at 12:49 PM, Alexei Starovoitov <ast@plumgrid.com> wrote:
> On Wed, Jul 23, 2014 at 11:25 AM, Kees Cook <keescook@chromium.org> wrote:
>>> +
>>> +       /* lookup key in a given map referenced by map_id
>>> +        * err = bpf_map_lookup_elem(int map_id, void *key, void *value)
>>
>> This needs map_id documentation updates too?
>
> yes. will grep for it just to make sure.
>
>>> +static int get_map_id(struct fd f)
>>> +{
>>> +       struct bpf_map *map;
>>> +
>>> +       if (!f.file)
>>> +               return -EBADF;
>>> +
>>> +       if (f.file->f_op != &bpf_map_fops) {
>>> +               fdput(f);
>>
>> It feels weird to me to do the fdput inside this function. Instead,
>> should map_lookup_elem get a "err_put" label, instead?
>
> I don't think it will work, since I'm not sure that fd.flags will be zero
> when fd.file == NULL. It looks so by analyzing return code path
> in fs/file.c, but I wasn't sure that I followed all code paths,
> so I just picked this style from fs/timerfd.c assuming it was
> done this away on purpose and there can be the case where
> fd.file == null and fd.flags !=0. In such case we cannot call fdput().

Yeah, hm, looking around, this does seem to be the case. I guess the
thought is that when get_map_id fails, struct fd has been handled.
Maybe add a comment above that function as a reminder?

>>> +       err = -EFAULT;
>>> +       if (copy_to_user(uvalue, value, map->value_size) != 0)
>>> +               goto free_key;
>>
>> I'm made uncomfortable with memory copying where explicit lengths from
>> userspace aren't being used. It does look like it would be redundant,
>> though. Are there other syscalls where the kernel may stomp on user
>> memory based on internal kernel sizes? I think this is fine as-is, but
>> it makes me want to think harder about it. :)
>
> good question :)
> key_size and value_size are passed initially from user space.
> Kernel only verifies and allocates internal map elements with given
> sizes. Then it copies the value back with the size it remembered.
> If user space said at map creation time that value_size is 100,
> it should be using it consistently in user space program.

Yeah, I think this should be fine as-is.

>
>>> +       err = -ENOMEM;
>>> +       next_key = kmalloc(map->key_size, GFP_ATOMIC);
>>
>> In the interests of defensiveness, I'd use kzalloc here.
>
> I think it would be an overkill. Map implementation must consume
> all bytes of incoming 'key' and return exactly the same number
> of bytes in 'next_key'. Otherwise the whole iteration over map
> with 'get_next_key' won't work. So if map implementation is
> broken, it will be seen right away. No security leak here :)

Okay, fair enough. I had a few similar suggestions later. I kind of
wish there was a kcalloc that didn't zero memory to handle the case of
multiplied size input, but no need to spend the time clearing.

>
>>> +       case BPF_MAP_GET_NEXT_KEY:
>>> +               return map_get_next_key((int) arg2, (void __user *) arg3,
>>> +                                       (void __user *) arg4);
>>
>> Same observation as the other syscall cmd: perhaps arg5 == 0 should be
>> checked? Also, since each of these functions looks up the fd and
>
> yes. will do.
>
>> builds the key, maybe those should be added to a common helper instead
>> of copy/pasting into each demuxed function?
>
> well, get_map_id() is a common helper. I didn't move fdget() all
> the way to switch statement, since it looks less readable.

-Kees
Alexei Starovoitov July 23, 2014, 9:22 p.m. UTC | #4
On Wed, Jul 23, 2014 at 1:25 PM, Kees Cook <keescook@chromium.org> wrote:
> On Wed, Jul 23, 2014 at 12:49 PM, Alexei Starovoitov <ast@plumgrid.com> wrote:
>> On Wed, Jul 23, 2014 at 11:25 AM, Kees Cook <keescook@chromium.org> wrote:
>>>> +
>>>> +       /* lookup key in a given map referenced by map_id
>>>> +        * err = bpf_map_lookup_elem(int map_id, void *key, void *value)
>>>
>>> This needs map_id documentation updates too?
>>
>> yes. will grep for it just to make sure.
>>
>>>> +static int get_map_id(struct fd f)
>>>> +{
>>>> +       struct bpf_map *map;
>>>> +
>>>> +       if (!f.file)
>>>> +               return -EBADF;
>>>> +
>>>> +       if (f.file->f_op != &bpf_map_fops) {
>>>> +               fdput(f);
>>>
>>> It feels weird to me to do the fdput inside this function. Instead,
>>> should map_lookup_elem get a "err_put" label, instead?
>>
>> I don't think it will work, since I'm not sure that fd.flags will be zero
>> when fd.file == NULL. It looks so by analyzing return code path
>> in fs/file.c, but I wasn't sure that I followed all code paths,
>> so I just picked this style from fs/timerfd.c assuming it was
>> done this away on purpose and there can be the case where
>> fd.file == null and fd.flags !=0. In such case we cannot call fdput().
>
> Yeah, hm, looking around, this does seem to be the case. I guess the
> thought is that when get_map_id fails, struct fd has been handled.

correct.

> Maybe add a comment above that function as a reminder?

yes. will do.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 57af236a0eb4..91e2caf8edf9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -18,6 +18,12 @@  struct bpf_map_ops {
 	/* funcs callable from userspace (via syscall) */
 	struct bpf_map *(*map_alloc)(struct nlattr *attrs[BPF_MAP_ATTR_MAX + 1]);
 	void (*map_free)(struct bpf_map *);
+	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
+
+	/* funcs callable from userspace and from eBPF programs */
+	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
+	int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
+	int (*map_delete_elem)(struct bpf_map *map, void *key);
 };
 
 struct bpf_map {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dcc7eb97a64a..5e1bfbc9cdc7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -308,6 +308,31 @@  enum bpf_cmd {
 	 * map is deleted when fd is closed
 	 */
 	BPF_MAP_CREATE,
+
+	/* lookup key in a given map referenced by map_id
+	 * err = bpf_map_lookup_elem(int map_id, void *key, void *value)
+	 * returns zero and stores found elem into value
+	 * or negative error
+	 */
+	BPF_MAP_LOOKUP_ELEM,
+
+	/* create or update key/value pair in a given map
+	 * err = bpf_map_update_elem(int map_id, void *key, void *value)
+	 * returns zero or negative error
+	 */
+	BPF_MAP_UPDATE_ELEM,
+
+	/* find and delete elem by key in a given map
+	 * err = bpf_map_delete_elem(int map_id, void *key)
+	 * returns zero or negative error
+	 */
+	BPF_MAP_DELETE_ELEM,
+
+	/* lookup key in a given map and return next key
+	 * err = bpf_map_get_elem(int map_id, void *key, void *next_key)
+	 * returns zero and stores next key or negative error
+	 */
+	BPF_MAP_GET_NEXT_KEY,
 };
 
 enum bpf_map_attributes {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c4a330642653..ca2be66845b3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -13,6 +13,7 @@ 
 #include <linux/syscalls.h>
 #include <net/netlink.h>
 #include <linux/anon_inodes.h>
+#include <linux/file.h>
 
 /* mutex to protect insertion/deletion of map_id in IDR */
 static DEFINE_MUTEX(bpf_map_lock);
@@ -209,6 +210,202 @@  free_attr:
 	return err;
 }
 
+static int get_map_id(struct fd f)
+{
+	struct bpf_map *map;
+
+	if (!f.file)
+		return -EBADF;
+
+	if (f.file->f_op != &bpf_map_fops) {
+		fdput(f);
+		return -EINVAL;
+	}
+
+	map = f.file->private_data;
+
+	return map->map_id;
+}
+
+static int map_lookup_elem(int ufd, void __user *ukey, void __user *uvalue)
+{
+	struct fd f = fdget(ufd);
+	struct bpf_map *map;
+	void *key, *value;
+	int err;
+
+	err = get_map_id(f);
+	if (err < 0)
+		return err;
+
+	rcu_read_lock();
+	map = idr_find(&bpf_map_id_idr, err);
+	err = -EINVAL;
+	if (!map)
+		goto err_unlock;
+
+	err = -ENOMEM;
+	key = kmalloc(map->key_size, GFP_ATOMIC);
+	if (!key)
+		goto err_unlock;
+
+	err = -EFAULT;
+	if (copy_from_user(key, ukey, map->key_size) != 0)
+		goto free_key;
+
+	err = -ESRCH;
+	value = map->ops->map_lookup_elem(map, key);
+	if (!value)
+		goto free_key;
+
+	err = -EFAULT;
+	if (copy_to_user(uvalue, value, map->value_size) != 0)
+		goto free_key;
+
+	err = 0;
+
+free_key:
+	kfree(key);
+err_unlock:
+	rcu_read_unlock();
+	fdput(f);
+	return err;
+}
+
+static int map_update_elem(int ufd, void __user *ukey, void __user *uvalue)
+{
+	struct fd f = fdget(ufd);
+	struct bpf_map *map;
+	void *key, *value;
+	int err;
+
+	err = get_map_id(f);
+	if (err < 0)
+		return err;
+
+	rcu_read_lock();
+	map = idr_find(&bpf_map_id_idr, err);
+	err = -EINVAL;
+	if (!map)
+		goto err_unlock;
+
+	err = -ENOMEM;
+	key = kmalloc(map->key_size, GFP_ATOMIC);
+	if (!key)
+		goto err_unlock;
+
+	err = -EFAULT;
+	if (copy_from_user(key, ukey, map->key_size) != 0)
+		goto free_key;
+
+	err = -ENOMEM;
+	value = kmalloc(map->value_size, GFP_ATOMIC);
+	if (!value)
+		goto free_key;
+
+	err = -EFAULT;
+	if (copy_from_user(value, uvalue, map->value_size) != 0)
+		goto free_value;
+
+	err = map->ops->map_update_elem(map, key, value);
+
+free_value:
+	kfree(value);
+free_key:
+	kfree(key);
+err_unlock:
+	rcu_read_unlock();
+	fdput(f);
+	return err;
+}
+
+static int map_delete_elem(int ufd, void __user *ukey)
+{
+	struct fd f = fdget(ufd);
+	struct bpf_map *map;
+	void *key;
+	int err;
+
+	err = get_map_id(f);
+	if (err < 0)
+		return err;
+
+	rcu_read_lock();
+	map = idr_find(&bpf_map_id_idr, err);
+	err = -EINVAL;
+	if (!map)
+		goto err_unlock;
+
+	err = -ENOMEM;
+	key = kmalloc(map->key_size, GFP_ATOMIC);
+	if (!key)
+		goto err_unlock;
+
+	err = -EFAULT;
+	if (copy_from_user(key, ukey, map->key_size) != 0)
+		goto free_key;
+
+	err = map->ops->map_delete_elem(map, key);
+
+free_key:
+	kfree(key);
+err_unlock:
+	rcu_read_unlock();
+	fdput(f);
+	return err;
+}
+
+static int map_get_next_key(int ufd, void __user *ukey, void __user *unext_key)
+{
+	struct fd f = fdget(ufd);
+	struct bpf_map *map;
+	void *key, *next_key;
+	int err;
+
+	err = get_map_id(f);
+	if (err < 0)
+		return err;
+
+	rcu_read_lock();
+	map = idr_find(&bpf_map_id_idr, err);
+	err = -EINVAL;
+	if (!map)
+		goto err_unlock;
+
+	err = -ENOMEM;
+	key = kmalloc(map->key_size, GFP_ATOMIC);
+	if (!key)
+		goto err_unlock;
+
+	err = -EFAULT;
+	if (copy_from_user(key, ukey, map->key_size) != 0)
+		goto free_key;
+
+	err = -ENOMEM;
+	next_key = kmalloc(map->key_size, GFP_ATOMIC);
+	if (!next_key)
+		goto free_key;
+
+	err = map->ops->map_get_next_key(map, key, next_key);
+	if (err)
+		goto free_next_key;
+
+	err = -EFAULT;
+	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
+		goto free_next_key;
+
+	err = 0;
+
+free_next_key:
+	kfree(next_key);
+free_key:
+	kfree(key);
+err_unlock:
+	rcu_read_unlock();
+	fdput(f);
+	return err;
+}
+
 SYSCALL_DEFINE5(bpf, int, cmd, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -219,6 +416,18 @@  SYSCALL_DEFINE5(bpf, int, cmd, unsigned long, arg2, unsigned long, arg3,
 	case BPF_MAP_CREATE:
 		return map_create((enum bpf_map_type) arg2,
 				  (struct nlattr __user *) arg3, (int) arg4);
+	case BPF_MAP_LOOKUP_ELEM:
+		return map_lookup_elem((int) arg2, (void __user *) arg3,
+				       (void __user *) arg4);
+	case BPF_MAP_UPDATE_ELEM:
+		return map_update_elem((int) arg2, (void __user *) arg3,
+				       (void __user *) arg4);
+	case BPF_MAP_DELETE_ELEM:
+		return map_delete_elem((int) arg2, (void __user *) arg3);
+
+	case BPF_MAP_GET_NEXT_KEY:
+		return map_get_next_key((int) arg2, (void __user *) arg3,
+					(void __user *) arg4);
 	default:
 		return -EINVAL;
 	}