Message ID | 153902586650.8888.7406217713259884259.stgit@kernel |
---|---|
State | Changes Requested, archived |
Delegated to: | BPF Maintainers |
Headers | show |
Series | Implement queue/stack maps | expand |
On Mon, Oct 8, 2018 at 12:12 PM Mauricio Vasquez B <mauricio.vasquez@polito.it> wrote: > > The following patch implements a bpf queue/stack maps that > provides the peek/pop/push functions. There is not a direct > relationship between those functions and the current maps > syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added, > this is mapped to the pop operation in the queue/stack maps > and it is still to implement in other kind of maps. Do we need this system call for other maps (non-stack/queue)? If not, maybe we can just call it POP, and only implement it for stack and queue? > > Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it> > --- > include/linux/bpf.h | 1 + > include/uapi/linux/bpf.h | 1 + > kernel/bpf/syscall.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 83 insertions(+) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 027697b6a22f..98c7eeb6d138 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -39,6 +39,7 @@ struct bpf_map_ops { > void *(*map_lookup_elem)(struct bpf_map *map, void *key); > int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); > int (*map_delete_elem)(struct bpf_map *map, void *key); > + void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key); > > /* funcs called by prog_array and perf_event_array map */ > void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index f9187b41dff6..3bb94aa2d408 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -103,6 +103,7 @@ enum bpf_cmd { > BPF_BTF_LOAD, > BPF_BTF_GET_FD_BY_ID, > BPF_TASK_FD_QUERY, > + BPF_MAP_LOOKUP_AND_DELETE_ELEM, > }; > > enum bpf_map_type { > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index eb75e8af73ff..c33d9303f72f 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -975,6 +975,84 @@ static int 
map_get_next_key(union bpf_attr *attr) > return err; > } > > +#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value > + > +static int map_lookup_and_delete_elem(union bpf_attr *attr) > +{ > + void __user *ukey = u64_to_user_ptr(attr->key); > + void __user *uvalue = u64_to_user_ptr(attr->value); > + int ufd = attr->map_fd; > + struct bpf_map *map; > + void *key, *value, *ptr; > + u32 value_size; > + struct fd f; > + int err; > + > + if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) > + return -EINVAL; > + > + f = fdget(ufd); > + map = __bpf_map_get(f); > + if (IS_ERR(map)) > + return PTR_ERR(map); > + > + if (!(f.file->f_mode & FMODE_CAN_WRITE)) { > + err = -EPERM; > + goto err_put; > + } > + > + key = __bpf_copy_key(ukey, map->key_size); > + if (IS_ERR(key)) { > + err = PTR_ERR(key); > + goto err_put; > + } > + > + value_size = map->value_size; > + > + err = -ENOMEM; > + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); > + if (!value) > + goto free_key; > + > + err = -EFAULT; > + if (copy_from_user(value, uvalue, value_size) != 0) > + goto free_value; > + > + /* must increment bpf_prog_active to avoid kprobe+bpf triggering from > + * inside bpf map update or delete otherwise deadlocks are possible > + */ > + preempt_disable(); > + __this_cpu_inc(bpf_prog_active); > + if (!map->ops->map_lookup_and_delete_elem) { Why do have have this check here? Shouldn't it be check much earlier? If we really need it here, we need at least add the following: __this_cpu_dec(bpf_prog_active); preempt_enable(); > + err = -ENOTSUPP; > + goto free_value; > + } > + rcu_read_lock(); > + ptr = map->ops->map_lookup_and_delete_elem(map, key); > + if (ptr) > + memcpy(value, ptr, value_size); > + rcu_read_unlock(); > + err = ptr ? 
0 : -ENOENT; > + __this_cpu_dec(bpf_prog_active); > + preempt_enable(); > + > + if (err) > + goto free_value; > + > + if (copy_to_user(uvalue, value, value_size) != 0) > + goto free_value; > + > + err = 0; > + > +free_value: > + kfree(value); > +free_key: > + kfree(key); > +err_put: > + fdput(f); > + return err; > +} > + > static const struct bpf_prog_ops * const bpf_prog_types[] = { > #define BPF_PROG_TYPE(_id, _name) \ > [_id] = & _name ## _prog_ops, > @@ -2448,6 +2526,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz > case BPF_TASK_FD_QUERY: > err = bpf_task_fd_query(&attr, uattr); > break; > + case BPF_MAP_LOOKUP_AND_DELETE_ELEM: > + err = map_lookup_and_delete_elem(&attr); > + break; > default: > err = -EINVAL; > break; >
On 10/08/2018 08:13 PM, Song Liu wrote: > On Mon, Oct 8, 2018 at 12:12 PM Mauricio Vasquez B > <mauricio.vasquez@polito.it> wrote: >> The following patch implements a bpf queue/stack maps that >> provides the peek/pop/push functions. There is not a direct >> relationship between those functions and the current maps >> syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added, >> this is mapped to the pop operation in the queue/stack maps >> and it is still to implement in other kind of maps. > Do we need this system call for other maps (non-stack/queue)? > If not, maybe we can just call it POP, and only implement it for > stack and queue? > Yes, this system call could also benefit other maps. The first idea was to add pop/push/peek system calls as well, but them Alexei realized it was too specific for queue/stack maps and we decided to go ahead with this solution that is more general. >> Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it> >> --- >> include/linux/bpf.h | 1 + >> include/uapi/linux/bpf.h | 1 + >> kernel/bpf/syscall.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 83 insertions(+) >> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h >> index 027697b6a22f..98c7eeb6d138 100644 >> --- a/include/linux/bpf.h >> +++ b/include/linux/bpf.h >> @@ -39,6 +39,7 @@ struct bpf_map_ops { >> void *(*map_lookup_elem)(struct bpf_map *map, void *key); >> int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); >> int (*map_delete_elem)(struct bpf_map *map, void *key); >> + void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key); >> >> /* funcs called by prog_array and perf_event_array map */ >> void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >> index f9187b41dff6..3bb94aa2d408 100644 >> --- a/include/uapi/linux/bpf.h >> +++ b/include/uapi/linux/bpf.h >> @@ -103,6 +103,7 @@ enum bpf_cmd { >> 
BPF_BTF_LOAD, >> BPF_BTF_GET_FD_BY_ID, >> BPF_TASK_FD_QUERY, >> + BPF_MAP_LOOKUP_AND_DELETE_ELEM, >> }; >> >> enum bpf_map_type { >> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c >> index eb75e8af73ff..c33d9303f72f 100644 >> --- a/kernel/bpf/syscall.c >> +++ b/kernel/bpf/syscall.c >> @@ -975,6 +975,84 @@ static int map_get_next_key(union bpf_attr *attr) >> return err; >> } >> >> +#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value >> + >> +static int map_lookup_and_delete_elem(union bpf_attr *attr) >> +{ >> + void __user *ukey = u64_to_user_ptr(attr->key); >> + void __user *uvalue = u64_to_user_ptr(attr->value); >> + int ufd = attr->map_fd; >> + struct bpf_map *map; >> + void *key, *value, *ptr; >> + u32 value_size; >> + struct fd f; >> + int err; >> + >> + if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) >> + return -EINVAL; >> + >> + f = fdget(ufd); >> + map = __bpf_map_get(f); >> + if (IS_ERR(map)) >> + return PTR_ERR(map); >> + >> + if (!(f.file->f_mode & FMODE_CAN_WRITE)) { >> + err = -EPERM; >> + goto err_put; >> + } >> + >> + key = __bpf_copy_key(ukey, map->key_size); >> + if (IS_ERR(key)) { >> + err = PTR_ERR(key); >> + goto err_put; >> + } >> + >> + value_size = map->value_size; >> + >> + err = -ENOMEM; >> + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); >> + if (!value) >> + goto free_key; >> + >> + err = -EFAULT; >> + if (copy_from_user(value, uvalue, value_size) != 0) >> + goto free_value; >> + >> + /* must increment bpf_prog_active to avoid kprobe+bpf triggering from >> + * inside bpf map update or delete otherwise deadlocks are possible >> + */ >> + preempt_disable(); >> + __this_cpu_inc(bpf_prog_active); >> + if (!map->ops->map_lookup_and_delete_elem) { > Why do have have this check here? Shouldn't it be check much earlier? 
> If we really need it here, we need at least add the following: In this particular patch the check can be done much earlier, but in next patch we need it on this position, so I leave it here to avoid moving around on next patch. > __this_cpu_dec(bpf_prog_active); > preempt_enable(); You are right, I missed that. Will fix for next version. > > >> + err = -ENOTSUPP; >> + goto free_value; >> + } >> + rcu_read_lock(); >> + ptr = map->ops->map_lookup_and_delete_elem(map, key); >> + if (ptr) >> + memcpy(value, ptr, value_size); >> + rcu_read_unlock(); >> + err = ptr ? 0 : -ENOENT; >> + __this_cpu_dec(bpf_prog_active); >> + preempt_enable(); >> + >> + if (err) >> + goto free_value; >> + >> + if (copy_to_user(uvalue, value, value_size) != 0) >> + goto free_value; >> + >> + err = 0; >> + >> +free_value: >> + kfree(value); >> +free_key: >> + kfree(key); >> +err_put: >> + fdput(f); >> + return err; >> +} >> + >> static const struct bpf_prog_ops * const bpf_prog_types[] = { >> #define BPF_PROG_TYPE(_id, _name) \ >> [_id] = & _name ## _prog_ops, >> @@ -2448,6 +2526,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz >> case BPF_TASK_FD_QUERY: >> err = bpf_task_fd_query(&attr, uattr); >> break; >> + case BPF_MAP_LOOKUP_AND_DELETE_ELEM: >> + err = map_lookup_and_delete_elem(&attr); >> + break; >> default: >> err = -EINVAL; >> break; >>
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 027697b6a22f..98c7eeb6d138 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -39,6 +39,7 @@ struct bpf_map_ops { void *(*map_lookup_elem)(struct bpf_map *map, void *key); int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_map *map, void *key); + void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f9187b41dff6..3bb94aa2d408 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -103,6 +103,7 @@ enum bpf_cmd { BPF_BTF_LOAD, BPF_BTF_GET_FD_BY_ID, BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, }; enum bpf_map_type { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index eb75e8af73ff..c33d9303f72f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -975,6 +975,84 @@ static int map_get_next_key(union bpf_attr *attr) return err; } +#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value + +static int map_lookup_and_delete_elem(union bpf_attr *attr) +{ + void __user *ukey = u64_to_user_ptr(attr->key); + void __user *uvalue = u64_to_user_ptr(attr->value); + int ufd = attr->map_fd; + struct bpf_map *map; + void *key, *value, *ptr; + u32 value_size; + struct fd f; + int err; + + if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; + } + + key = __bpf_copy_key(ukey, map->key_size); + if (IS_ERR(key)) { + err = PTR_ERR(key); + goto err_put; + } + + value_size = map->value_size; + + err = -ENOMEM; + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); + if (!value) + goto free_key; + + err = -EFAULT; + if 
(copy_from_user(value, uvalue, value_size) != 0) + goto free_value; + + /* must increment bpf_prog_active to avoid kprobe+bpf triggering from + * inside bpf map update or delete otherwise deadlocks are possible + */ + preempt_disable(); + __this_cpu_inc(bpf_prog_active); + if (!map->ops->map_lookup_and_delete_elem) { + err = -ENOTSUPP; + goto free_value; + } + rcu_read_lock(); + ptr = map->ops->map_lookup_and_delete_elem(map, key); + if (ptr) + memcpy(value, ptr, value_size); + rcu_read_unlock(); + err = ptr ? 0 : -ENOENT; + __this_cpu_dec(bpf_prog_active); + preempt_enable(); + + if (err) + goto free_value; + + if (copy_to_user(uvalue, value, value_size) != 0) + goto free_value; + + err = 0; + +free_value: + kfree(value); +free_key: + kfree(key); +err_put: + fdput(f); + return err; +} + static const struct bpf_prog_ops * const bpf_prog_types[] = { #define BPF_PROG_TYPE(_id, _name) \ [_id] = & _name ## _prog_ops, @@ -2448,6 +2526,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_TASK_FD_QUERY: err = bpf_task_fd_query(&attr, uattr); break; + case BPF_MAP_LOOKUP_AND_DELETE_ELEM: + err = map_lookup_and_delete_elem(&attr); + break; default: err = -EINVAL; break;
The following patch implements bpf queue/stack maps that provide the peek/pop/push functions. There is not a direct relationship between those functions and the current map syscalls; hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added. This is mapped to the pop operation in the queue/stack maps and is still to be implemented in other kinds of maps. Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it> --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+)