[bpf-next,1/2] bpf: change uapi for bpf iterator map elements

Message ID: 20200802042126.2119843-1-yhs@fb.com
State: Changes Requested
Delegated to: BPF Maintainers
Series: bpf: change uapi for bpf iterator map elements

Commit Message

Yonghong Song Aug. 2, 2020, 4:21 a.m. UTC
Commit a5cbe05a6673 ("bpf: Implement bpf iterator for
map elements") added bpf iterator support for
map elements. The map element bpf iterator requires
info to identify a particular map. In the above
commit, attr->link_create.target_fd is used
to carry the map_fd, and an enum bpf_iter_link_info
is added to the uapi to specify that the target_fd
actually represents a map_fd:
    enum bpf_iter_link_info {
	BPF_ITER_LINK_UNSPEC = 0,
	BPF_ITER_LINK_MAP_FD = 1,

	MAX_BPF_ITER_LINK_INFO,
    };

This is an extensible approach: we can add
enumerators for pid, cgroup_id, etc., and turn
target_fd into a union carrying pid, cgroup_id,
etc. In the future, however, more complex
customization may be needed, e.g., a task
iterator might be filtered by both cgroup_id
and user_id.

This patch changes the uapi to add the fields
	__aligned_u64	iter_info;
	__u32		iter_info_len;
to link_create, carrying additional iterator info.
The iter_info is defined as
	union bpf_iter_link_info {
		struct {
			__u32   map_fd;
		} map;
	};
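
A future target could then add its own member without affecting
existing ones; hypothetically (not part of this patch), e.g.:
	union bpf_iter_link_info {
		struct {
			__u32	map_fd;
		} map;
		struct {	/* hypothetical future member */
			__u64	cgroup_id;
			__u32	uid;
		} task;
	};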

This makes future extensions for additional
customization easier. The bpf_iter_link_info is
passed to the target's callback for validation,
so the generic bpf_iter framework no longer needs
to deal with it.
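
For illustration, a minimal userspace sketch of attaching a map
element iterator with the new fields (map_fd and prog_fd are assumed
to be valid fds for the map and a BPF_TRACE_ITER program, obtained
elsewhere; needs <linux/bpf.h>, <sys/syscall.h>, <unistd.h>,
<string.h>):

	union bpf_iter_link_info linfo;
	union bpf_attr attr;
	int link_fd;

	memset(&linfo, 0, sizeof(linfo));
	linfo.map.map_fd = map_fd;		/* map to iterate over */

	memset(&attr, 0, sizeof(attr));
	attr.link_create.prog_fd = prog_fd;
	attr.link_create.attach_type = BPF_TRACE_ITER;
	attr.link_create.iter_info = (__u64)(unsigned long)&linfo;
	attr.link_create.iter_info_len = sizeof(linfo);

	/* link_fd < 0 on error */
	link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));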

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h            | 10 ++++---
 include/uapi/linux/bpf.h       | 15 +++++-----
 kernel/bpf/bpf_iter.c          | 52 +++++++++++++++-------------------
 kernel/bpf/map_iter.c          | 37 ++++++++++++++++++------
 kernel/bpf/syscall.c           |  2 +-
 net/core/bpf_sk_storage.c      | 37 ++++++++++++++++++------
 tools/include/uapi/linux/bpf.h | 15 +++++-----
 7 files changed, 104 insertions(+), 64 deletions(-)

Comments

Andrii Nakryiko Aug. 3, 2020, 1:25 a.m. UTC | #1
On Sat, Aug 1, 2020 at 9:22 PM Yonghong Song <yhs@fb.com> wrote:
> [...]

>  int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>  {
> +       union bpf_iter_link_info __user *ulinfo;
>         struct bpf_link_primer link_primer;
>         struct bpf_iter_target_info *tinfo;
> -       struct bpf_iter_aux_info aux = {};
> +       union bpf_iter_link_info linfo;
>         struct bpf_iter_link *link;
> -       u32 prog_btf_id, target_fd;
> +       u32 prog_btf_id, linfo_len;
>         bool existed = false;
> -       struct bpf_map *map;
>         int err;
>
> +       memset(&linfo, 0, sizeof(union bpf_iter_link_info));
> +
> +       ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
> +       linfo_len = attr->link_create.iter_info_len;
> +       if (ulinfo && linfo_len) {

We probably want to be more strict here: if either pointer or len is
non-zero, both should be present and valid. Otherwise we can have
garbage in iter_info, as long as iter_info_len is zero.
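
E.g., something like this sketch would reject the case where exactly
one of the two is set:

	if (!ulinfo ^ !linfo_len)
		return -EINVAL;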

> +               err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
> +                                              linfo_len);
> +               if (err)
> +                       return err;
> +               linfo_len = min_t(u32, linfo_len, sizeof(linfo));
> +               if (copy_from_user(&linfo, ulinfo, linfo_len))
> +                       return -EFAULT;
> +       }
> +
>         prog_btf_id = prog->aux->attach_btf_id;
>         mutex_lock(&targets_mutex);
>         list_for_each_entry(tinfo, &targets, list) {
> @@ -411,13 +425,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>         if (!existed)
>                 return -ENOENT;
>
> -       /* Make sure user supplied flags are target expected. */
> -       target_fd = attr->link_create.target_fd;
> -       if (attr->link_create.flags != tinfo->reg_info->req_linfo)
> -               return -EINVAL;
> -       if (!attr->link_create.flags && target_fd)
> -               return -EINVAL;
> -

Please still ensure that no flags are specified.


>         link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
>         if (!link)
>                 return -ENOMEM;
> @@ -431,28 +438,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>                 return err;
>         }
>

[...]

> -static int bpf_iter_check_map(struct bpf_prog *prog,
> -                             struct bpf_iter_aux_info *aux)
> +static int bpf_iter_attach_map(struct bpf_prog *prog,
> +                              union bpf_iter_link_info *linfo,
> +                              struct bpf_iter_aux_info *aux)
>  {
> -       struct bpf_map *map = aux->map;
> +       struct bpf_map *map;
> +       int err = -EINVAL;
>
> -       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
> +       if (!linfo->map.map_fd)
>                 return -EINVAL;

This could be -EBADF?
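
I.e., something like (sketch):

	if (!linfo->map.map_fd)
		return -EBADF;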

>
> -       if (prog->aux->max_rdonly_access > map->value_size)
> -               return -EACCES;
> +       map = bpf_map_get_with_uref(linfo->map.map_fd);
> +       if (IS_ERR(map))
> +               return PTR_ERR(map);
> +
> +       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
> +               goto put_map;
> +
> +       if (prog->aux->max_rdonly_access > map->value_size) {
> +               err = -EACCES;
> +               goto put_map;
> +       }

[...]
Yonghong Song Aug. 3, 2020, 2:23 a.m. UTC | #2
On 8/2/20 6:25 PM, Andrii Nakryiko wrote:
> On Sat, Aug 1, 2020 at 9:22 PM Yonghong Song <yhs@fb.com> wrote:
>> [...]
> 
>>   int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>>   {
>> +       union bpf_iter_link_info __user *ulinfo;
>>          struct bpf_link_primer link_primer;
>>          struct bpf_iter_target_info *tinfo;
>> -       struct bpf_iter_aux_info aux = {};
>> +       union bpf_iter_link_info linfo;
>>          struct bpf_iter_link *link;
>> -       u32 prog_btf_id, target_fd;
>> +       u32 prog_btf_id, linfo_len;
>>          bool existed = false;
>> -       struct bpf_map *map;
>>          int err;
>>
>> +       memset(&linfo, 0, sizeof(union bpf_iter_link_info));
>> +
>> +       ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
>> +       linfo_len = attr->link_create.iter_info_len;
>> +       if (ulinfo && linfo_len) {
> 
> We probably want to be more strict here: if either pointer or len is
> non-zero, both should be present and valid. Otherwise we can have
> garbage in iter_info, as long as iter_info_len is zero.

Yes, it is possible that iter_info_len == 0 while iter_info is not NULL;
if this happens, iter_info will not be examined.

In the kernel, we have places where this is handled similarly, for
example, the cgroup bpf_prog query.

kernel/bpf/cgroup.c, function __cgroup_bpf_query:

   __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
   ...
   if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
     return 0;

In the above case, it is possible that prog_cnt == 0 and prog_ids != NULL,
or prog_ids == NULL and prog_cnt != 0, and we won't return an error
to user space.

Not 100% sure whether we have a convention here or not.

> 
>> +               err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
>> +                                              linfo_len);
>> +               if (err)
>> +                       return err;
>> +               linfo_len = min_t(u32, linfo_len, sizeof(linfo));
>> +               if (copy_from_user(&linfo, ulinfo, linfo_len))
>> +                       return -EFAULT;
>> +       }
>> +
>>          prog_btf_id = prog->aux->attach_btf_id;
>>          mutex_lock(&targets_mutex);
>>          list_for_each_entry(tinfo, &targets, list) {
>> @@ -411,13 +425,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>>          if (!existed)
>>                  return -ENOENT;
>>
>> -       /* Make sure user supplied flags are target expected. */
>> -       target_fd = attr->link_create.target_fd;
>> -       if (attr->link_create.flags != tinfo->reg_info->req_linfo)
>> -               return -EINVAL;
>> -       if (!attr->link_create.flags && target_fd)
>> -               return -EINVAL;
>> -
> 
> Please still ensure that no flags are specified.

Makes sense. I also need to ensure target_fd is 0 since it is not used
any more.
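
E.g., a sketch of the extra check near the top of
bpf_iter_link_attach():

	/* sketch: iter links use iter_info, not target_fd or flags */
	if (attr->link_create.target_fd || attr->link_create.flags)
		return -EINVAL;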

> 
> 
>>          link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
>>          if (!link)
>>                  return -ENOMEM;
>> @@ -431,28 +438,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>>                  return err;
>>          }
>>
> 
> [...]
> 
>> -static int bpf_iter_check_map(struct bpf_prog *prog,
>> -                             struct bpf_iter_aux_info *aux)
>> +static int bpf_iter_attach_map(struct bpf_prog *prog,
>> +                              union bpf_iter_link_info *linfo,
>> +                              struct bpf_iter_aux_info *aux)
>>   {
>> -       struct bpf_map *map = aux->map;
>> +       struct bpf_map *map;
>> +       int err = -EINVAL;
>>
>> -       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
>> +       if (!linfo->map.map_fd)
>>                  return -EINVAL;
> 
> This could be -EBADF?

Good suggestion. Will do.

> 
>>
>> -       if (prog->aux->max_rdonly_access > map->value_size)
>> -               return -EACCES;
>> +       map = bpf_map_get_with_uref(linfo->map.map_fd);
>> +       if (IS_ERR(map))
>> +               return PTR_ERR(map);
>> +
>> +       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
>> +               goto put_map;
>> +
>> +       if (prog->aux->max_rdonly_access > map->value_size) {
>> +               err = -EACCES;
>> +               goto put_map;
>> +       }
> 
> [...]
>
Andrii Nakryiko Aug. 3, 2020, 5:11 a.m. UTC | #3
On Sun, Aug 2, 2020 at 7:23 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 8/2/20 6:25 PM, Andrii Nakryiko wrote:
> > On Sat, Aug 1, 2020 at 9:22 PM Yonghong Song <yhs@fb.com> wrote:
> >> [...]
> >
> >>   int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> >>   {
> >> +       union bpf_iter_link_info __user *ulinfo;
> >>          struct bpf_link_primer link_primer;
> >>          struct bpf_iter_target_info *tinfo;
> >> -       struct bpf_iter_aux_info aux = {};
> >> +       union bpf_iter_link_info linfo;
> >>          struct bpf_iter_link *link;
> >> -       u32 prog_btf_id, target_fd;
> >> +       u32 prog_btf_id, linfo_len;
> >>          bool existed = false;
> >> -       struct bpf_map *map;
> >>          int err;
> >>
> >> +       memset(&linfo, 0, sizeof(union bpf_iter_link_info));
> >> +
> >> +       ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
> >> +       linfo_len = attr->link_create.iter_info_len;
> >> +       if (ulinfo && linfo_len) {
> >
> > We probably want to be more strict here: if either pointer or len is
> > non-zero, both should be present and valid. Otherwise we can have
> > garbage in iter_info, as long as iter_info_len is zero.
>
> Yes, it is possible that iter_info_len == 0 while iter_info is not NULL;
> if this happens, iter_info will not be examined.
>
> In the kernel, we have places where this is handled similarly, for
> example, the cgroup bpf_prog query.
>
> kernel/bpf/cgroup.c, function __cgroup_bpf_query:
>
>    __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
>    ...
>    if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
>      return 0;
>
> In the above case, it is possible that prog_cnt == 0 and prog_ids != NULL,
> or prog_ids == NULL and prog_cnt != 0, and we won't return an error
> to user space.
>
> Not 100% sure whether we have a convention here or not.

I don't know either, but I'd assume that we didn't think about 100%
strictness when originally implementing this. So I'd go with a very
strict check for this new functionality.

>
> >
> >> +               err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
> >> +                                              linfo_len);
> >> +               if (err)
> >> +                       return err;
> >> +               linfo_len = min_t(u32, linfo_len, sizeof(linfo));
> >> +               if (copy_from_user(&linfo, ulinfo, linfo_len))
> >> +                       return -EFAULT;
> >> +       }
> >> +
> >>          prog_btf_id = prog->aux->attach_btf_id;
> >>          mutex_lock(&targets_mutex);
> >>          list_for_each_entry(tinfo, &targets, list) {
> >> @@ -411,13 +425,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> >>          if (!existed)
> >>                  return -ENOENT;
> >>
> >> -       /* Make sure user supplied flags are target expected. */
> >> -       target_fd = attr->link_create.target_fd;
> >> -       if (attr->link_create.flags != tinfo->reg_info->req_linfo)
> >> -               return -EINVAL;
> >> -       if (!attr->link_create.flags && target_fd)
> >> -               return -EINVAL;
> >> -
> >
> > Please still ensure that no flags are specified.
>
> Makes sense. I also need to ensure target_fd is 0 since it is not used
> any more.
>

yep, good catch

> >
> >
> >>          link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
> >>          if (!link)
> >>                  return -ENOMEM;
> >> @@ -431,28 +438,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> >>                  return err;
> >>          }
> >>
> >
> > [...]
> >
> >> -static int bpf_iter_check_map(struct bpf_prog *prog,
> >> -                             struct bpf_iter_aux_info *aux)
> >> +static int bpf_iter_attach_map(struct bpf_prog *prog,
> >> +                              union bpf_iter_link_info *linfo,
> >> +                              struct bpf_iter_aux_info *aux)
> >>   {
> >> -       struct bpf_map *map = aux->map;
> >> +       struct bpf_map *map;
> >> +       int err = -EINVAL;
> >>
> >> -       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
> >> +       if (!linfo->map.map_fd)
> >>                  return -EINVAL;
> >
> > This could be -EBADF?
>
> Good suggestion. Will do.
>
> >
> >>
> >> -       if (prog->aux->max_rdonly_access > map->value_size)
> >> -               return -EACCES;
> >> +       map = bpf_map_get_with_uref(linfo->map.map_fd);
> >> +       if (IS_ERR(map))
> >> +               return PTR_ERR(map);
> >> +
> >> +       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
> >> +               goto put_map;
> >> +
> >> +       if (prog->aux->max_rdonly_access > map->value_size) {
> >> +               err = -EACCES;
> >> +               goto put_map;
> >> +       }
> >
> > [...]
> >
Yonghong Song Aug. 3, 2020, 6:21 a.m. UTC | #4
On 8/2/20 10:11 PM, Andrii Nakryiko wrote:
> On Sun, Aug 2, 2020 at 7:23 PM Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 8/2/20 6:25 PM, Andrii Nakryiko wrote:
>>> On Sat, Aug 1, 2020 at 9:22 PM Yonghong Song <yhs@fb.com> wrote:
>>>> [...]
>>>
>>>>    int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>>>>    {
>>>> +       union bpf_iter_link_info __user *ulinfo;
>>>>           struct bpf_link_primer link_primer;
>>>>           struct bpf_iter_target_info *tinfo;
>>>> -       struct bpf_iter_aux_info aux = {};
>>>> +       union bpf_iter_link_info linfo;
>>>>           struct bpf_iter_link *link;
>>>> -       u32 prog_btf_id, target_fd;
>>>> +       u32 prog_btf_id, linfo_len;
>>>>           bool existed = false;
>>>> -       struct bpf_map *map;
>>>>           int err;
>>>>
>>>> +       memset(&linfo, 0, sizeof(union bpf_iter_link_info));
>>>> +
>>>> +       ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
>>>> +       linfo_len = attr->link_create.iter_info_len;
>>>> +       if (ulinfo && linfo_len) {
>>>
>>> We probably want to be more strict here: if either pointer or len is
>>> non-zero, both should be present and valid. Otherwise we can have
>>> garbage in iter_info, as long as iter_info_len is zero.
>>
>> [...]
> 
> I don't know either, but I'd assume that we didn't think about 100%
> strictness when originally implementing this. So I'd go with a very
> strict check for this new functionality.

Agreed. This should be fine as the functionality is new.

> 
>>
>>>
>>>> +               err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
>>>> +                                              linfo_len);
>>>> +               if (err)
>>>> +                       return err;
>>>> +               linfo_len = min_t(u32, linfo_len, sizeof(linfo));
>>>> +               if (copy_from_user(&linfo, ulinfo, linfo_len))
>>>> +                       return -EFAULT;
>>>> +       }
>>>> +
>>>>           prog_btf_id = prog->aux->attach_btf_id;
>>>>           mutex_lock(&targets_mutex);
>>>>           list_for_each_entry(tinfo, &targets, list) {
>>>> @@ -411,13 +425,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>>>>           if (!existed)
>>>>                   return -ENOENT;
>>>>
>>>> -       /* Make sure user supplied flags are target expected. */
>>>> -       target_fd = attr->link_create.target_fd;
>>>> -       if (attr->link_create.flags != tinfo->reg_info->req_linfo)
>>>> -               return -EINVAL;
>>>> -       if (!attr->link_create.flags && target_fd)
>>>> -               return -EINVAL;
>>>> -
>>>
>>> Please still ensure that no flags are specified.
>>
>> Makes sense. I also need to ensure target_fd is 0 since it is not used
>> any more.
>>
> 
> yep, good catch
> 
>>>
>>>
>>>>           link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
>>>>           if (!link)
>>>>                   return -ENOMEM;
[...]

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cef4ef0d2b4e..55f694b63164 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1214,15 +1214,17 @@  struct bpf_iter_aux_info {
 	struct bpf_map *map;
 };
 
-typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog,
-				       struct bpf_iter_aux_info *aux);
+typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
+					union bpf_iter_link_info *linfo,
+					struct bpf_iter_aux_info *aux);
+typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux);
 
 #define BPF_ITER_CTX_ARG_MAX 2
 struct bpf_iter_reg {
 	const char *target;
-	bpf_iter_check_target_t check_target;
+	bpf_iter_attach_target_t attach_target;
+	bpf_iter_detach_target_t detach_target;
 	u32 ctx_arg_info_size;
-	enum bpf_iter_link_info req_linfo;
 	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
 	const struct bpf_iter_seq_info *seq_info;
 };
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b134e679e9db..0480f893facd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -81,6 +81,12 @@  struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type */
 };
 
+union bpf_iter_link_info {
+	struct {
+		__u32	map_fd;
+	} map;
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -249,13 +255,6 @@  enum bpf_link_type {
 	MAX_BPF_LINK_TYPE,
 };
 
-enum bpf_iter_link_info {
-	BPF_ITER_LINK_UNSPEC = 0,
-	BPF_ITER_LINK_MAP_FD = 1,
-
-	MAX_BPF_ITER_LINK_INFO,
-};
-
 /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
  *
  * NONE(default): No further bpf programs allowed in the subtree.
@@ -623,6 +622,8 @@  union bpf_attr {
 		};
 		__u32		attach_type;	/* attach type */
 		__u32		flags;		/* extra flags */
+		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
+		__u32		iter_info_len;	/* iter_info length */
 	} link_create;
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 363b9cafc2d8..20d62020807f 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -338,8 +338,8 @@  static void bpf_iter_link_release(struct bpf_link *link)
 	struct bpf_iter_link *iter_link =
 		container_of(link, struct bpf_iter_link, link);
 
-	if (iter_link->aux.map)
-		bpf_map_put_with_uref(iter_link->aux.map);
+	if (iter_link->tinfo->reg_info->detach_target)
+		iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
 }
 
 static void bpf_iter_link_dealloc(struct bpf_link *link)
@@ -390,15 +390,29 @@  bool bpf_link_is_iter(struct bpf_link *link)
 
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
+	union bpf_iter_link_info __user *ulinfo;
 	struct bpf_link_primer link_primer;
 	struct bpf_iter_target_info *tinfo;
-	struct bpf_iter_aux_info aux = {};
+	union bpf_iter_link_info linfo;
 	struct bpf_iter_link *link;
-	u32 prog_btf_id, target_fd;
+	u32 prog_btf_id, linfo_len;
 	bool existed = false;
-	struct bpf_map *map;
 	int err;
 
+	memset(&linfo, 0, sizeof(union bpf_iter_link_info));
+
+	ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
+	linfo_len = attr->link_create.iter_info_len;
+	if (ulinfo && linfo_len) {
+		err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
+					       linfo_len);
+		if (err)
+			return err;
+		linfo_len = min_t(u32, linfo_len, sizeof(linfo));
+		if (copy_from_user(&linfo, ulinfo, linfo_len))
+			return -EFAULT;
+	}
+
 	prog_btf_id = prog->aux->attach_btf_id;
 	mutex_lock(&targets_mutex);
 	list_for_each_entry(tinfo, &targets, list) {
@@ -411,13 +425,6 @@  int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 	if (!existed)
 		return -ENOENT;
 
-	/* Make sure user supplied flags are target expected. */
-	target_fd = attr->link_create.target_fd;
-	if (attr->link_create.flags != tinfo->reg_info->req_linfo)
-		return -EINVAL;
-	if (!attr->link_create.flags && target_fd)
-		return -EINVAL;
-
 	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
 	if (!link)
 		return -ENOMEM;
@@ -431,28 +438,15 @@  int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 		return err;
 	}
 
-	if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) {
-		map = bpf_map_get_with_uref(target_fd);
-		if (IS_ERR(map)) {
-			err = PTR_ERR(map);
-			goto cleanup_link;
-		}
-
-		aux.map = map;
-		err = tinfo->reg_info->check_target(prog, &aux);
+	if (tinfo->reg_info->attach_target) {
+		err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
 		if (err) {
-			bpf_map_put_with_uref(map);
-			goto cleanup_link;
+			bpf_link_cleanup(&link_primer);
+			return err;
 		}
-
-		link->aux.map = map;
 	}
 
 	return bpf_link_settle(&link_primer);
-
-cleanup_link:
-	bpf_link_cleanup(&link_primer);
-	return err;
 }
 
 static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index fbe1f557cb88..131603fe1cbf 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -98,12 +98,21 @@  static struct bpf_iter_reg bpf_map_reg_info = {
 	.seq_info		= &bpf_map_seq_info,
 };
 
-static int bpf_iter_check_map(struct bpf_prog *prog,
-			      struct bpf_iter_aux_info *aux)
+static int bpf_iter_attach_map(struct bpf_prog *prog,
+			       union bpf_iter_link_info *linfo,
+			       struct bpf_iter_aux_info *aux)
 {
 	u32 key_acc_size, value_acc_size, key_size, value_size;
-	struct bpf_map *map = aux->map;
+	struct bpf_map *map;
 	bool is_percpu = false;
+	int err = -EINVAL;
+
+	if (!linfo->map.map_fd)
+		return -EINVAL;
+
+	map = bpf_map_get_with_uref(linfo->map.map_fd);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
 
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -112,7 +121,7 @@  static int bpf_iter_check_map(struct bpf_prog *prog,
 	else if (map->map_type != BPF_MAP_TYPE_HASH &&
 		 map->map_type != BPF_MAP_TYPE_LRU_HASH &&
 		 map->map_type != BPF_MAP_TYPE_ARRAY)
-		return -EINVAL;
+		goto put_map;
 
 	key_acc_size = prog->aux->max_rdonly_access;
 	value_acc_size = prog->aux->max_rdwr_access;
@@ -122,10 +131,22 @@  static int bpf_iter_check_map(struct bpf_prog *prog,
 	else
 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
 
-	if (key_acc_size > key_size || value_acc_size > value_size)
-		return -EACCES;
+	if (key_acc_size > key_size || value_acc_size > value_size) {
+		err = -EACCES;
+		goto put_map;
+	}
 
+	aux->map = map;
 	return 0;
+
+put_map:
+	bpf_map_put_with_uref(map);
+	return err;
+}
+
+static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
+{
+	bpf_map_put_with_uref(aux->map);
 }
 
 DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
@@ -133,8 +154,8 @@  DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
 
 static const struct bpf_iter_reg bpf_map_elem_reg_info = {
 	.target			= "bpf_map_elem",
-	.check_target		= bpf_iter_check_map,
-	.req_linfo		= BPF_ITER_LINK_MAP_FD,
+	.attach_target		= bpf_iter_attach_map,
+	.detach_target		= bpf_iter_detach_map,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
 		{ offsetof(struct bpf_iter__bpf_map_elem, key),
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2f343ce15747..86299a292214 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3883,7 +3883,7 @@  static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *
 	return -EINVAL;
 }
 
-#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
+#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
 static int link_create(union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d3377c90a291..6c0bdb5a0b26 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -1384,18 +1384,39 @@  static int bpf_iter_init_sk_storage_map(void *priv_data,
 	return 0;
 }
 
-static int bpf_iter_check_map(struct bpf_prog *prog,
-			      struct bpf_iter_aux_info *aux)
+static int bpf_iter_attach_map(struct bpf_prog *prog,
+			       union bpf_iter_link_info *linfo,
+			       struct bpf_iter_aux_info *aux)
 {
-	struct bpf_map *map = aux->map;
+	struct bpf_map *map;
+	int err = -EINVAL;
 
-	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
+	if (!linfo->map.map_fd)
 		return -EINVAL;
 
-	if (prog->aux->max_rdonly_access > map->value_size)
-		return -EACCES;
+	map = bpf_map_get_with_uref(linfo->map.map_fd);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
+		goto put_map;
+
+	if (prog->aux->max_rdonly_access > map->value_size) {
+		err = -EACCES;
+		goto put_map;
+	}
 
+	aux->map = map;
 	return 0;
+
+put_map:
+	bpf_map_put_with_uref(map);
+	return err;
+}
+
+static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
+{
+	bpf_map_put_with_uref(aux->map);
 }
 
 static const struct seq_operations bpf_sk_storage_map_seq_ops = {
@@ -1414,8 +1435,8 @@  static const struct bpf_iter_seq_info iter_seq_info = {
 
 static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
 	.target			= "bpf_sk_storage_map",
-	.check_target		= bpf_iter_check_map,
-	.req_linfo		= BPF_ITER_LINK_MAP_FD,
+	.attach_target		= bpf_iter_attach_map,
+	.detach_target		= bpf_iter_detach_map,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
 		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b134e679e9db..0480f893facd 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -81,6 +81,12 @@  struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type */
 };
 
+union bpf_iter_link_info {
+	struct {
+		__u32	map_fd;
+	} map;
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -249,13 +255,6 @@  enum bpf_link_type {
 	MAX_BPF_LINK_TYPE,
 };
 
-enum bpf_iter_link_info {
-	BPF_ITER_LINK_UNSPEC = 0,
-	BPF_ITER_LINK_MAP_FD = 1,
-
-	MAX_BPF_ITER_LINK_INFO,
-};
-
 /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
  *
  * NONE(default): No further bpf programs allowed in the subtree.
@@ -623,6 +622,8 @@  union bpf_attr {
 		};
 		__u32		attach_type;	/* attach type */
 		__u32		flags;		/* extra flags */
+		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
+		__u32		iter_info_len;	/* iter_info length */
 	} link_create;
 
 	struct { /* struct used by BPF_LINK_UPDATE command */