diff mbox series

[v4,bpf-next,7/9] libbpf: add libbpf support to batch ops

Message ID 20200114164614.47029-9-brianvv@google.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series add bpf batch ops to process more than 1 elem | expand

Commit Message

Brian Vazquez Jan. 14, 2020, 4:46 p.m. UTC
From: Yonghong Song <yhs@fb.com>

Added four libbpf API functions to support map batch operations:
  . int bpf_map_delete_batch( ... )
  . int bpf_map_lookup_batch( ... )
  . int bpf_map_lookup_and_delete_batch( ... )
  . int bpf_map_update_batch( ... )

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/lib/bpf/bpf.c      | 60 ++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/bpf.h      | 22 +++++++++++++++
 tools/lib/bpf/libbpf.map |  4 +++
 3 files changed, 86 insertions(+)

Comments

Andrii Nakryiko Jan. 14, 2020, 6:36 p.m. UTC | #1
On Tue, Jan 14, 2020 at 8:46 AM Brian Vazquez <brianvv@google.com> wrote:
>
> From: Yonghong Song <yhs@fb.com>
>
> Added four libbpf API functions to support map batch operations:
>   . int bpf_map_delete_batch( ... )
>   . int bpf_map_lookup_batch( ... )
>   . int bpf_map_lookup_and_delete_batch( ... )
>   . int bpf_map_update_batch( ... )
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  tools/lib/bpf/bpf.c      | 60 ++++++++++++++++++++++++++++++++++++++++
>  tools/lib/bpf/bpf.h      | 22 +++++++++++++++
>  tools/lib/bpf/libbpf.map |  4 +++
>  3 files changed, 86 insertions(+)
>
> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> index 500afe478e94a..12ce8d275f7dc 100644
> --- a/tools/lib/bpf/bpf.c
> +++ b/tools/lib/bpf/bpf.c
> @@ -452,6 +452,66 @@ int bpf_map_freeze(int fd)
>         return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
>  }
>
> +static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
> +                               void *out_batch, void *keys, void *values,
> +                               __u32 *count,
> +                               const struct bpf_map_batch_opts *opts)
> +{
> +       union bpf_attr attr = {};
> +       int ret;
> +
> +       if (!OPTS_VALID(opts, bpf_map_batch_opts))
> +               return -EINVAL;
> +
> +       memset(&attr, 0, sizeof(attr));
> +       attr.batch.map_fd = fd;
> +       attr.batch.in_batch = ptr_to_u64(in_batch);
> +       attr.batch.out_batch = ptr_to_u64(out_batch);
> +       attr.batch.keys = ptr_to_u64(keys);
> +       attr.batch.values = ptr_to_u64(values);
> +       if (count)
> +               attr.batch.count = *count;
> +       attr.batch.elem_flags  = OPTS_GET(opts, elem_flags, 0);
> +       attr.batch.flags = OPTS_GET(opts, flags, 0);
> +
> +       ret = sys_bpf(cmd, &attr, sizeof(attr));
> +       if (count)
> +               *count = attr.batch.count;

what if syscall failed, do you still want to assign *count then?

> +
> +       return ret;
> +}
> +

[...]
Brian Vazquez Jan. 14, 2020, 6:53 p.m. UTC | #2
On Tue, Jan 14, 2020 at 10:36 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Jan 14, 2020 at 8:46 AM Brian Vazquez <brianvv@google.com> wrote:
> >
> > From: Yonghong Song <yhs@fb.com>
> >
> > Added four libbpf API functions to support map batch operations:
> >   . int bpf_map_delete_batch( ... )
> >   . int bpf_map_lookup_batch( ... )
> >   . int bpf_map_lookup_and_delete_batch( ... )
> >   . int bpf_map_update_batch( ... )
> >
> > Signed-off-by: Yonghong Song <yhs@fb.com>
> > ---
> >  tools/lib/bpf/bpf.c      | 60 ++++++++++++++++++++++++++++++++++++++++
> >  tools/lib/bpf/bpf.h      | 22 +++++++++++++++
> >  tools/lib/bpf/libbpf.map |  4 +++
> >  3 files changed, 86 insertions(+)
> >
> > diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> > index 500afe478e94a..12ce8d275f7dc 100644
> > --- a/tools/lib/bpf/bpf.c
> > +++ b/tools/lib/bpf/bpf.c
> > @@ -452,6 +452,66 @@ int bpf_map_freeze(int fd)
> >         return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
> >  }
> >
> > +static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
> > +                               void *out_batch, void *keys, void *values,
> > +                               __u32 *count,
> > +                               const struct bpf_map_batch_opts *opts)
> > +{
> > +       union bpf_attr attr = {};
> > +       int ret;
> > +
> > +       if (!OPTS_VALID(opts, bpf_map_batch_opts))
> > +               return -EINVAL;
> > +
> > +       memset(&attr, 0, sizeof(attr));
> > +       attr.batch.map_fd = fd;
> > +       attr.batch.in_batch = ptr_to_u64(in_batch);
> > +       attr.batch.out_batch = ptr_to_u64(out_batch);
> > +       attr.batch.keys = ptr_to_u64(keys);
> > +       attr.batch.values = ptr_to_u64(values);
> > +       if (count)
> > +               attr.batch.count = *count;
> > +       attr.batch.elem_flags  = OPTS_GET(opts, elem_flags, 0);
> > +       attr.batch.flags = OPTS_GET(opts, flags, 0);
> > +
> > +       ret = sys_bpf(cmd, &attr, sizeof(attr));
> > +       if (count)
> > +               *count = attr.batch.count;
>
> what if syscall failed, do you still want to assign *count then?

Hi Andrii, thanks for taking a look.

attr.batch.count should report the number of entries correctly
processed before finding an error. For example, if you provided a
buffer for 3 entries and the map only has 1, ret is going to be
-ENOENT, meaning that you traversed the map and you still want to
assign *count.

That being said, the condition 'if (count)' is wrong and I think it
should be removed.

>
> > +
> > +       return ret;
> > +}
> > +
>
> [...]
Andrii Nakryiko Jan. 14, 2020, 7:13 p.m. UTC | #3
On Tue, Jan 14, 2020 at 10:54 AM Brian Vazquez <brianvv@google.com> wrote:
>
> On Tue, Jan 14, 2020 at 10:36 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Tue, Jan 14, 2020 at 8:46 AM Brian Vazquez <brianvv@google.com> wrote:
> > >
> > > From: Yonghong Song <yhs@fb.com>
> > >
> > > Added four libbpf API functions to support map batch operations:
> > >   . int bpf_map_delete_batch( ... )
> > >   . int bpf_map_lookup_batch( ... )
> > >   . int bpf_map_lookup_and_delete_batch( ... )
> > >   . int bpf_map_update_batch( ... )
> > >
> > > Signed-off-by: Yonghong Song <yhs@fb.com>
> > > ---
> > >  tools/lib/bpf/bpf.c      | 60 ++++++++++++++++++++++++++++++++++++++++
> > >  tools/lib/bpf/bpf.h      | 22 +++++++++++++++
> > >  tools/lib/bpf/libbpf.map |  4 +++
> > >  3 files changed, 86 insertions(+)
> > >
> > > diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> > > index 500afe478e94a..12ce8d275f7dc 100644
> > > --- a/tools/lib/bpf/bpf.c
> > > +++ b/tools/lib/bpf/bpf.c
> > > @@ -452,6 +452,66 @@ int bpf_map_freeze(int fd)
> > >         return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
> > >  }
> > >
> > > +static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
> > > +                               void *out_batch, void *keys, void *values,
> > > +                               __u32 *count,
> > > +                               const struct bpf_map_batch_opts *opts)
> > > +{
> > > +       union bpf_attr attr = {};
> > > +       int ret;
> > > +
> > > +       if (!OPTS_VALID(opts, bpf_map_batch_opts))
> > > +               return -EINVAL;
> > > +
> > > +       memset(&attr, 0, sizeof(attr));
> > > +       attr.batch.map_fd = fd;
> > > +       attr.batch.in_batch = ptr_to_u64(in_batch);
> > > +       attr.batch.out_batch = ptr_to_u64(out_batch);
> > > +       attr.batch.keys = ptr_to_u64(keys);
> > > +       attr.batch.values = ptr_to_u64(values);
> > > +       if (count)
> > > +               attr.batch.count = *count;
> > > +       attr.batch.elem_flags  = OPTS_GET(opts, elem_flags, 0);
> > > +       attr.batch.flags = OPTS_GET(opts, flags, 0);
> > > +
> > > +       ret = sys_bpf(cmd, &attr, sizeof(attr));
> > > +       if (count)
> > > +               *count = attr.batch.count;
> >
> > what if syscall failed, do you still want to assign *count then?
>
> Hi Andrii, thanks for taking a look.
>
> attr.batch.count should report the number of entries correctly
> processed before finding and error, an example could be when you
> provided a buffer for 3 entries and the map only has 1, ret is going
> to be -ENOENT meaning that you traversed the map and you still want to
> assign *count.

ah, ok, tricky semantics :) if syscall failed before kernel got to
updating count, I'm guessing it is guaranteed to preserve old value?

>
> That being said, the condition 'if (count)' is wrong and I think it
> should be removed.

So count is mandatory, right? In that case both `if (count)` checks are wrong.

>
> >
> > > +
> > > +       return ret;
> > > +}
> > > +
> >
> > [...]
Brian Vazquez Jan. 14, 2020, 7:24 p.m. UTC | #4
On Tue, Jan 14, 2020 at 11:13 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Jan 14, 2020 at 10:54 AM Brian Vazquez <brianvv@google.com> wrote:
> >
> > On Tue, Jan 14, 2020 at 10:36 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Tue, Jan 14, 2020 at 8:46 AM Brian Vazquez <brianvv@google.com> wrote:
> > > >
> > > > From: Yonghong Song <yhs@fb.com>
> > > >
> > > > Added four libbpf API functions to support map batch operations:
> > > >   . int bpf_map_delete_batch( ... )
> > > >   . int bpf_map_lookup_batch( ... )
> > > >   . int bpf_map_lookup_and_delete_batch( ... )
> > > >   . int bpf_map_update_batch( ... )
> > > >
> > > > Signed-off-by: Yonghong Song <yhs@fb.com>
> > > > ---
> > > >  tools/lib/bpf/bpf.c      | 60 ++++++++++++++++++++++++++++++++++++++++
> > > >  tools/lib/bpf/bpf.h      | 22 +++++++++++++++
> > > >  tools/lib/bpf/libbpf.map |  4 +++
> > > >  3 files changed, 86 insertions(+)
> > > >
> > > > diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> > > > index 500afe478e94a..12ce8d275f7dc 100644
> > > > --- a/tools/lib/bpf/bpf.c
> > > > +++ b/tools/lib/bpf/bpf.c
> > > > @@ -452,6 +452,66 @@ int bpf_map_freeze(int fd)
> > > >         return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
> > > >  }
> > > >
> > > > +static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
> > > > +                               void *out_batch, void *keys, void *values,
> > > > +                               __u32 *count,
> > > > +                               const struct bpf_map_batch_opts *opts)
> > > > +{
> > > > +       union bpf_attr attr = {};
> > > > +       int ret;
> > > > +
> > > > +       if (!OPTS_VALID(opts, bpf_map_batch_opts))
> > > > +               return -EINVAL;
> > > > +
> > > > +       memset(&attr, 0, sizeof(attr));
> > > > +       attr.batch.map_fd = fd;
> > > > +       attr.batch.in_batch = ptr_to_u64(in_batch);
> > > > +       attr.batch.out_batch = ptr_to_u64(out_batch);
> > > > +       attr.batch.keys = ptr_to_u64(keys);
> > > > +       attr.batch.values = ptr_to_u64(values);
> > > > +       if (count)
> > > > +               attr.batch.count = *count;
> > > > +       attr.batch.elem_flags  = OPTS_GET(opts, elem_flags, 0);
> > > > +       attr.batch.flags = OPTS_GET(opts, flags, 0);
> > > > +
> > > > +       ret = sys_bpf(cmd, &attr, sizeof(attr));
> > > > +       if (count)
> > > > +               *count = attr.batch.count;
> > >
> > > what if syscall failed, do you still want to assign *count then?
> >
> > Hi Andrii, thanks for taking a look.
> >
> > attr.batch.count should report the number of entries correctly
> > processed before finding and error, an example could be when you
> > provided a buffer for 3 entries and the map only has 1, ret is going
> > to be -ENOENT meaning that you traversed the map and you still want to
> > assign *count.
>
> ah, ok, tricky semantics :) if syscall failed before kernel got to
> updating count, I'm guessing it is guaranteed to preserve old value?
>
I think for correctness as a first step inside the syscall we should
update count to 0 and copy back to user, so we never preserve the old
value and we can trust what count is reporting. WDYT?
> >
> > That being said, the condition 'if (count)' is wrong and I think it
> > should be removed.
>
> So count is mandatory, right? In that case both `if (count)` checks are wrong.
Yes, you are right. I'll remove them in next version.
>
> >
> > >
> > > > +
> > > > +       return ret;
> > > > +}
> > > > +
> > >
> > > [...]
Yonghong Song Jan. 14, 2020, 9:33 p.m. UTC | #5
On 1/14/20 11:13 AM, Andrii Nakryiko wrote:
> On Tue, Jan 14, 2020 at 10:54 AM Brian Vazquez <brianvv@google.com> wrote:
>>
>> On Tue, Jan 14, 2020 at 10:36 AM Andrii Nakryiko
>> <andrii.nakryiko@gmail.com> wrote:
>>>
>>> On Tue, Jan 14, 2020 at 8:46 AM Brian Vazquez <brianvv@google.com> wrote:
>>>>
>>>> From: Yonghong Song <yhs@fb.com>
>>>>
>>>> Added four libbpf API functions to support map batch operations:
>>>>    . int bpf_map_delete_batch( ... )
>>>>    . int bpf_map_lookup_batch( ... )
>>>>    . int bpf_map_lookup_and_delete_batch( ... )
>>>>    . int bpf_map_update_batch( ... )
>>>>
>>>> Signed-off-by: Yonghong Song <yhs@fb.com>
>>>> ---
>>>>   tools/lib/bpf/bpf.c      | 60 ++++++++++++++++++++++++++++++++++++++++
>>>>   tools/lib/bpf/bpf.h      | 22 +++++++++++++++
>>>>   tools/lib/bpf/libbpf.map |  4 +++
>>>>   3 files changed, 86 insertions(+)
>>>>
>>>> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
>>>> index 500afe478e94a..12ce8d275f7dc 100644
>>>> --- a/tools/lib/bpf/bpf.c
>>>> +++ b/tools/lib/bpf/bpf.c
>>>> @@ -452,6 +452,66 @@ int bpf_map_freeze(int fd)
>>>>          return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
>>>>   }
>>>>
>>>> +static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
>>>> +                               void *out_batch, void *keys, void *values,
>>>> +                               __u32 *count,
>>>> +                               const struct bpf_map_batch_opts *opts)
>>>> +{
>>>> +       union bpf_attr attr = {};
>>>> +       int ret;
>>>> +
>>>> +       if (!OPTS_VALID(opts, bpf_map_batch_opts))
>>>> +               return -EINVAL;
>>>> +
>>>> +       memset(&attr, 0, sizeof(attr));
>>>> +       attr.batch.map_fd = fd;
>>>> +       attr.batch.in_batch = ptr_to_u64(in_batch);
>>>> +       attr.batch.out_batch = ptr_to_u64(out_batch);
>>>> +       attr.batch.keys = ptr_to_u64(keys);
>>>> +       attr.batch.values = ptr_to_u64(values);
>>>> +       if (count)
>>>> +               attr.batch.count = *count;
>>>> +       attr.batch.elem_flags  = OPTS_GET(opts, elem_flags, 0);
>>>> +       attr.batch.flags = OPTS_GET(opts, flags, 0);
>>>> +
>>>> +       ret = sys_bpf(cmd, &attr, sizeof(attr));
>>>> +       if (count)
>>>> +               *count = attr.batch.count;
>>>
>>> what if syscall failed, do you still want to assign *count then?
>>
>> Hi Andrii, thanks for taking a look.
>>
>> attr.batch.count should report the number of entries correctly
>> processed before finding and error, an example could be when you
>> provided a buffer for 3 entries and the map only has 1, ret is going
>> to be -ENOENT meaning that you traversed the map and you still want to
>> assign *count.
> 
> ah, ok, tricky semantics :) if syscall failed before kernel got to
> updating count, I'm guessing it is guaranteed to preserve old value?
> 
>>
>> That being said, the condition 'if (count)' is wrong and I think it
>> should be removed.
> 
> So count is mandatory, right? In that case both `if (count)` checks are wrong.

A little bit of history here. Some of the early iterations had
operations like:
    delete the whole hash table
    delete the hash table starting from a key to the end
    etc.
In such cases, the user may not pass 'count' to the kernel.

Now we do not support such scenarios and all supported cases
in this patch set requires 'count', so yes, we can make `count'
mandatory.

> 
>>
>>>
>>>> +
>>>> +       return ret;
>>>> +}
>>>> +
>>>
>>> [...]
diff mbox series

Patch

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 500afe478e94a..12ce8d275f7dc 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -452,6 +452,66 @@  int bpf_map_freeze(int fd)
 	return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
 }
 
+static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
+				void *out_batch, void *keys, void *values,
+				__u32 *count,
+				const struct bpf_map_batch_opts *opts)
+{
+	union bpf_attr attr = {};
+	int ret;
+
+	if (!OPTS_VALID(opts, bpf_map_batch_opts))
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.batch.map_fd = fd;
+	attr.batch.in_batch = ptr_to_u64(in_batch);
+	attr.batch.out_batch = ptr_to_u64(out_batch);
+	attr.batch.keys = ptr_to_u64(keys);
+	attr.batch.values = ptr_to_u64(values);
+	if (count)
+		attr.batch.count = *count;
+	attr.batch.elem_flags  = OPTS_GET(opts, elem_flags, 0);
+	attr.batch.flags = OPTS_GET(opts, flags, 0);
+
+	ret = sys_bpf(cmd, &attr, sizeof(attr));
+	if (count)
+		*count = attr.batch.count;
+
+	return ret;
+}
+
+int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
+			 const struct bpf_map_batch_opts *opts)
+{
+	return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
+				    NULL, keys, NULL, count, opts);
+}
+
+int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
+			 void *values, __u32 *count,
+			 const struct bpf_map_batch_opts *opts)
+{
+	return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch,
+				    out_batch, keys, values, count, opts);
+}
+
+int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
+				    void *keys, void *values, __u32 *count,
+				    const struct bpf_map_batch_opts *opts)
+{
+	return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH,
+				    fd, in_batch, out_batch, keys, values,
+				    count, opts);
+}
+
+int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
+			 const struct bpf_map_batch_opts *opts)
+{
+	return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
+				    keys, values, count, opts);
+}
+
 int bpf_obj_pin(int fd, const char *pathname)
 {
 	union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 56341d117e5ba..b976e77316cca 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -127,6 +127,28 @@  LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
 LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
 LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 LIBBPF_API int bpf_map_freeze(int fd);
+
+struct bpf_map_batch_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	__u64 elem_flags;
+	__u64 flags;
+};
+#define bpf_map_batch_opts__last_field flags
+
+LIBBPF_API int bpf_map_delete_batch(int fd, void *keys,
+				    __u32 *count,
+				    const struct bpf_map_batch_opts *opts);
+LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,
+				    void *keys, void *values, __u32 *count,
+				    const struct bpf_map_batch_opts *opts);
+LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
+					void *out_batch, void *keys,
+					void *values, __u32 *count,
+					const struct bpf_map_batch_opts *opts);
+LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values,
+				    __u32 *count,
+				    const struct bpf_map_batch_opts *opts);
+
 LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
 LIBBPF_API int bpf_obj_get(const char *pathname);
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index a19f04e6e3d96..1902a0fc6afcc 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -214,6 +214,10 @@  LIBBPF_0.0.7 {
 		btf_dump__emit_type_decl;
 		bpf_link__disconnect;
 		bpf_map__attach_struct_ops;
+		bpf_map_delete_batch;
+		bpf_map_lookup_and_delete_batch;
+		bpf_map_lookup_batch;
+		bpf_map_update_batch;
 		bpf_object__find_program_by_name;
 		bpf_object__attach_skeleton;
 		bpf_object__destroy_skeleton;