diff mbox series

[RFC,3/5] ebpf: Added declaration/initialization routines.

Message ID 20230330001522.120774-4-andrew@daynix.com
State New
Headers show
Series eBPF RSS through QMP support. | expand

Commit Message

Andrew Melnichenko March 30, 2023, 12:15 a.m. UTC
Now, the binary objects may be retrieved by id/name.
It would require for future qmp commands that may require specific
eBPF blob.

Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
---
 ebpf/ebpf.c      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 ebpf/ebpf.h      | 25 +++++++++++++++++++++++++
 ebpf/ebpf_rss.c  |  4 ++++
 ebpf/meson.build |  1 +
 4 files changed, 78 insertions(+)
 create mode 100644 ebpf/ebpf.c
 create mode 100644 ebpf/ebpf.h

Comments

Jason Wang March 30, 2023, 6:54 a.m. UTC | #1
On Thu, Mar 30, 2023 at 8:33 AM Andrew Melnychenko <andrew@daynix.com> wrote:
>
> Now, the binary objects may be retrieved by id/name.
> It would require for future qmp commands that may require specific
> eBPF blob.
>
> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> ---
>  ebpf/ebpf.c      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
>  ebpf/ebpf.h      | 25 +++++++++++++++++++++++++
>  ebpf/ebpf_rss.c  |  4 ++++
>  ebpf/meson.build |  1 +
>  4 files changed, 78 insertions(+)
>  create mode 100644 ebpf/ebpf.c
>  create mode 100644 ebpf/ebpf.h
>
> diff --git a/ebpf/ebpf.c b/ebpf/ebpf.c
> new file mode 100644
> index 0000000000..86320d72f5
> --- /dev/null
> +++ b/ebpf/ebpf.c
> @@ -0,0 +1,48 @@
> +/*
> + * QEMU eBPF binary declaration routine.
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Authors:
> + *  Andrew Melnychenko <andrew@daynix.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/queue.h"
> +#include "ebpf/ebpf.h"
> +
> +struct ElfBinaryDataEntry {
> +    const char *id;
> +    const void * (*fn)(size_t *);
> +
> +    QSLIST_ENTRY(ElfBinaryDataEntry) node;
> +};
> +
> +static QSLIST_HEAD(, ElfBinaryDataEntry) ebpf_elf_obj_list =
> +                                            QSLIST_HEAD_INITIALIZER();
> +
> +void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *))
> +{
> +    struct ElfBinaryDataEntry *data = NULL;
> +
> +    data = g_malloc0(sizeof(*data));
> +    data->fn = fn;
> +    data->id = id;
> +
> +    QSLIST_INSERT_HEAD(&ebpf_elf_obj_list, data, node);
> +}
> +
> +const void *ebpf_find_binary_by_id(const char *id, size_t *sz)
> +{
> +    struct ElfBinaryDataEntry *it = NULL;
> +    QSLIST_FOREACH(it, &ebpf_elf_obj_list, node) {
> +        if (strcmp(id, it->id) == 0) {
> +            return it->fn(sz);
> +        }
> +    }
> +
> +    return NULL;
> +}
> diff --git a/ebpf/ebpf.h b/ebpf/ebpf.h
> new file mode 100644
> index 0000000000..fd705cb73e
> --- /dev/null
> +++ b/ebpf/ebpf.h
> @@ -0,0 +1,25 @@
> +/*
> + * QEMU eBPF binary declaration routine.
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Authors:
> + *  Andrew Melnychenko <andrew@daynix.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef EBPF_H
> +#define EBPF_H
> +
> +void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *));
> +const void *ebpf_find_binary_by_id(const char *id, size_t *sz);
> +
> +#define ebpf_binary_init(id, fn)                                           \
> +static void __attribute__((constructor)) ebpf_binary_init_ ## fn(void)     \
> +{                                                                          \
> +    ebpf_register_binary_data(id, fn);                                     \
> +}
> +
> +#endif /* EBPF_H */
> diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> index 08015fecb1..b4038725f2 100644
> --- a/ebpf/ebpf_rss.c
> +++ b/ebpf/ebpf_rss.c
> @@ -21,6 +21,8 @@
>
>  #include "ebpf/ebpf_rss.h"
>  #include "ebpf/rss.bpf.skeleton.h"
> +#include "ebpf/ebpf.h"
> +
>  #include "trace.h"
>
>  void ebpf_rss_init(struct EBPFRSSContext *ctx)
> @@ -237,3 +239,5 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
>      ctx->obj = NULL;
>      ctx->program_fd = -1;
>  }
> +
> +ebpf_binary_init("rss", rss_bpf__elf_bytes)

Who or how the ABI compatibility is preserved between libvirt and Qemu?

Thanks

> diff --git a/ebpf/meson.build b/ebpf/meson.build
> index 2dd0fd8948..67c3f53aa9 100644
> --- a/ebpf/meson.build
> +++ b/ebpf/meson.build
> @@ -1 +1,2 @@
> +softmmu_ss.add(files('ebpf.c'))
>  softmmu_ss.add(when: libbpf, if_true: files('ebpf_rss.c'), if_false: files('ebpf_rss-stub.c'))
> --
> 2.39.1
>
Daniel P. Berrangé March 30, 2023, 8:33 a.m. UTC | #2
On Thu, Mar 30, 2023 at 03:15:20AM +0300, Andrew Melnychenko wrote:
> Now, the binary objects may be retrieved by id/name.
> It would require for future qmp commands that may require specific
> eBPF blob.
> 
> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> ---
>  ebpf/ebpf.c      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
>  ebpf/ebpf.h      | 25 +++++++++++++++++++++++++
>  ebpf/ebpf_rss.c  |  4 ++++
>  ebpf/meson.build |  1 +
>  4 files changed, 78 insertions(+)
>  create mode 100644 ebpf/ebpf.c
>  create mode 100644 ebpf/ebpf.h
> 
> diff --git a/ebpf/ebpf.c b/ebpf/ebpf.c
> new file mode 100644
> index 0000000000..86320d72f5
> --- /dev/null
> +++ b/ebpf/ebpf.c
> @@ -0,0 +1,48 @@
> +/*
> + * QEMU eBPF binary declaration routine.
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Authors:
> + *  Andrew Melnychenko <andrew@daynix.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/queue.h"
> +#include "ebpf/ebpf.h"
> +
> +struct ElfBinaryDataEntry {
> +    const char *id;
> +    const void * (*fn)(size_t *);

It feels odd to be storing the function there, as that's just
an artifact of how the EBPF rss program is acquired. IMHO
this should just be

   const void *data;
   size_t datalen;

> +
> +    QSLIST_ENTRY(ElfBinaryDataEntry) node;
> +};
> +
> +static QSLIST_HEAD(, ElfBinaryDataEntry) ebpf_elf_obj_list =
> +                                            QSLIST_HEAD_INITIALIZER();
> +
> +void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *))
> +{
> +    struct ElfBinaryDataEntry *data = NULL;
> +
> +    data = g_malloc0(sizeof(*data));

We prefer g_new0 over g_malloc and initialize when declaring eg

    struct ElfBinaryDataEntry *data = g_new0(struct ElfBinaryDataEntry, 1);

> +    data->fn = fn;
> +    data->id = id;
> +
> +    QSLIST_INSERT_HEAD(&ebpf_elf_obj_list, data, node);
> +}
> +
> +const void *ebpf_find_binary_by_id(const char *id, size_t *sz)
> +{
> +    struct ElfBinaryDataEntry *it = NULL;
> +    QSLIST_FOREACH(it, &ebpf_elf_obj_list, node) {
> +        if (strcmp(id, it->id) == 0) {
> +            return it->fn(sz);
> +        }
> +    }
> +
> +    return NULL;
> +}
> diff --git a/ebpf/ebpf.h b/ebpf/ebpf.h
> new file mode 100644
> index 0000000000..fd705cb73e
> --- /dev/null
> +++ b/ebpf/ebpf.h
> @@ -0,0 +1,25 @@
> +/*
> + * QEMU eBPF binary declaration routine.
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Authors:
> + *  Andrew Melnychenko <andrew@daynix.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef EBPF_H
> +#define EBPF_H
> +
> +void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *));

IMHO it would be better as

void ebpf_register_binary_data(const char *id, const void *data, size_t datalen);


> +const void *ebpf_find_binary_by_id(const char *id, size_t *sz);
> +
> +#define ebpf_binary_init(id, fn)                                           \
> +static void __attribute__((constructor)) ebpf_binary_init_ ## fn(void)     \
> +{                                                                          \
> +    ebpf_register_binary_data(id, fn);                                     \

size_t datalen;
const void *data = fn(&datalen);
ebpf_register_binary_data(oid, data, datalen);


> +}
> +
> +#endif /* EBPF_H */
> diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> index 08015fecb1..b4038725f2 100644
> --- a/ebpf/ebpf_rss.c
> +++ b/ebpf/ebpf_rss.c
> @@ -21,6 +21,8 @@
>  
>  #include "ebpf/ebpf_rss.h"
>  #include "ebpf/rss.bpf.skeleton.h"
> +#include "ebpf/ebpf.h"
> +
>  #include "trace.h"
>  
>  void ebpf_rss_init(struct EBPFRSSContext *ctx)
> @@ -237,3 +239,5 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
>      ctx->obj = NULL;
>      ctx->program_fd = -1;
>  }
> +
> +ebpf_binary_init("rss", rss_bpf__elf_bytes)
> diff --git a/ebpf/meson.build b/ebpf/meson.build
> index 2dd0fd8948..67c3f53aa9 100644
> --- a/ebpf/meson.build
> +++ b/ebpf/meson.build
> @@ -1 +1,2 @@
> +softmmu_ss.add(files('ebpf.c'))
>  softmmu_ss.add(when: libbpf, if_true: files('ebpf_rss.c'), if_false: files('ebpf_rss-stub.c'))
> -- 
> 2.39.1
> 

With regards,
Daniel
Daniel P. Berrangé March 30, 2023, 8:34 a.m. UTC | #3
On Thu, Mar 30, 2023 at 02:54:32PM +0800, Jason Wang wrote:
> On Thu, Mar 30, 2023 at 8:33 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> >
> > Now, the binary objects may be retrieved by id/name.
> > It would require for future qmp commands that may require specific
> > eBPF blob.
> >
> > Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > ---
> >  ebpf/ebpf.c      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  ebpf/ebpf.h      | 25 +++++++++++++++++++++++++
> >  ebpf/ebpf_rss.c  |  4 ++++
> >  ebpf/meson.build |  1 +
> >  4 files changed, 78 insertions(+)
> >  create mode 100644 ebpf/ebpf.c
> >  create mode 100644 ebpf/ebpf.h
> >
> > diff --git a/ebpf/ebpf.c b/ebpf/ebpf.c
> > new file mode 100644
> > index 0000000000..86320d72f5
> > --- /dev/null
> > +++ b/ebpf/ebpf.c
> > @@ -0,0 +1,48 @@
> > +/*
> > + * QEMU eBPF binary declaration routine.
> > + *
> > + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > + *
> > + * Authors:
> > + *  Andrew Melnychenko <andrew@daynix.com>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> > + * later.  See the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu/queue.h"
> > +#include "ebpf/ebpf.h"
> > +
> > +struct ElfBinaryDataEntry {
> > +    const char *id;
> > +    const void * (*fn)(size_t *);
> > +
> > +    QSLIST_ENTRY(ElfBinaryDataEntry) node;
> > +};
> > +
> > +static QSLIST_HEAD(, ElfBinaryDataEntry) ebpf_elf_obj_list =
> > +                                            QSLIST_HEAD_INITIALIZER();
> > +
> > +void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *))
> > +{
> > +    struct ElfBinaryDataEntry *data = NULL;
> > +
> > +    data = g_malloc0(sizeof(*data));
> > +    data->fn = fn;
> > +    data->id = id;
> > +
> > +    QSLIST_INSERT_HEAD(&ebpf_elf_obj_list, data, node);
> > +}
> > +
> > +const void *ebpf_find_binary_by_id(const char *id, size_t *sz)
> > +{
> > +    struct ElfBinaryDataEntry *it = NULL;
> > +    QSLIST_FOREACH(it, &ebpf_elf_obj_list, node) {
> > +        if (strcmp(id, it->id) == 0) {
> > +            return it->fn(sz);
> > +        }
> > +    }
> > +
> > +    return NULL;
> > +}
> > diff --git a/ebpf/ebpf.h b/ebpf/ebpf.h
> > new file mode 100644
> > index 0000000000..fd705cb73e
> > --- /dev/null
> > +++ b/ebpf/ebpf.h
> > @@ -0,0 +1,25 @@
> > +/*
> > + * QEMU eBPF binary declaration routine.
> > + *
> > + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > + *
> > + * Authors:
> > + *  Andrew Melnychenko <andrew@daynix.com>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> > + * later.  See the COPYING file in the top-level directory.
> > + */
> > +
> > +#ifndef EBPF_H
> > +#define EBPF_H
> > +
> > +void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *));
> > +const void *ebpf_find_binary_by_id(const char *id, size_t *sz);
> > +
> > +#define ebpf_binary_init(id, fn)                                           \
> > +static void __attribute__((constructor)) ebpf_binary_init_ ## fn(void)     \
> > +{                                                                          \
> > +    ebpf_register_binary_data(id, fn);                                     \
> > +}
> > +
> > +#endif /* EBPF_H */
> > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > index 08015fecb1..b4038725f2 100644
> > --- a/ebpf/ebpf_rss.c
> > +++ b/ebpf/ebpf_rss.c
> > @@ -21,6 +21,8 @@
> >
> >  #include "ebpf/ebpf_rss.h"
> >  #include "ebpf/rss.bpf.skeleton.h"
> > +#include "ebpf/ebpf.h"
> > +
> >  #include "trace.h"
> >
> >  void ebpf_rss_init(struct EBPFRSSContext *ctx)
> > @@ -237,3 +239,5 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> >      ctx->obj = NULL;
> >      ctx->program_fd = -1;
> >  }
> > +
> > +ebpf_binary_init("rss", rss_bpf__elf_bytes)
> 
> Who or how the ABI compatibility is preserved between libvirt and Qemu?

There's no real problem with binary compatibility to solve any more.

When libvirt first launches a QEMU VM, it will fetch the eBPF programs
it needs from that running QEMU using QMP. WHen it later needs to
enable features that use eBPF, it already has the program data that
matches the running QEMU


With regards,
Daniel
Andrew Melnichenko March 30, 2023, 11:02 a.m. UTC | #4
Hi all,

On Thu, Mar 30, 2023 at 11:33 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Thu, Mar 30, 2023 at 03:15:20AM +0300, Andrew Melnychenko wrote:
> > Now, the binary objects may be retrieved by id/name.
> > It would require for future qmp commands that may require specific
> > eBPF blob.
> >
> > Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > ---
> >  ebpf/ebpf.c      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  ebpf/ebpf.h      | 25 +++++++++++++++++++++++++
> >  ebpf/ebpf_rss.c  |  4 ++++
> >  ebpf/meson.build |  1 +
> >  4 files changed, 78 insertions(+)
> >  create mode 100644 ebpf/ebpf.c
> >  create mode 100644 ebpf/ebpf.h
> >
> > diff --git a/ebpf/ebpf.c b/ebpf/ebpf.c
> > new file mode 100644
> > index 0000000000..86320d72f5
> > --- /dev/null
> > +++ b/ebpf/ebpf.c
> > @@ -0,0 +1,48 @@
> > +/*
> > + * QEMU eBPF binary declaration routine.
> > + *
> > + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > + *
> > + * Authors:
> > + *  Andrew Melnychenko <andrew@daynix.com>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> > + * later.  See the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu/queue.h"
> > +#include "ebpf/ebpf.h"
> > +
> > +struct ElfBinaryDataEntry {
> > +    const char *id;
> > +    const void * (*fn)(size_t *);
>
> It feels odd to be storing the function there, as that's just
> an artifact of how the EBPF rss program is acquired. IMHO
> this should just be
>
>    const void *data;
>    size_t datalen;
>

Yeah, have an idea to store data/size instead. the skeleton provides
them in function,
didn't want to make dirty hacks for the constructor. So I've passed
the function straight to
the structure.

> > +
> > +    QSLIST_ENTRY(ElfBinaryDataEntry) node;
> > +};
> > +
> > +static QSLIST_HEAD(, ElfBinaryDataEntry) ebpf_elf_obj_list =
> > +                                            QSLIST_HEAD_INITIALIZER();
> > +
> > +void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *))
> > +{
> > +    struct ElfBinaryDataEntry *data = NULL;
> > +
> > +    data = g_malloc0(sizeof(*data));
>
> We prefer g_new0 over g_malloc and initialize when declaring eg
>
>     struct ElfBinaryDataEntry *data = g_new0(struct ElfBinaryDataEntry, 1);
>

Ok, thank you.

> > +    data->fn = fn;
> > +    data->id = id;
> > +
> > +    QSLIST_INSERT_HEAD(&ebpf_elf_obj_list, data, node);
> > +}
> > +
> > +const void *ebpf_find_binary_by_id(const char *id, size_t *sz)
> > +{
> > +    struct ElfBinaryDataEntry *it = NULL;
> > +    QSLIST_FOREACH(it, &ebpf_elf_obj_list, node) {
> > +        if (strcmp(id, it->id) == 0) {
> > +            return it->fn(sz);
> > +        }
> > +    }
> > +
> > +    return NULL;
> > +}
> > diff --git a/ebpf/ebpf.h b/ebpf/ebpf.h
> > new file mode 100644
> > index 0000000000..fd705cb73e
> > --- /dev/null
> > +++ b/ebpf/ebpf.h
> > @@ -0,0 +1,25 @@
> > +/*
> > + * QEMU eBPF binary declaration routine.
> > + *
> > + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > + *
> > + * Authors:
> > + *  Andrew Melnychenko <andrew@daynix.com>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> > + * later.  See the COPYING file in the top-level directory.
> > + */
> > +
> > +#ifndef EBPF_H
> > +#define EBPF_H
> > +
> > +void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *));
>
> IMHO it would be better as
>
> void ebpf_register_binary_data(const char *id, const void *data, size_t datalen);
>
>
> > +const void *ebpf_find_binary_by_id(const char *id, size_t *sz);
> > +
> > +#define ebpf_binary_init(id, fn)                                           \
> > +static void __attribute__((constructor)) ebpf_binary_init_ ## fn(void)     \
> > +{                                                                          \
> > +    ebpf_register_binary_data(id, fn);                                     \
>
> size_t datalen;
> const void *data = fn(&datalen);
> ebpf_register_binary_data(oid, data, datalen);
>

Yeah, it can work like that. For some reason, I want that register
function and the constructor
have the same call/mention.

>
> > +}
> > +
> > +#endif /* EBPF_H */
> > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > index 08015fecb1..b4038725f2 100644
> > --- a/ebpf/ebpf_rss.c
> > +++ b/ebpf/ebpf_rss.c
> > @@ -21,6 +21,8 @@
> >
> >  #include "ebpf/ebpf_rss.h"
> >  #include "ebpf/rss.bpf.skeleton.h"
> > +#include "ebpf/ebpf.h"
> > +
> >  #include "trace.h"
> >
> >  void ebpf_rss_init(struct EBPFRSSContext *ctx)
> > @@ -237,3 +239,5 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> >      ctx->obj = NULL;
> >      ctx->program_fd = -1;
> >  }
> > +
> > +ebpf_binary_init("rss", rss_bpf__elf_bytes)
> > diff --git a/ebpf/meson.build b/ebpf/meson.build
> > index 2dd0fd8948..67c3f53aa9 100644
> > --- a/ebpf/meson.build
> > +++ b/ebpf/meson.build
> > @@ -1 +1,2 @@
> > +softmmu_ss.add(files('ebpf.c'))
> >  softmmu_ss.add(when: libbpf, if_true: files('ebpf_rss.c'), if_false: files('ebpf_rss-stub.c'))
> > --
> > 2.39.1
> >
>
> With regards,
> Daniel
> --
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
>
Jason Wang March 31, 2023, 7:48 a.m. UTC | #5
On Thu, Mar 30, 2023 at 4:34 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Thu, Mar 30, 2023 at 02:54:32PM +0800, Jason Wang wrote:
> > On Thu, Mar 30, 2023 at 8:33 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> > >
> > > Now, the binary objects may be retrieved by id/name.
> > > It would require for future qmp commands that may require specific
> > > eBPF blob.
> > >
> > > Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > > ---
> > >  ebpf/ebpf.c      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
> > >  ebpf/ebpf.h      | 25 +++++++++++++++++++++++++
> > >  ebpf/ebpf_rss.c  |  4 ++++
> > >  ebpf/meson.build |  1 +
> > >  4 files changed, 78 insertions(+)
> > >  create mode 100644 ebpf/ebpf.c
> > >  create mode 100644 ebpf/ebpf.h
> > >
> > > diff --git a/ebpf/ebpf.c b/ebpf/ebpf.c
> > > new file mode 100644
> > > index 0000000000..86320d72f5
> > > --- /dev/null
> > > +++ b/ebpf/ebpf.c
> > > @@ -0,0 +1,48 @@
> > > +/*
> > > + * QEMU eBPF binary declaration routine.
> > > + *
> > > + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > > + *
> > > + * Authors:
> > > + *  Andrew Melnychenko <andrew@daynix.com>
> > > + *
> > > + * This work is licensed under the terms of the GNU GPL, version 2 or
> > > + * later.  See the COPYING file in the top-level directory.
> > > + */
> > > +
> > > +#include "qemu/osdep.h"
> > > +#include "qemu/queue.h"
> > > +#include "ebpf/ebpf.h"
> > > +
> > > +struct ElfBinaryDataEntry {
> > > +    const char *id;
> > > +    const void * (*fn)(size_t *);
> > > +
> > > +    QSLIST_ENTRY(ElfBinaryDataEntry) node;
> > > +};
> > > +
> > > +static QSLIST_HEAD(, ElfBinaryDataEntry) ebpf_elf_obj_list =
> > > +                                            QSLIST_HEAD_INITIALIZER();
> > > +
> > > +void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *))
> > > +{
> > > +    struct ElfBinaryDataEntry *data = NULL;
> > > +
> > > +    data = g_malloc0(sizeof(*data));
> > > +    data->fn = fn;
> > > +    data->id = id;
> > > +
> > > +    QSLIST_INSERT_HEAD(&ebpf_elf_obj_list, data, node);
> > > +}
> > > +
> > > +const void *ebpf_find_binary_by_id(const char *id, size_t *sz)
> > > +{
> > > +    struct ElfBinaryDataEntry *it = NULL;
> > > +    QSLIST_FOREACH(it, &ebpf_elf_obj_list, node) {
> > > +        if (strcmp(id, it->id) == 0) {
> > > +            return it->fn(sz);
> > > +        }
> > > +    }
> > > +
> > > +    return NULL;
> > > +}
> > > diff --git a/ebpf/ebpf.h b/ebpf/ebpf.h
> > > new file mode 100644
> > > index 0000000000..fd705cb73e
> > > --- /dev/null
> > > +++ b/ebpf/ebpf.h
> > > @@ -0,0 +1,25 @@
> > > +/*
> > > + * QEMU eBPF binary declaration routine.
> > > + *
> > > + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > > + *
> > > + * Authors:
> > > + *  Andrew Melnychenko <andrew@daynix.com>
> > > + *
> > > + * This work is licensed under the terms of the GNU GPL, version 2 or
> > > + * later.  See the COPYING file in the top-level directory.
> > > + */
> > > +
> > > +#ifndef EBPF_H
> > > +#define EBPF_H
> > > +
> > > +void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *));
> > > +const void *ebpf_find_binary_by_id(const char *id, size_t *sz);
> > > +
> > > +#define ebpf_binary_init(id, fn)                                           \
> > > +static void __attribute__((constructor)) ebpf_binary_init_ ## fn(void)     \
> > > +{                                                                          \
> > > +    ebpf_register_binary_data(id, fn);                                     \
> > > +}
> > > +
> > > +#endif /* EBPF_H */
> > > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > > index 08015fecb1..b4038725f2 100644
> > > --- a/ebpf/ebpf_rss.c
> > > +++ b/ebpf/ebpf_rss.c
> > > @@ -21,6 +21,8 @@
> > >
> > >  #include "ebpf/ebpf_rss.h"
> > >  #include "ebpf/rss.bpf.skeleton.h"
> > > +#include "ebpf/ebpf.h"
> > > +
> > >  #include "trace.h"
> > >
> > >  void ebpf_rss_init(struct EBPFRSSContext *ctx)
> > > @@ -237,3 +239,5 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> > >      ctx->obj = NULL;
> > >      ctx->program_fd = -1;
> > >  }
> > > +
> > > +ebpf_binary_init("rss", rss_bpf__elf_bytes)
> >
> > Who or how the ABI compatibility is preserved between libvirt and Qemu?
>
> There's no real problem with binary compatibility to solve any more.
>
> When libvirt first launches a QEMU VM, it will fetch the eBPF programs
> it needs from that running QEMU using QMP. WHen it later needs to
> enable features that use eBPF, it already has the program data that
> matches the running QEMU

Ok, then who will validate the eBPF program? I don't think libvirt can
trust what is received from Qemu otherwise arbitrary eBPF programs
could be executed by Qemu in this way. One example is that when guests
escape to Qemu it can modify the rss_bpf__elf_bytes. Though
BPF_PROG_TYPE_SOCKET_FILTER gives some of the restrictions, we still
need to evaluate side effects of this. Or we need to find other ways
like using the binary in libvirt or use rx filter events.

Thanks

>
>
> With regards,
> Daniel
> --
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
>
Daniel P. Berrangé March 31, 2023, 7:59 a.m. UTC | #6
On Fri, Mar 31, 2023 at 03:48:18PM +0800, Jason Wang wrote:
> On Thu, Mar 30, 2023 at 4:34 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> >
> > On Thu, Mar 30, 2023 at 02:54:32PM +0800, Jason Wang wrote:
> > > On Thu, Mar 30, 2023 at 8:33 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> > >
> > > Who or how the ABI compatibility is preserved between libvirt and Qemu?
> >
> > There's no real problem with binary compatibility to solve any more.
> >
> > When libvirt first launches a QEMU VM, it will fetch the eBPF programs
> > it needs from that running QEMU using QMP. WHen it later needs to
> > enable features that use eBPF, it already has the program data that
> > matches the running QEMU
> 
> Ok, then who will validate the eBPF program? I don't think libvirt can
> trust what is received from Qemu otherwise arbitrary eBPF programs
> could be executed by Qemu in this way. One example is that when guests
> escape to Qemu it can modify the rss_bpf__elf_bytes. Though
> BPF_PROG_TYPE_SOCKET_FILTER gives some of the restrictions, we still
> need to evaluate side effects of this. Or we need to find other ways
> like using the binary in libvirt or use rx filter events.

As I mentioned, when libvirt first launches QEMU it will fetch the
eBPF programs and keep them for later use. At that point the guest
CPUs haven't started running, and so QEMU it still sufficiently
trustworthy.

With regards,
Daniel
Jason Wang March 31, 2023, 8:03 a.m. UTC | #7
On Fri, Mar 31, 2023 at 3:59 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Fri, Mar 31, 2023 at 03:48:18PM +0800, Jason Wang wrote:
> > On Thu, Mar 30, 2023 at 4:34 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> > >
> > > On Thu, Mar 30, 2023 at 02:54:32PM +0800, Jason Wang wrote:
> > > > On Thu, Mar 30, 2023 at 8:33 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> > > >
> > > > Who or how the ABI compatibility is preserved between libvirt and Qemu?
> > >
> > > There's no real problem with binary compatibility to solve any more.
> > >
> > > When libvirt first launches a QEMU VM, it will fetch the eBPF programs
> > > it needs from that running QEMU using QMP. WHen it later needs to
> > > enable features that use eBPF, it already has the program data that
> > > matches the running QEMU
> >
> > Ok, then who will validate the eBPF program? I don't think libvirt can
> > trust what is received from Qemu otherwise arbitrary eBPF programs
> > could be executed by Qemu in this way. One example is that when guests
> > escape to Qemu it can modify the rss_bpf__elf_bytes. Though
> > BPF_PROG_TYPE_SOCKET_FILTER gives some of the restrictions, we still
> > need to evaluate side effects of this. Or we need to find other ways
> > like using the binary in libvirt or use rx filter events.
>
> As I mentioned, when libvirt first launches QEMU it will fetch the
> eBPF programs and keep them for later use. At that point the guest
> CPUs haven't started running, and so QEMU it still sufficiently
> trustworthy.

Well, this means the QMP command is safe only before Qemu starts to
run VCPU. I'm not sure this is a good design. Or at least we need to
fail the QMP command if VCPU starts to run.

Thanks

>
> With regards,
> Daniel
> --
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
>
Daniel P. Berrangé March 31, 2023, 8:13 a.m. UTC | #8
On Fri, Mar 31, 2023 at 04:03:39PM +0800, Jason Wang wrote:
> On Fri, Mar 31, 2023 at 3:59 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> >
> > On Fri, Mar 31, 2023 at 03:48:18PM +0800, Jason Wang wrote:
> > > On Thu, Mar 30, 2023 at 4:34 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> > > >
> > > > On Thu, Mar 30, 2023 at 02:54:32PM +0800, Jason Wang wrote:
> > > > > On Thu, Mar 30, 2023 at 8:33 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> > > > >
> > > > > Who or how the ABI compatibility is preserved between libvirt and Qemu?
> > > >
> > > > There's no real problem with binary compatibility to solve any more.
> > > >
> > > > When libvirt first launches a QEMU VM, it will fetch the eBPF programs
> > > > it needs from that running QEMU using QMP. WHen it later needs to
> > > > enable features that use eBPF, it already has the program data that
> > > > matches the running QEMU
> > >
> > > Ok, then who will validate the eBPF program? I don't think libvirt can
> > > trust what is received from Qemu otherwise arbitrary eBPF programs
> > > could be executed by Qemu in this way. One example is that when guests
> > > escape to Qemu it can modify the rss_bpf__elf_bytes. Though
> > > BPF_PROG_TYPE_SOCKET_FILTER gives some of the restrictions, we still
> > > need to evaluate side effects of this. Or we need to find other ways
> > > like using the binary in libvirt or use rx filter events.
> >
> > As I mentioned, when libvirt first launches QEMU it will fetch the
> > eBPF programs and keep them for later use. At that point the guest
> > CPUs haven't started running, and so QEMU it still sufficiently
> > trustworthy.
> 
> Well, this means the QMP command is safe only before Qemu starts to
> run VCPU. I'm not sure this is a good design. Or at least we need to
> fail the QMP command if VCPU starts to run.

Currently QEMU has the ability to just create the eBPF programs itself
at will, when it is launched in a privileged scenario regardless of
guest CPU state. In terms of QMP, the reporting of QEMU PIDs for its
various vCPU, I/O threads is also not to be trusted after vCPU starts
if the guest workload is not trustworthy.  I feel this is more of docs
problem to explain the caveats that apps should be aware of.


With regards,
Daniel
Jason Wang March 31, 2023, 8:21 a.m. UTC | #9
On Fri, Mar 31, 2023 at 4:13 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Fri, Mar 31, 2023 at 04:03:39PM +0800, Jason Wang wrote:
> > On Fri, Mar 31, 2023 at 3:59 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> > >
> > > On Fri, Mar 31, 2023 at 03:48:18PM +0800, Jason Wang wrote:
> > > > On Thu, Mar 30, 2023 at 4:34 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> > > > >
> > > > > On Thu, Mar 30, 2023 at 02:54:32PM +0800, Jason Wang wrote:
> > > > > > On Thu, Mar 30, 2023 at 8:33 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> > > > > >
> > > > > > Who or how the ABI compatibility is preserved between libvirt and Qemu?
> > > > >
> > > > > There's no real problem with binary compatibility to solve any more.
> > > > >
> > > > > When libvirt first launches a QEMU VM, it will fetch the eBPF programs
> > > > > it needs from that running QEMU using QMP. WHen it later needs to
> > > > > enable features that use eBPF, it already has the program data that
> > > > > matches the running QEMU
> > > >
> > > > Ok, then who will validate the eBPF program? I don't think libvirt can
> > > > trust what is received from Qemu otherwise arbitrary eBPF programs
> > > > could be executed by Qemu in this way. One example is that when guests
> > > > escape to Qemu it can modify the rss_bpf__elf_bytes. Though
> > > > BPF_PROG_TYPE_SOCKET_FILTER gives some of the restrictions, we still
> > > > need to evaluate side effects of this. Or we need to find other ways
> > > > like using the binary in libvirt or use rx filter events.
> > >
> > > As I mentioned, when libvirt first launches QEMU it will fetch the
> > > eBPF programs and keep them for later use. At that point the guest
> > > CPUs haven't started running, and so QEMU it still sufficiently
> > > trustworthy.
> >
> > Well, this means the QMP command is safe only before Qemu starts to
> > run VCPU. I'm not sure this is a good design. Or at least we need to
> > fail the QMP command if VCPU starts to run.
>
> Currently QEMU has the ability to just create the eBPF programs itself
> at will, when it is launched in a privileged scenario regardless of
> guest CPU state. In terms of QMP, the reporting of QEMU PIDs for its
> various vCPU, I/O threads is also not to be trusted after vCPU starts
> if the guest workload is not trustworthy.

Indeed.

> I feel this is more of docs
> problem to explain the caveats that apps should be aware of.

Ok, we can probably document this and in the future we probably need
to address them.

Thanks

>
>
> With regards,
> Daniel
> --
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
>
diff mbox series

Patch

diff --git a/ebpf/ebpf.c b/ebpf/ebpf.c
new file mode 100644
index 0000000000..86320d72f5
--- /dev/null
+++ b/ebpf/ebpf.c
@@ -0,0 +1,48 @@ 
+/*
+ * QEMU eBPF binary declaration routine.
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ *  Andrew Melnychenko <andrew@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/queue.h"
+#include "ebpf/ebpf.h"
+
+struct ElfBinaryDataEntry {
+    const char *id;
+    const void * (*fn)(size_t *);
+
+    QSLIST_ENTRY(ElfBinaryDataEntry) node;
+};
+
+static QSLIST_HEAD(, ElfBinaryDataEntry) ebpf_elf_obj_list =
+                                            QSLIST_HEAD_INITIALIZER();
+
+void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *))
+{
+    struct ElfBinaryDataEntry *data = NULL;
+
+    data = g_malloc0(sizeof(*data));
+    data->fn = fn;
+    data->id = id;
+
+    QSLIST_INSERT_HEAD(&ebpf_elf_obj_list, data, node);
+}
+
+const void *ebpf_find_binary_by_id(const char *id, size_t *sz)
+{
+    struct ElfBinaryDataEntry *it = NULL;
+    QSLIST_FOREACH(it, &ebpf_elf_obj_list, node) {
+        if (strcmp(id, it->id) == 0) {
+            return it->fn(sz);
+        }
+    }
+
+    return NULL;
+}
diff --git a/ebpf/ebpf.h b/ebpf/ebpf.h
new file mode 100644
index 0000000000..fd705cb73e
--- /dev/null
+++ b/ebpf/ebpf.h
@@ -0,0 +1,25 @@ 
+/*
+ * QEMU eBPF binary declaration routine.
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ *  Andrew Melnychenko <andrew@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef EBPF_H
+#define EBPF_H
+
+void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *));
+const void *ebpf_find_binary_by_id(const char *id, size_t *sz);
+
+#define ebpf_binary_init(id, fn)                                           \
+static void __attribute__((constructor)) ebpf_binary_init_ ## fn(void)     \
+{                                                                          \
+    ebpf_register_binary_data(id, fn);                                     \
+}
+
+#endif /* EBPF_H */
diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
index 08015fecb1..b4038725f2 100644
--- a/ebpf/ebpf_rss.c
+++ b/ebpf/ebpf_rss.c
@@ -21,6 +21,8 @@ 
 
 #include "ebpf/ebpf_rss.h"
 #include "ebpf/rss.bpf.skeleton.h"
+#include "ebpf/ebpf.h"
+
 #include "trace.h"
 
 void ebpf_rss_init(struct EBPFRSSContext *ctx)
@@ -237,3 +239,5 @@  void ebpf_rss_unload(struct EBPFRSSContext *ctx)
     ctx->obj = NULL;
     ctx->program_fd = -1;
 }
+
+ebpf_binary_init("rss", rss_bpf__elf_bytes)
diff --git a/ebpf/meson.build b/ebpf/meson.build
index 2dd0fd8948..67c3f53aa9 100644
--- a/ebpf/meson.build
+++ b/ebpf/meson.build
@@ -1 +1,2 @@ 
+softmmu_ss.add(files('ebpf.c'))
 softmmu_ss.add(when: libbpf, if_true: files('ebpf_rss.c'), if_false: files('ebpf_rss-stub.c'))