[03/18] bpf: Add struct bpf_ksym

Message ID 20200216193005.144157-4-jolsa@kernel.org
State Changes Requested
Delegated to: BPF Maintainers
Series bpf: Add trampoline and dispatcher to /proc/kallsyms

Commit Message

Jiri Olsa Feb. 16, 2020, 7:29 p.m. UTC
Adding 'struct bpf_ksym' object that will carry the
kallsym information for bpf symbol. Adding the start
and end address to begin with. It will be used by
bpf_prog, bpf_trampoline, bpf_dispatcher.

Using the bpf_func for program symbol start instead
of the image start, because it will be used later for
kallsyms program value and it makes no difference
(compared to the image start) for sorting bpf programs.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/bpf.h |  6 ++++++
 kernel/bpf/core.c   | 26 +++++++++++---------------
 2 files changed, 17 insertions(+), 15 deletions(-)
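
For context, JITed programs already show up in /proc/kallsyms as
bpf_prog_<tag>[_<name>] symbols; a minimal userspace sketch (not part of
this patch, assumes a JIT-enabled kernel and enough privilege to see
non-zero addresses) that lists them:

	/* List bpf_prog symbols from /proc/kallsyms. */
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[512];
		FILE *f = fopen("/proc/kallsyms", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			if (strstr(line, " bpf_prog_"))	/* JITed program symbols */
				fputs(line, stdout);
		fclose(f);
		return 0;
	}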

Comments

Daniel Borkmann Feb. 18, 2020, 11:03 p.m. UTC | #1
On 2/16/20 8:29 PM, Jiri Olsa wrote:
> Adding 'struct bpf_ksym' object that will carry the
> kallsym information for bpf symbol. Adding the start
> and end address to begin with. It will be used by
> bpf_prog, bpf_trampoline, bpf_dispatcher.
> 
> Using the bpf_func for program symbol start instead
> of the image start, because it will be used later for
> kallsyms program value and it makes no difference
> (compared to the image start) for sorting bpf programs.
> 
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>   include/linux/bpf.h |  6 ++++++
>   kernel/bpf/core.c   | 26 +++++++++++---------------
>   2 files changed, 17 insertions(+), 15 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index be7afccc9459..5ad8eea1cd37 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -462,6 +462,11 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
>   u64 notrace __bpf_prog_enter(void);
>   void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
>   
> +struct bpf_ksym {
> +	unsigned long		 start;
> +	unsigned long		 end;
> +};
> +
>   enum bpf_tramp_prog_type {
>   	BPF_TRAMP_FENTRY,
>   	BPF_TRAMP_FEXIT,
> @@ -643,6 +648,7 @@ struct bpf_prog_aux {
>   	u32 size_poke_tab;
>   	struct latch_tree_node ksym_tnode;
>   	struct list_head ksym_lnode;
> +	struct bpf_ksym ksym;
>   	const struct bpf_prog_ops *ops;
>   	struct bpf_map **used_maps;
>   	struct bpf_prog *prog;
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 973a20d49749..39a9e4184900 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -524,17 +524,15 @@ int bpf_jit_harden   __read_mostly;
>   long bpf_jit_limit   __read_mostly;
>   
>   static __always_inline void
> -bpf_get_prog_addr_region(const struct bpf_prog *prog,
> -			 unsigned long *symbol_start,
> -			 unsigned long *symbol_end)
> +bpf_get_prog_addr_region(const struct bpf_prog *prog)
>   {
>   	const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
>   	unsigned long addr = (unsigned long)hdr;
>   
>   	WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
>   
> -	*symbol_start = addr;
> -	*symbol_end   = addr + hdr->pages * PAGE_SIZE;
> +	prog->aux->ksym.start = (unsigned long) prog->bpf_func;

Your commit descriptions are too terse. :/ What does "because it will be used
later for kallsyms program value" mean exactly compared to how it's used today
for programs?

Is this a requirement to have them point exactly to prog->bpf_func and if so
why? My concern is that bpf_func has a random offset from hdr, so even if the
/proc/kallsyms would be readable with concrete addresses for !cap_sys_admin
users, it's still not the concrete start address being exposed there, but the
allocated range instead.

> +	prog->aux->ksym.end   = addr + hdr->pages * PAGE_SIZE;
>   }
>
Jiri Olsa Feb. 19, 2020, 9:17 a.m. UTC | #2
On Wed, Feb 19, 2020 at 12:03:49AM +0100, Daniel Borkmann wrote:
> On 2/16/20 8:29 PM, Jiri Olsa wrote:
> > Adding 'struct bpf_ksym' object that will carry the
> > kallsym information for bpf symbol. Adding the start
> > and end address to begin with. It will be used by
> > bpf_prog, bpf_trampoline, bpf_dispatcher.
> > 
> > Using the bpf_func for program symbol start instead
> > of the image start, because it will be used later for
> > kallsyms program value and it makes no difference
> > (compared to the image start) for sorting bpf programs.
> > 
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> >   include/linux/bpf.h |  6 ++++++
> >   kernel/bpf/core.c   | 26 +++++++++++---------------
> >   2 files changed, 17 insertions(+), 15 deletions(-)
> > 
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index be7afccc9459..5ad8eea1cd37 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -462,6 +462,11 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
> >   u64 notrace __bpf_prog_enter(void);
> >   void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
> > +struct bpf_ksym {
> > +	unsigned long		 start;
> > +	unsigned long		 end;
> > +};
> > +
> >   enum bpf_tramp_prog_type {
> >   	BPF_TRAMP_FENTRY,
> >   	BPF_TRAMP_FEXIT,
> > @@ -643,6 +648,7 @@ struct bpf_prog_aux {
> >   	u32 size_poke_tab;
> >   	struct latch_tree_node ksym_tnode;
> >   	struct list_head ksym_lnode;
> > +	struct bpf_ksym ksym;
> >   	const struct bpf_prog_ops *ops;
> >   	struct bpf_map **used_maps;
> >   	struct bpf_prog *prog;
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index 973a20d49749..39a9e4184900 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -524,17 +524,15 @@ int bpf_jit_harden   __read_mostly;
> >   long bpf_jit_limit   __read_mostly;
> >   static __always_inline void
> > -bpf_get_prog_addr_region(const struct bpf_prog *prog,
> > -			 unsigned long *symbol_start,
> > -			 unsigned long *symbol_end)
> > +bpf_get_prog_addr_region(const struct bpf_prog *prog)
> >   {
> >   	const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
> >   	unsigned long addr = (unsigned long)hdr;
> >   	WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
> > -	*symbol_start = addr;
> > -	*symbol_end   = addr + hdr->pages * PAGE_SIZE;
> > +	prog->aux->ksym.start = (unsigned long) prog->bpf_func;
> 
> Your commit descriptions are too terse. :/ What does "because it will be used
> later for kallsyms program value" mean exactly compared to how it's used today
> for programs?

there are the symbol_start/symbol_end values, originally used to sort
bpf_prog objects, and there's the prog->bpf_func value, used as the
address displayed in /proc/kallsyms

I'm putting prog->bpf_func into bpf_ksym->start, so it's later
displayed as the bpf_prog address in /proc/kallsyms in this patch:

	bpf: Add lnode list node to struct bpf_ksym
	---
	@@ -736,13 +736,13 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
			return ret;
	 
		rcu_read_lock();
	-       list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) {
	+       list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) {
			if (it++ != symnum)
				continue;
	 
	-               strncpy(sym, aux->ksym.name, KSYM_NAME_LEN);
	+               strncpy(sym, ksym->name, KSYM_NAME_LEN);
	 
	-               *value = (unsigned long)aux->prog->bpf_func;
	+               *value = ksym->start;
			*type  = BPF_SYM_ELF_TYPE;


and the prog->bpf_func value is now also used as the memory 'start'
to sort bpf_prog objects, which does the same job as symbol_start

but maybe we could have a 'kallsyms' value in 'bpf_ksym' which would
be used as the value to display in /proc/kallsyms, like:

  struct bpf_ksym {
    unsigned long  start;
    unsigned long  end;
    unsigned long  kallsyms;
  };

and keep 'start/end' as the whole memory bounds used for sorting, to
avoid any confusion and surprises in the future
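
roughly, bpf_get_prog_addr_region() would then become something like
(untested sketch just to illustrate the idea, the 'kallsyms' member is
only the proposal above):

	static void bpf_get_prog_addr_region(const struct bpf_prog *prog)
	{
		const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
		unsigned long addr = (unsigned long)hdr;

		/* whole image bounds, used only for sorting/lookup */
		prog->aux->ksym.start    = addr;
		prog->aux->ksym.end      = addr + hdr->pages * PAGE_SIZE;
		/* address displayed in /proc/kallsyms */
		prog->aux->ksym.kallsyms = (unsigned long)prog->bpf_func;
	}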

> 
> Is this a requirement to have them point exactly to prog->bpf_func and if so
> why? My concern is that bpf_func has a random offset from hdr, so even if the
> /proc/kallsyms would be readable with concrete addresses for !cap_sys_admin
> users, it's still not the concrete start address being exposed there, but the
> allocated range instead.

in the last review Andrii suggested displaying the address of the
actual code start for trampolines and dispatchers instead of the
start of the whole memory image, which is actually what we need
for perf

jirka

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index be7afccc9459..5ad8eea1cd37 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -462,6 +462,11 @@  int arch_prepare_bpf_trampoline(void *image, void *image_end,
 u64 notrace __bpf_prog_enter(void);
 void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
 
+struct bpf_ksym {
+	unsigned long		 start;
+	unsigned long		 end;
+};
+
 enum bpf_tramp_prog_type {
 	BPF_TRAMP_FENTRY,
 	BPF_TRAMP_FEXIT,
@@ -643,6 +648,7 @@  struct bpf_prog_aux {
 	u32 size_poke_tab;
 	struct latch_tree_node ksym_tnode;
 	struct list_head ksym_lnode;
+	struct bpf_ksym ksym;
 	const struct bpf_prog_ops *ops;
 	struct bpf_map **used_maps;
 	struct bpf_prog *prog;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 973a20d49749..39a9e4184900 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -524,17 +524,15 @@  int bpf_jit_harden   __read_mostly;
 long bpf_jit_limit   __read_mostly;
 
 static __always_inline void
-bpf_get_prog_addr_region(const struct bpf_prog *prog,
-			 unsigned long *symbol_start,
-			 unsigned long *symbol_end)
+bpf_get_prog_addr_region(const struct bpf_prog *prog)
 {
 	const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
 	unsigned long addr = (unsigned long)hdr;
 
 	WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
 
-	*symbol_start = addr;
-	*symbol_end   = addr + hdr->pages * PAGE_SIZE;
+	prog->aux->ksym.start = (unsigned long) prog->bpf_func;
+	prog->aux->ksym.end   = addr + hdr->pages * PAGE_SIZE;
 }
 
 void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
@@ -575,13 +573,10 @@  void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
 static __always_inline unsigned long
 bpf_get_prog_addr_start(struct latch_tree_node *n)
 {
-	unsigned long symbol_start, symbol_end;
 	const struct bpf_prog_aux *aux;
 
 	aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
-	bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
-
-	return symbol_start;
+	return aux->ksym.start;
 }
 
 static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
@@ -593,15 +588,13 @@  static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
 static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
 {
 	unsigned long val = (unsigned long)key;
-	unsigned long symbol_start, symbol_end;
 	const struct bpf_prog_aux *aux;
 
 	aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
-	bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
 
-	if (val < symbol_start)
+	if (val < aux->ksym.start)
 		return -1;
-	if (val >= symbol_end)
+	if (val >= aux->ksym.end)
 		return  1;
 
 	return 0;
@@ -649,6 +642,8 @@  void bpf_prog_kallsyms_add(struct bpf_prog *fp)
 	    !capable(CAP_SYS_ADMIN))
 		return;
 
+	bpf_get_prog_addr_region(fp);
+
 	spin_lock_bh(&bpf_lock);
 	bpf_prog_ksym_node_add(fp->aux);
 	spin_unlock_bh(&bpf_lock);
@@ -677,14 +672,15 @@  static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
 const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
 				 unsigned long *off, char *sym)
 {
-	unsigned long symbol_start, symbol_end;
 	struct bpf_prog *prog;
 	char *ret = NULL;
 
 	rcu_read_lock();
 	prog = bpf_prog_kallsyms_find(addr);
 	if (prog) {
-		bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end);
+		unsigned long symbol_start = prog->aux->ksym.start;
+		unsigned long symbol_end = prog->aux->ksym.end;
+
 		bpf_get_prog_name(prog, sym);
 
 		ret = sym;