
[06/18] bpf: Add bpf_ksym_tree tree

Message ID: 20200216193005.144157-7-jolsa@kernel.org
State: Changes Requested
Delegated to: BPF Maintainers
Series: bpf: Add trampoline and dispatcher to /proc/kallsyms

Commit Message

Jiri Olsa Feb. 16, 2020, 7:29 p.m. UTC
The bpf_tree is used both for kallsyms iterations and for searching
the exception tables of bpf programs; the latter is needed only for
bpf_prog objects.

Adding bpf_ksym_tree that will hold symbols for all bpf_prog,
bpf_trampoline and bpf_dispatcher objects, and keeping bpf_tree
only for bpf_prog objects, so that the exception table search
stays fast.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/bpf.h |  1 +
 kernel/bpf/core.c   | 60 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 55 insertions(+), 6 deletions(-)
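
The new tree follows the usual latch-tree pattern: writers serialize
under bpf_lock, while readers (kallsyms and address lookups) only take
rcu_read_lock(). A minimal sketch of how a generic bpf_ksym would be
added to and removed from the new tree; the bpf_ksym_add/bpf_ksym_del
helper names below are illustrative, not part of this patch:

/* Illustrative sketch (not in this patch): register a generic bpf_ksym
 * in the kallsyms list and the new latch tree. Writers serialize under
 * bpf_lock; readers rely on RCU only.
 */
static void bpf_ksym_add(struct bpf_ksym *ksym)
{
	spin_lock_bh(&bpf_lock);
	list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
	latch_tree_insert(&ksym->tnode, &bpf_ksym_tree, &bpf_ksym_tree_ops);
	spin_unlock_bh(&bpf_lock);
}

static void bpf_ksym_del(struct bpf_ksym *ksym)
{
	spin_lock_bh(&bpf_lock);
	latch_tree_erase(&ksym->tnode, &bpf_ksym_tree, &bpf_ksym_tree_ops);
	list_del_rcu(&ksym->lnode);
	spin_unlock_bh(&bpf_lock);
}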

Comments

Daniel Borkmann Feb. 18, 2020, 10:34 p.m. UTC | #1
On 2/16/20 8:29 PM, Jiri Olsa wrote:
> The bpf_tree is used both for kallsyms iterations and for searching
> the exception tables of bpf programs; the latter is needed only for
> bpf_prog objects.
> 
> Adding bpf_ksym_tree that will hold symbols for all bpf_prog,
> bpf_trampoline and bpf_dispatcher objects, and keeping bpf_tree
> only for bpf_prog objects, so that the exception table search
> stays fast.
> 
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>   include/linux/bpf.h |  1 +
>   kernel/bpf/core.c   | 60 ++++++++++++++++++++++++++++++++++++++++-----
>   2 files changed, 55 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f1174d24c185..5d6649cdc3df 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -468,6 +468,7 @@ struct bpf_ksym {
>   	unsigned long		 end;
>   	char			 name[KSYM_NAME_LEN];
>   	struct list_head	 lnode;
> +	struct latch_tree_node	 tnode;
>   };
>   
>   enum bpf_tramp_prog_type {
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 604093d2153a..9fb08b4d01f7 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -606,8 +606,46 @@ static const struct latch_tree_ops bpf_tree_ops = {
>   	.comp	= bpf_tree_comp,
>   };
>   
> +static unsigned long
> +bpf_get_ksym_start(struct latch_tree_node *n)
> +{
> +	const struct bpf_ksym *ksym;
> +
> +	ksym = container_of(n, struct bpf_ksym, tnode);
> +	return ksym->start;

Small nit, can be simplified to:

	return container_of(n, struct bpf_ksym, tnode)->start;

> +}
> +
> +static bool
> +bpf_ksym_tree_less(struct latch_tree_node *a,
> +		   struct latch_tree_node *b)
> +{
> +	return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
> +}
> +
> +static int
> +bpf_ksym_tree_comp(void *key, struct latch_tree_node *n)
> +{
> +	unsigned long val = (unsigned long)key;
> +	const struct bpf_ksym *ksym;
> +
> +	ksym = container_of(n, struct bpf_ksym, tnode);
> +
> +	if (val < ksym->start)
> +		return -1;
> +	if (val >= ksym->end)
> +		return  1;
> +
> +	return 0;
> +}
> +
> +static const struct latch_tree_ops bpf_ksym_tree_ops = {
> +	.less	= bpf_ksym_tree_less,
> +	.comp	= bpf_ksym_tree_comp,
> +};
> +
>   static DEFINE_SPINLOCK(bpf_lock);
>   static LIST_HEAD(bpf_kallsyms);
> +static struct latch_tree_root bpf_ksym_tree __cacheline_aligned;
>   static struct latch_tree_root bpf_tree __cacheline_aligned;

You mention in your commit description that performance is the reason why
we need two latch trees. Can't we maintain everything in just a single one?

What does "to keep it fast" mean here in absolute numbers that would affect
overall system performance? It feels a bit like premature optimization with
the above rationale as-is.

If it is about differentiating the different bpf_ksym symbols for some of the
kallsym handling functions (?), can't we simply add an enum bpf_ksym_type {
BPF_SYM_PROGRAM, BPF_SYM_TRAMPOLINE, BPF_SYM_DISPATCHER } instead, but still
maintain them all in a single latch tree?
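
Something like the following, completely untested and just to
illustrate the idea:

enum bpf_ksym_type {
	BPF_SYM_PROGRAM,
	BPF_SYM_TRAMPOLINE,
	BPF_SYM_DISPATCHER,
};

struct bpf_ksym {
	unsigned long		 start;
	unsigned long		 end;
	char			 name[KSYM_NAME_LEN];
	struct list_head	 lnode;
	struct latch_tree_node	 tnode;
	enum bpf_ksym_type	 type;
};

/* Hypothetical helper: the exception table path would then filter on
 * the type after the single-tree lookup instead of consulting a
 * second tree.
 */
static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
{
	struct bpf_ksym *ksym = bpf_ksym_find(addr);

	if (!ksym || ksym->type != BPF_SYM_PROGRAM)
		return NULL;
	return container_of(ksym, struct bpf_prog_aux, ksym)->prog;
}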

Thanks,
Daniel
Jiri Olsa Feb. 19, 2020, 8:41 a.m. UTC | #2
On Tue, Feb 18, 2020 at 11:34:47PM +0100, Daniel Borkmann wrote:
> On 2/16/20 8:29 PM, Jiri Olsa wrote:
> > The bpf_tree is used both for kallsyms iterations and for searching
> > the exception tables of bpf programs; the latter is needed only for
> > bpf_prog objects.
> > 
> > Adding bpf_ksym_tree that will hold symbols for all bpf_prog,
> > bpf_trampoline and bpf_dispatcher objects, and keeping bpf_tree
> > only for bpf_prog objects, so that the exception table search
> > stays fast.
> > 
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> >   include/linux/bpf.h |  1 +
> >   kernel/bpf/core.c   | 60 ++++++++++++++++++++++++++++++++++++++++-----
> >   2 files changed, 55 insertions(+), 6 deletions(-)
> > 
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index f1174d24c185..5d6649cdc3df 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -468,6 +468,7 @@ struct bpf_ksym {
> >   	unsigned long		 end;
> >   	char			 name[KSYM_NAME_LEN];
> >   	struct list_head	 lnode;
> > +	struct latch_tree_node	 tnode;
> >   };
> >   enum bpf_tramp_prog_type {
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index 604093d2153a..9fb08b4d01f7 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -606,8 +606,46 @@ static const struct latch_tree_ops bpf_tree_ops = {
> >   	.comp	= bpf_tree_comp,
> >   };
> > +static unsigned long
> > +bpf_get_ksym_start(struct latch_tree_node *n)
> > +{
> > +	const struct bpf_ksym *ksym;
> > +
> > +	ksym = container_of(n, struct bpf_ksym, tnode);
> > +	return ksym->start;
> 
> Small nit, can be simplified to:
> 
> 	return container_of(n, struct bpf_ksym, tnode)->start;

ok

> 
> > +}
> > +
> > +static bool
> > +bpf_ksym_tree_less(struct latch_tree_node *a,
> > +		   struct latch_tree_node *b)
> > +{
> > +	return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
> > +}
> > +
> > +static int
> > +bpf_ksym_tree_comp(void *key, struct latch_tree_node *n)
> > +{
> > +	unsigned long val = (unsigned long)key;
> > +	const struct bpf_ksym *ksym;
> > +
> > +	ksym = container_of(n, struct bpf_ksym, tnode);
> > +
> > +	if (val < ksym->start)
> > +		return -1;
> > +	if (val >= ksym->end)
> > +		return  1;
> > +
> > +	return 0;
> > +}
> > +
> > +static const struct latch_tree_ops bpf_ksym_tree_ops = {
> > +	.less	= bpf_ksym_tree_less,
> > +	.comp	= bpf_ksym_tree_comp,
> > +};
> > +
> >   static DEFINE_SPINLOCK(bpf_lock);
> >   static LIST_HEAD(bpf_kallsyms);
> > +static struct latch_tree_root bpf_ksym_tree __cacheline_aligned;
> >   static struct latch_tree_root bpf_tree __cacheline_aligned;
> 
> You mention in your commit description that performance is the reason why
> we need two latch trees. Can't we maintain everything in just a single one?
> 
> What does "to keep it fast" mean here in absolute numbers that would affect
> overall system performance? It feels a bit like premature optimization with
> the above rationale as-is.
> 
> If it is about differentiating the different bpf_ksym symbols for some of the
> kallsym handling functions (?), can't we simply add an enum bpf_ksym_type {
> BPF_SYM_PROGRAM, BPF_SYM_TRAMPOLINE, BPF_SYM_DISPATCHER } instead, but still
> maintain them all in a single latch tree?

the motivation is that up to now we stored only bpf_prog objects in the
tree, and the tree was used both for kallsyms and for the exception
table lookups (in the search_bpf_extables function)

but if we added trampoline and dispatcher objects to the same tree, the
exception table lookups would suffer from having to traverse more
objects during the search, hence the separation of the trees

I don't have any performance numbers supporting this, just the
rationale above
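
fwiw the lookup in question looks roughly like this (kernel/extable.c,
paraphrased):

const struct exception_table_entry *search_bpf_extables(unsigned long addr)
{
	const struct exception_table_entry *e = NULL;
	struct bpf_prog *prog;

	rcu_read_lock();
	prog = bpf_prog_kallsyms_find(addr);
	if (!prog)
		goto out;
	if (!prog->aux->num_exentries)
		goto out;

	e = search_extable(prog->aux->extable,
			   prog->aux->num_exentries,
			   addr);
out:
	rcu_read_unlock();
	return e;
}

it's the bpf_prog_kallsyms_find() call above that I'd like to keep
walking bpf_prog objects only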

jirka

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f1174d24c185..5d6649cdc3df 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -468,6 +468,7 @@ struct bpf_ksym {
 	unsigned long		 end;
 	char			 name[KSYM_NAME_LEN];
 	struct list_head	 lnode;
+	struct latch_tree_node	 tnode;
 };
 
 enum bpf_tramp_prog_type {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 604093d2153a..9fb08b4d01f7 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -606,8 +606,46 @@ static const struct latch_tree_ops bpf_tree_ops = {
 	.comp	= bpf_tree_comp,
 };
 
+static unsigned long
+bpf_get_ksym_start(struct latch_tree_node *n)
+{
+	const struct bpf_ksym *ksym;
+
+	ksym = container_of(n, struct bpf_ksym, tnode);
+	return ksym->start;
+}
+
+static bool
+bpf_ksym_tree_less(struct latch_tree_node *a,
+		   struct latch_tree_node *b)
+{
+	return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
+}
+
+static int
+bpf_ksym_tree_comp(void *key, struct latch_tree_node *n)
+{
+	unsigned long val = (unsigned long)key;
+	const struct bpf_ksym *ksym;
+
+	ksym = container_of(n, struct bpf_ksym, tnode);
+
+	if (val < ksym->start)
+		return -1;
+	if (val >= ksym->end)
+		return  1;
+
+	return 0;
+}
+
+static const struct latch_tree_ops bpf_ksym_tree_ops = {
+	.less	= bpf_ksym_tree_less,
+	.comp	= bpf_ksym_tree_comp,
+};
+
 static DEFINE_SPINLOCK(bpf_lock);
 static LIST_HEAD(bpf_kallsyms);
+static struct latch_tree_root bpf_ksym_tree __cacheline_aligned;
 static struct latch_tree_root bpf_tree __cacheline_aligned;
 
 static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
@@ -615,6 +653,7 @@ static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
 	WARN_ON_ONCE(!list_empty(&aux->ksym.lnode));
 	list_add_tail_rcu(&aux->ksym.lnode, &bpf_kallsyms);
 	latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
+	latch_tree_insert(&aux->ksym.tnode, &bpf_ksym_tree, &bpf_ksym_tree_ops);
 }
 
 static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
@@ -623,6 +662,7 @@ static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
 		return;
 
 	latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
+	latch_tree_erase(&aux->ksym.tnode, &bpf_ksym_tree, &bpf_ksym_tree_ops);
 	list_del_rcu(&aux->ksym.lnode);
 }
 
@@ -671,19 +711,27 @@ static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
 	       NULL;
 }
 
+static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
+{
+	struct latch_tree_node *n;
+
+	n = latch_tree_find((void *)addr, &bpf_ksym_tree, &bpf_ksym_tree_ops);
+	return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
+}
+
 const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
 				 unsigned long *off, char *sym)
 {
-	struct bpf_prog *prog;
+	struct bpf_ksym *ksym;
 	char *ret = NULL;
 
 	rcu_read_lock();
-	prog = bpf_prog_kallsyms_find(addr);
-	if (prog) {
-		unsigned long symbol_start = prog->aux->ksym.start;
-		unsigned long symbol_end = prog->aux->ksym.end;
+	ksym = bpf_ksym_find(addr);
+	if (ksym) {
+		unsigned long symbol_start = ksym->start;
+		unsigned long symbol_end = ksym->end;
 
-		strncpy(sym, prog->aux->ksym.name, KSYM_NAME_LEN);
+		strncpy(sym, ksym->name, KSYM_NAME_LEN);
 
 		ret = sym;
 		if (size)