
[bpf-next,3/5] btf: allow to customize dedup hash table size

Message ID: 20190227224642.1069138-4-andriin@fb.com
State: Changes Requested
Delegated to: BPF Maintainers
Series: btf_dedup algorithm and test fixes

Commit Message

Andrii Nakryiko Feb. 27, 2019, 10:46 p.m. UTC
The default size of the dedup table (16k) is good enough for most
binaries, even typical vmlinux images. But there are binaries with a
huge number of BTF types (e.g., allyesconfig variants of the kernel),
which benefit from a bigger dedup table that reduces the number of
unnecessary hash collisions. Tools like pahole can thus tune this
parameter to reach optimal performance.

This change also serves the double purpose of allowing tests to force
hash collisions in order to exercise some corner cases; this is used in
a follow-up patch.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/lib/bpf/btf.c | 43 ++++++++++++++++++++++++++-----------------
 tools/lib/bpf/btf.h |  1 +
 2 files changed, 27 insertions(+), 17 deletions(-)
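
For orientation, a minimal sketch of how a caller such as pahole could use
the new option. The type, field, and function names come from this patch;
the wrapper function, the header path, and the chosen size are illustrative
assumptions:

        #include <bpf/btf.h>    /* assumed install path of tools/lib/bpf/btf.h */

        /* hypothetical wrapper, not part of the patch */
        static int dedup_with_big_table(struct btf *btf, struct btf_ext *btf_ext)
        {
                struct btf_dedup_opts opts = {
                        /* rounded up to a power of two by libbpf, capped at 2G */
                        .dedup_table_size = 1 << 17,    /* 128k buckets */
                };

                return btf__dedup(btf, btf_ext, &opts); /* 0 on success, <0 on error */
        }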

Comments

Song Liu Feb. 28, 2019, 6:27 p.m. UTC | #1
On Wed, Feb 27, 2019 at 2:47 PM Andrii Nakryiko <andriin@fb.com> wrote:
>
> [...]
>
> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index 68b50e9bbde1..6bbb710216e6 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c
> @@ -1070,8 +1070,7 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
>         return err;
>  }
>
> -#define BTF_DEDUP_TABLE_SIZE_LOG 14
> -#define BTF_DEDUP_TABLE_MOD ((1 << BTF_DEDUP_TABLE_SIZE_LOG) - 1)
> +#define BTF_DEDUP_TABLE_DEFAULT_SIZE (1 << 14)
>  #define BTF_UNPROCESSED_ID ((__u32)-1)
>  #define BTF_IN_PROGRESS_ID ((__u32)-2)
>
> @@ -1128,18 +1127,21 @@ static inline __u32 hash_combine(__u32 h, __u32 value)
>  #undef GOLDEN_RATIO_PRIME
>  }
>
> -#define for_each_hash_node(table, hash, node) \
> -       for (node = table[hash & BTF_DEDUP_TABLE_MOD]; node; node = node->next)
> +#define for_each_dedup_cand(d, hash, node) \
> +       for (node = d->dedup_table[hash & (d->opts.dedup_table_size - 1)]; \
> +            node;                                                         \
> +            node = node->next)
>
>  static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id)
>  {
>         struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node));
> +       int bucket = hash & (d->opts.dedup_table_size - 1);
>
>         if (!node)
>                 return -ENOMEM;
>         node->type_id = type_id;
> -       node->next = d->dedup_table[hash & BTF_DEDUP_TABLE_MOD];
> -       d->dedup_table[hash & BTF_DEDUP_TABLE_MOD] = node;
> +       node->next = d->dedup_table[bucket];
> +       d->dedup_table[bucket] = node;
>         return 0;
>  }
>
> @@ -1177,7 +1179,7 @@ static void btf_dedup_table_free(struct btf_dedup *d)
>         if (!d->dedup_table)
>                 return;
>
> -       for (i = 0; i < (1 << BTF_DEDUP_TABLE_SIZE_LOG); i++) {
> +       for (i = 0; i < d->opts.dedup_table_size; i++) {
>                 while (d->dedup_table[i]) {
>                         tmp = d->dedup_table[i];
>                         d->dedup_table[i] = tmp->next;
> @@ -1221,10 +1223,19 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
>         if (!d)
>                 return ERR_PTR(-ENOMEM);
>
> +       d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
> +       /* ensure table size is power of two and limit to 2G */
> +       d->opts.dedup_table_size = opts && opts->dedup_table_size
> +               ? opts->dedup_table_size
> +               : BTF_DEDUP_TABLE_DEFAULT_SIZE;
> +       for (i = 0; i < 31 && (1 << i) < d->opts.dedup_table_size;  i++)
> +               ;
> +       d->opts.dedup_table_size = 1 << i;
> +
So this is the roundup log2 logic? How about we define some macros
to make it cleaner? Like

#define BTF_DEDUP_TABLE_MAX_SIZE xxxx

Also, how big a hash table do we need for allyesconfig? 2G seems really
big to me.

>         d->btf = btf;
>         d->btf_ext = btf_ext;
>
> -       d->dedup_table = calloc(1 << BTF_DEDUP_TABLE_SIZE_LOG,
> +       d->dedup_table = calloc(d->opts.dedup_table_size,
>                                 sizeof(struct btf_dedup_node *));
>         if (!d->dedup_table) {
>                 err = -ENOMEM;
> @@ -1249,8 +1260,6 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
>         for (i = 0; i <= btf->nr_types; i++)
>                 d->hypot_map[i] = BTF_UNPROCESSED_ID;
>
> -       d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
> -
>  done:
>         if (err) {
>                 btf_dedup_free(d);
> @@ -1824,7 +1833,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
>
>         case BTF_KIND_INT:
>                 h = btf_hash_int(t);
> -               for_each_hash_node(d->dedup_table, h, cand_node) {
> +               for_each_dedup_cand(d, h, cand_node) {
>                         cand = d->btf->types[cand_node->type_id];
>                         if (btf_equal_int(t, cand)) {
>                                 new_id = cand_node->type_id;
> @@ -1835,7 +1844,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
>
>         case BTF_KIND_ENUM:
>                 h = btf_hash_enum(t);
> -               for_each_hash_node(d->dedup_table, h, cand_node) {
> +               for_each_dedup_cand(d, h, cand_node) {
>                         cand = d->btf->types[cand_node->type_id];
>                         if (btf_equal_enum(t, cand)) {
>                                 new_id = cand_node->type_id;
> @@ -1846,7 +1855,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
>
>         case BTF_KIND_FWD:
>                 h = btf_hash_common(t);
> -               for_each_hash_node(d->dedup_table, h, cand_node) {
> +               for_each_dedup_cand(d, h, cand_node) {
>                         cand = d->btf->types[cand_node->type_id];
>                         if (btf_equal_common(t, cand)) {
>                                 new_id = cand_node->type_id;
> @@ -2263,7 +2272,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
>                 return 0;
>
>         h = btf_hash_struct(t);
> -       for_each_hash_node(d->dedup_table, h, cand_node) {
> +       for_each_dedup_cand(d, h, cand_node) {
>                 int eq;
>
>                 btf_dedup_clear_hypot_map(d);
> @@ -2349,7 +2358,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
>                 t->type = ref_type_id;
>
>                 h = btf_hash_common(t);
> -               for_each_hash_node(d->dedup_table, h, cand_node) {
> +               for_each_dedup_cand(d, h, cand_node) {
>                         cand = d->btf->types[cand_node->type_id];
>                         if (btf_equal_common(t, cand)) {
>                                 new_id = cand_node->type_id;
> @@ -2372,7 +2381,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
>                 info->index_type = ref_type_id;
>
>                 h = btf_hash_array(t);
> -               for_each_hash_node(d->dedup_table, h, cand_node) {
> +               for_each_dedup_cand(d, h, cand_node) {
>                         cand = d->btf->types[cand_node->type_id];
>                         if (btf_equal_array(t, cand)) {
>                                 new_id = cand_node->type_id;
> @@ -2403,7 +2412,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
>                 }
>
>                 h = btf_hash_fnproto(t);
> -               for_each_hash_node(d->dedup_table, h, cand_node) {
> +               for_each_dedup_cand(d, h, cand_node) {
>                         cand = d->btf->types[cand_node->type_id];
>                         if (btf_equal_fnproto(t, cand)) {
>                                 new_id = cand_node->type_id;
> diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> index b60bb7cf5fff..28a1e1e59861 100644
> --- a/tools/lib/bpf/btf.h
> +++ b/tools/lib/bpf/btf.h
> @@ -90,6 +90,7 @@ LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
>  LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
>
>  struct btf_dedup_opts {
> +       unsigned int dedup_table_size;
>         bool dont_resolve_fwds;
>  };
>
> --
> 2.17.1
>
Andrii Nakryiko Feb. 28, 2019, 6:51 p.m. UTC | #2
On Thu, Feb 28, 2019 at 10:27 AM Song Liu <liu.song.a23@gmail.com> wrote:
>
> On Wed, Feb 27, 2019 at 2:47 PM Andrii Nakryiko <andriin@fb.com> wrote:
> > [...]
> > +       /* ensure table size is power of two and limit to 2G */
> > +       d->opts.dedup_table_size = opts && opts->dedup_table_size
> > +               ? opts->dedup_table_size
> > +               : BTF_DEDUP_TABLE_DEFAULT_SIZE;
> > +       for (i = 0; i < 31 && (1 << i) < d->opts.dedup_table_size;  i++)
> > +               ;
> > +       d->opts.dedup_table_size = 1 << i;
> > +
> So this is the roundup log2 logic? How about we define some macros
> to make it cleaner? Like
>
> #define BTF_DEDUP_TABLE_MAX_SIZE xxxx

You mean hide this loop behind a macro? Or specify the max size as a
macro? If the former, I'd rather do a static function, something like
roundup_pow_of_2_with_max?
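
(For concreteness only: a sketch of what such a static helper might look
like, mirroring the loop in the patch. The function name is the one
proposed above; this is not code from the patch.)

        static __u32 roundup_pow_of_2_with_max(__u32 n, __u32 max_size_log)
        {
                __u32 i;

                /* smallest i such that 2^i >= n, capped at 2^max_size_log */
                for (i = 0; i < max_size_log && (1U << i) < n; i++)
                        ;
                return 1U << i;
        }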

>
> Also, how big a hash table do we need for allyesconfig? 2G seems really
> big to me.

It works even with 16k and takes about 45 seconds. I didn't want to
artificially limit this to something small, so I left it up to the user.
The algorithm can be used on arbitrary binaries after pahole's
dwarf2btf conversion, which could be much bigger than the kernel.
Realistically, it's unlikely you'll need more than 64k-128k.
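
(Aside for readers: the size is rounded to a power of two so the bucket
index can be computed with a mask instead of a modulo. A tiny standalone
illustration, not code from the patch:)

        #include <assert.h>

        int main(void)
        {
                unsigned int size = 1U << 14;   /* must be a power of two */
                unsigned int hash = 0xdeadbeef;

                /* for power-of-two sizes, masking equals taking the remainder */
                assert((hash & (size - 1)) == (hash % size));
                return 0;
        }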

> [...]
Arnaldo Carvalho de Melo Feb. 28, 2019, 6:57 p.m. UTC | #3
On Wed, Feb 27, 2019 at 02:46:39PM -0800, Andrii Nakryiko wrote:
> [...]
> @@ -1221,10 +1223,19 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
>  	if (!d)
>  		return ERR_PTR(-ENOMEM);
>  
> +	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;

Is the above line a leftover from some other patch? Doesn't seem related
to this patch.

The rest seems ok.

> [...]
Song Liu Feb. 28, 2019, 7:02 p.m. UTC | #4
> On Feb 28, 2019, at 10:51 AM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> 
> On Thu, Feb 28, 2019 at 10:27 AM Song Liu <liu.song.a23@gmail.com> wrote:
>> 
>> On Wed, Feb 27, 2019 at 2:47 PM Andrii Nakryiko <andriin@fb.com> wrote:
>>> [...]
>> So this is the roundup log2 logic? How about we define some macros
>> to make it cleaner? Like
>> 
>> #define BTF_DEDUP_TABLE_MAX_SIZE xxxx
> 
> You mean hide this loop behind a macro? Or specify the max size as a
> macro? If the former, I'd rather do a static function, something like
> roundup_pow_of_2_with_max?

I meant specifying the max size as a macro. Doing a static function is
also a good idea.
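
(A sketch of that suggestion; the macro names are hypothetical, while the
2G cap matches the comment in the patch:)

        #define BTF_DEDUP_TABLE_MAX_SIZE_LOG    31
        #define BTF_DEDUP_TABLE_MAX_SIZE        (1U << BTF_DEDUP_TABLE_MAX_SIZE_LOG)    /* 2G */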

> 
>> 
>> Also, how big a hash table do we need for allyesconfig? 2G seems really
>> big to me.
> 
> It works even with 16k and takes about 45 seconds. I didn't want to
> artificially limit this to something small, so I left it up to the user.
> The algorithm can be used on arbitrary binaries after pahole's
> dwarf2btf conversion, which could be much bigger than the kernel.
> Realistically, it's unlikely you'll need more than 64k-128k.

How about we show a warning like "You are using a really big size" for
overly large numbers, like 16M+?

Thanks,
Song

> [...]
Andrii Nakryiko Feb. 28, 2019, 7:40 p.m. UTC | #5
On Thu, Feb 28, 2019 at 11:02 AM Song Liu <songliubraving@fb.com> wrote:
>
>
>
> > On Feb 28, 2019, at 10:51 AM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> >
> > On Thu, Feb 28, 2019 at 10:27 AM Song Liu <liu.song.a23@gmail.com> wrote:
> >>
> >> On Wed, Feb 27, 2019 at 2:47 PM Andrii Nakryiko <andriin@fb.com> wrote:
> >>> [...]
> >> So this is the roundup log2 logic? How about we define some macros
> >> to make it cleaner? Like
> >>
> >> #define BTF_DEDUP_TABLE_MAX_SIZE xxxx
> >
> > You mean hide this loop behind a macro? Or specify the max size as a
> > macro? If the former, I'd rather do a static function, something like
> > roundup_pow_of_2_with_max?
>
> I meant specifying the max size as a macro. Doing a static function is
> also a good idea.

Ok, will do.

>
> >
> >>
> >> Also, how big a hash table do we need for allyesconfig? 2G seems really
> >> big to me.
> >
> > It works even with 16k and takes about 45 seconds. I didn't want to
> > artificially limit this to something small, so I left it up to the user.
> > The algorithm can be used on arbitrary binaries after pahole's
> > dwarf2btf conversion, which could be much bigger than the kernel.
> > Realistically, it's unlikely you'll need more than 64k-128k.
>
> How about we show a warning like "You are using a really big size" for
> overly large numbers, like 16M+?

Tbh, adding an extra arbitrary constant and emitting a warning seems
excessive. If it's a huge number, the allocation will fail and we'll
return an error. If it's just big, RSS will be high, and if the user
cares, he should be able to deduce that he specified an unreasonable
size. In general, I doubt this value will be overridden often; most
users will probably just use the default setting.

> [...]
Song Liu Feb. 28, 2019, 7:56 p.m. UTC | #6
> On Feb 28, 2019, at 11:40 AM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> 
> On Thu, Feb 28, 2019 at 11:02 AM Song Liu <songliubraving@fb.com> wrote:
>> 
>> 
>> 
>>> On Feb 28, 2019, at 10:51 AM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>>> 
>>> On Thu, Feb 28, 2019 at 10:27 AM Song Liu <liu.song.a23@gmail.com> wrote:
>>>> 
>>>> On Wed, Feb 27, 2019 at 2:47 PM Andrii Nakryiko <andriin@fb.com> wrote:
>>>>> [...]
>>
>> How about we show a warning like "You are using a really big size" for
>> overly large numbers, like 16M+?
> 
> Tbh, adding an extra arbitrary constant and emitting a warning seems
> excessive. If it's a huge number, the allocation will fail and we'll
> return an error. If it's just big, RSS will be high, and if the user
> cares, he should be able to deduce that he specified an unreasonable
> size. In general, I doubt this value will be overridden often; most
> users will probably just use the default setting.

Fair enough. Let's go with 2G as-is then. 

> [...]
Andrii Nakryiko Feb. 28, 2019, 8:08 p.m. UTC | #7
On Thu, Feb 28, 2019 at 10:57 AM Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>
> On Wed, Feb 27, 2019 at 02:46:39PM -0800, Andrii Nakryiko wrote:
> > [...]
> > +     d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
>
> Is the above line a leftover from some other patch? Doesn't seem related
> to this patch.
>
> The rest seems ok.

No, I added another option and moved options processing to the top of
btf_dedup_new(). It used to be at the end, but it makes sense to
process options first, because they can be needed while constructing
the btf_dedup object, as is the case for dedup_table_size.
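
As a minimal sketch (not part of the patch itself), a caller such as
pahole could pass the new option as follows; it assumes btf and
btf_ext were already loaded elsewhere, and <stdio.h> for the error
print:

	struct btf_dedup_opts opts = {
		/* rounded up to the next power of two by btf_dedup_new() */
		.dedup_table_size = 300 * 1000,
		.dont_resolve_fwds = false,
	};
	int err = btf__dedup(btf, btf_ext, &opts);

	if (err)
		fprintf(stderr, "btf__dedup failed: %d\n", err);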

>
> > +     /* ensure table size is power of two and limit to 2G */
> > +     d->opts.dedup_table_size = opts && opts->dedup_table_size
> > +             ? opts->dedup_table_size
> > +             : BTF_DEDUP_TABLE_DEFAULT_SIZE;
> > +     for (i = 0; i < 31 && (1 << i) < d->opts.dedup_table_size;  i++)
> > +             ;
> > +     d->opts.dedup_table_size = 1 << i;
> > +
> >       d->btf = btf;
> >       d->btf_ext = btf_ext;
> >
> > -     d->dedup_table = calloc(1 << BTF_DEDUP_TABLE_SIZE_LOG,
> > +     d->dedup_table = calloc(d->opts.dedup_table_size,
> >                               sizeof(struct btf_dedup_node *));
> >       if (!d->dedup_table) {
> >               err = -ENOMEM;
> > @@ -1249,8 +1260,6 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
> >       for (i = 0; i <= btf->nr_types; i++)
> >               d->hypot_map[i] = BTF_UNPROCESSED_ID;
> >
> > -     d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
> > -
> >  done:
> >       if (err) {
> >               btf_dedup_free(d);
> > @@ -1824,7 +1833,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
> >
> >       case BTF_KIND_INT:
> >               h = btf_hash_int(t);
> > -             for_each_hash_node(d->dedup_table, h, cand_node) {
> > +             for_each_dedup_cand(d, h, cand_node) {
> >                       cand = d->btf->types[cand_node->type_id];
> >                       if (btf_equal_int(t, cand)) {
> >                               new_id = cand_node->type_id;
> > @@ -1835,7 +1844,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
> >
> >       case BTF_KIND_ENUM:
> >               h = btf_hash_enum(t);
> > -             for_each_hash_node(d->dedup_table, h, cand_node) {
> > +             for_each_dedup_cand(d, h, cand_node) {
> >                       cand = d->btf->types[cand_node->type_id];
> >                       if (btf_equal_enum(t, cand)) {
> >                               new_id = cand_node->type_id;
> > @@ -1846,7 +1855,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
> >
> >       case BTF_KIND_FWD:
> >               h = btf_hash_common(t);
> > -             for_each_hash_node(d->dedup_table, h, cand_node) {
> > +             for_each_dedup_cand(d, h, cand_node) {
> >                       cand = d->btf->types[cand_node->type_id];
> >                       if (btf_equal_common(t, cand)) {
> >                               new_id = cand_node->type_id;
> > @@ -2263,7 +2272,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
> >               return 0;
> >
> >       h = btf_hash_struct(t);
> > -     for_each_hash_node(d->dedup_table, h, cand_node) {
> > +     for_each_dedup_cand(d, h, cand_node) {
> >               int eq;
> >
> >               btf_dedup_clear_hypot_map(d);
> > @@ -2349,7 +2358,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
> >               t->type = ref_type_id;
> >
> >               h = btf_hash_common(t);
> > -             for_each_hash_node(d->dedup_table, h, cand_node) {
> > +             for_each_dedup_cand(d, h, cand_node) {
> >                       cand = d->btf->types[cand_node->type_id];
> >                       if (btf_equal_common(t, cand)) {
> >                               new_id = cand_node->type_id;
> > @@ -2372,7 +2381,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
> >               info->index_type = ref_type_id;
> >
> >               h = btf_hash_array(t);
> > -             for_each_hash_node(d->dedup_table, h, cand_node) {
> > +             for_each_dedup_cand(d, h, cand_node) {
> >                       cand = d->btf->types[cand_node->type_id];
> >                       if (btf_equal_array(t, cand)) {
> >                               new_id = cand_node->type_id;
> > @@ -2403,7 +2412,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
> >               }
> >
> >               h = btf_hash_fnproto(t);
> > -             for_each_hash_node(d->dedup_table, h, cand_node) {
> > +             for_each_dedup_cand(d, h, cand_node) {
> >                       cand = d->btf->types[cand_node->type_id];
> >                       if (btf_equal_fnproto(t, cand)) {
> >                               new_id = cand_node->type_id;
> > diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> > index b60bb7cf5fff..28a1e1e59861 100644
> > --- a/tools/lib/bpf/btf.h
> > +++ b/tools/lib/bpf/btf.h
> > @@ -90,6 +90,7 @@ LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
> >  LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
> >
> >  struct btf_dedup_opts {
> > +     unsigned int dedup_table_size;
> >       bool dont_resolve_fwds;
> >  };
> >
> > --
> > 2.17.1
>
> --
>
> - Arnaldo
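
The commit message also notes that the new option lets tests force
hash collisions; a hypothetical sketch of that usage (test_btf stands
in for a test-constructed BTF object; the real test code is in the
follow-up patch):

	/* a size of 1 keeps every type in a single bucket, forcing
	 * all candidate lookups through the collision path
	 */
	struct btf_dedup_opts opts = { .dedup_table_size = 1 };
	int err = btf__dedup(test_btf, NULL, &opts);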
Patch

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 68b50e9bbde1..6bbb710216e6 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1070,8 +1070,7 @@  int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
 	return err;
 }
 
-#define BTF_DEDUP_TABLE_SIZE_LOG 14
-#define BTF_DEDUP_TABLE_MOD ((1 << BTF_DEDUP_TABLE_SIZE_LOG) - 1)
+#define BTF_DEDUP_TABLE_DEFAULT_SIZE (1 << 14)
 #define BTF_UNPROCESSED_ID ((__u32)-1)
 #define BTF_IN_PROGRESS_ID ((__u32)-2)
 
@@ -1128,18 +1127,21 @@  static inline __u32 hash_combine(__u32 h, __u32 value)
 #undef GOLDEN_RATIO_PRIME
 }
 
-#define for_each_hash_node(table, hash, node) \
-	for (node = table[hash & BTF_DEDUP_TABLE_MOD]; node; node = node->next)
+#define for_each_dedup_cand(d, hash, node) \
+	for (node = d->dedup_table[hash & (d->opts.dedup_table_size - 1)]; \
+	     node;							   \
+	     node = node->next)
 
 static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id)
 {
 	struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node));
+	int bucket = hash & (d->opts.dedup_table_size - 1);
 
 	if (!node)
 		return -ENOMEM;
 	node->type_id = type_id;
-	node->next = d->dedup_table[hash & BTF_DEDUP_TABLE_MOD];
-	d->dedup_table[hash & BTF_DEDUP_TABLE_MOD] = node;
+	node->next = d->dedup_table[bucket];
+	d->dedup_table[bucket] = node;
 	return 0;
 }
 
@@ -1177,7 +1179,7 @@  static void btf_dedup_table_free(struct btf_dedup *d)
 	if (!d->dedup_table)
 		return;
 
-	for (i = 0; i < (1 << BTF_DEDUP_TABLE_SIZE_LOG); i++) {
+	for (i = 0; i < d->opts.dedup_table_size; i++) {
 		while (d->dedup_table[i]) {
 			tmp = d->dedup_table[i];
 			d->dedup_table[i] = tmp->next;
@@ -1221,10 +1223,19 @@  static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 	if (!d)
 		return ERR_PTR(-ENOMEM);
 
+	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
+	/* ensure table size is power of two and limit to 2G */
+	d->opts.dedup_table_size = opts && opts->dedup_table_size
+		? opts->dedup_table_size
+		: BTF_DEDUP_TABLE_DEFAULT_SIZE;
+	for (i = 0; i < 31 && (1 << i) < d->opts.dedup_table_size;  i++)
+		;
+	d->opts.dedup_table_size = 1 << i;
+
 	d->btf = btf;
 	d->btf_ext = btf_ext;
 
-	d->dedup_table = calloc(1 << BTF_DEDUP_TABLE_SIZE_LOG,
+	d->dedup_table = calloc(d->opts.dedup_table_size,
 				sizeof(struct btf_dedup_node *));
 	if (!d->dedup_table) {
 		err = -ENOMEM;
@@ -1249,8 +1260,6 @@  static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 	for (i = 0; i <= btf->nr_types; i++)
 		d->hypot_map[i] = BTF_UNPROCESSED_ID;
 
-	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
-
 done:
 	if (err) {
 		btf_dedup_free(d);
@@ -1824,7 +1833,7 @@  static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 
 	case BTF_KIND_INT:
 		h = btf_hash_int(t);
-		for_each_hash_node(d->dedup_table, h, cand_node) {
+		for_each_dedup_cand(d, h, cand_node) {
 			cand = d->btf->types[cand_node->type_id];
 			if (btf_equal_int(t, cand)) {
 				new_id = cand_node->type_id;
@@ -1835,7 +1844,7 @@  static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 
 	case BTF_KIND_ENUM:
 		h = btf_hash_enum(t);
-		for_each_hash_node(d->dedup_table, h, cand_node) {
+		for_each_dedup_cand(d, h, cand_node) {
 			cand = d->btf->types[cand_node->type_id];
 			if (btf_equal_enum(t, cand)) {
 				new_id = cand_node->type_id;
@@ -1846,7 +1855,7 @@  static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 
 	case BTF_KIND_FWD:
 		h = btf_hash_common(t);
-		for_each_hash_node(d->dedup_table, h, cand_node) {
+		for_each_dedup_cand(d, h, cand_node) {
 			cand = d->btf->types[cand_node->type_id];
 			if (btf_equal_common(t, cand)) {
 				new_id = cand_node->type_id;
@@ -2263,7 +2272,7 @@  static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 		return 0;
 
 	h = btf_hash_struct(t);
-	for_each_hash_node(d->dedup_table, h, cand_node) {
+	for_each_dedup_cand(d, h, cand_node) {
 		int eq;
 
 		btf_dedup_clear_hypot_map(d);
@@ -2349,7 +2358,7 @@  static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 		t->type = ref_type_id;
 
 		h = btf_hash_common(t);
-		for_each_hash_node(d->dedup_table, h, cand_node) {
+		for_each_dedup_cand(d, h, cand_node) {
 			cand = d->btf->types[cand_node->type_id];
 			if (btf_equal_common(t, cand)) {
 				new_id = cand_node->type_id;
@@ -2372,7 +2381,7 @@  static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 		info->index_type = ref_type_id;
 
 		h = btf_hash_array(t);
-		for_each_hash_node(d->dedup_table, h, cand_node) {
+		for_each_dedup_cand(d, h, cand_node) {
 			cand = d->btf->types[cand_node->type_id];
 			if (btf_equal_array(t, cand)) {
 				new_id = cand_node->type_id;
@@ -2403,7 +2412,7 @@  static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 		}
 
 		h = btf_hash_fnproto(t);
-		for_each_hash_node(d->dedup_table, h, cand_node) {
+		for_each_dedup_cand(d, h, cand_node) {
 			cand = d->btf->types[cand_node->type_id];
 			if (btf_equal_fnproto(t, cand)) {
 				new_id = cand_node->type_id;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index b60bb7cf5fff..28a1e1e59861 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -90,6 +90,7 @@  LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
 LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
 
 struct btf_dedup_opts {
+	unsigned int dedup_table_size;
 	bool dont_resolve_fwds;
 };
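
For reference, the power-of-two round-up that btf_dedup_new() applies
to the requested size can be illustrated standalone (round_up_pow2 is
a hypothetical helper, not libbpf code; the i < 31 bound caps the
table at 2^31 buckets, matching the "limit to 2G" comment above):

	static unsigned int round_up_pow2(unsigned int req)
	{
		int i;

		/* find the smallest i (up to 31) with 1 << i >= req */
		for (i = 0; i < 31 && (1U << i) < req; i++)
			;
		return 1U << i;
	}

	/* round_up_pow2(100000) == 131072, round_up_pow2(16384) == 16384 */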