diff mbox series

[bpf-next,1/2] btf: resolve enum fwds in btf_dedup

Message ID 20190311004410.3896503-2-andriin@fb.com
State Accepted
Delegated to: BPF Maintainers
Headers show
Series Add fwd enum resolution for btf_dedup() | expand

Commit Message

Andrii Nakryiko March 11, 2019, 12:44 a.m. UTC
GCC and clang support enum forward declarations as an extension. Such
forward-declared enums will be represented as normal BTF_KIND_ENUM types with
vlen=0. This patch adds ability to resolve such enums to their corresponding
fully defined enums. This helps to avoid duplicated BTF type graphs which only
differ by some types referencing forward-declared enum vs full enum.

One such example in kernel is enum irqchip_irq_state, defined in
include/linux/interrupt.h and forward-declared in include/linux/irq.h. This
causes entire struct task_struct and all referenced types to be duplicated in
btf_dedup output. This patch eliminates such duplication cases.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/lib/bpf/btf.c | 51 +++++++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 13 deletions(-)

Comments

Yonghong Song March 11, 2019, 7:02 a.m. UTC | #1
On 3/10/19 5:44 PM, Andrii Nakryiko wrote:
> GCC and clang support enum forward declarations as an extension. Such
> forward-declared enums will be represented as normal BTF_KIND_ENUM types with
> vlen=0. This patch adds ability to resolve such enums to their corresponding

Thanks Andrii for reporting and trying to fix forward-declared enum 
issue. The current approach to represent forward enum with
BTF_KIND_ENUM with vlen = 0 does work. "enum A {};" is illegal for C,
so vlen=0 must be a forward declaration.

But this is probably not the best since we have BTF_KIND_FWD to make 
forward declaration explicit.

Currently BTF_KIND_FWD uses (btf_type.info >> 31) to distinguish
struct/union. If btf_type.info >> 31 == 0, it is a struct, otherwise
it is a union.

We could extend this to cover forward enum as well with backward 
compability. We can extend by using both bit 30 and bit 31:
   btf_type.info >> 31   (btf_type.info >> 30) & 0x1
         0                  0                  <== struct
         1                  0                  <== union
         0                  1                  <== enum

Maybe this may be reasonable?

This will require pahole and llvm change to support, besides kernel 
support. I will try to prototype llvm support tomorrow.


> fully defined enums. This helps to avoid duplicated BTF type graphs which only
> differ by some types referencing forward-declared enum vs full enum.
> 
> One such example in kernel is enum irqchip_irq_state, defined in
> include/linux/interrupt.h and forward-declared in include/linux/irq.h. This
> causes entire struct task_struct and all referenced types to be duplicated in
> btf_dedup output. This patch eliminates such duplication cases.
> 
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> ---
>   tools/lib/bpf/btf.c | 51 +++++++++++++++++++++++++++++++++------------
>   1 file changed, 38 insertions(+), 13 deletions(-)
> 
> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index 1b8d8cdd3575..87e3020ac1bc 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c
> @@ -1602,16 +1602,12 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
>   /* Calculate type signature hash of ENUM. */
>   static __u32 btf_hash_enum(struct btf_type *t)
>   {
> -	struct btf_enum *member = (struct btf_enum *)(t + 1);
> -	__u32 vlen = BTF_INFO_VLEN(t->info);
> -	__u32 h = btf_hash_common(t);
> -	int i;
> +	__u32 h;
>   
> -	for (i = 0; i < vlen; i++) {
> -		h = hash_combine(h, member->name_off);
> -		h = hash_combine(h, member->val);
> -		member++;
> -	}
> +	/* don't hash vlen and enum members to support enum fwd resolving */
> +	h = hash_combine(0, t->name_off);
> +	h = hash_combine(h, t->info & ~0xffff);
> +	h = hash_combine(h, t->size);
>   	return h;
>   }
>   
> @@ -1637,6 +1633,22 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
>   	return true;
>   }
>   
> +static inline bool btf_is_enum_fwd(struct btf_type *t)
> +{
> +	return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM &&
> +	       BTF_INFO_VLEN(t->info) == 0;
> +}
> +
> +static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
> +{
> +	if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
> +		return btf_equal_enum(t1, t2);
> +	/* ignore vlen when comparing */
> +	return t1->name_off == t2->name_off &&
> +	       (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
> +	       t1->size == t2->size;
> +}
> +
>   /*
>    * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
>    * as referenced type IDs equivalence is established separately during type
> @@ -1860,6 +1872,17 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
>   				new_id = cand_node->type_id;
>   				break;
>   			}
> +			if (d->opts.dont_resolve_fwds)
> +				continue;
> +			if (btf_compat_enum(t, cand)) {
> +				if (btf_is_enum_fwd(t)) {
> +					/* resolve fwd to full enum */
> +					new_id = cand_node->type_id;
> +					break;
> +				}
> +				/* resolve canonical enum fwd to full enum */
> +				d->map[cand_node->type_id] = type_id;
> +			}
>   		}
>   		break;
>   
> @@ -2084,15 +2107,15 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
>   		return fwd_kind == real_kind;
>   	}
>   
> -	if (cand_type->info != canon_type->info)
> -		return 0;
> -
>   	switch (cand_kind) {
>   	case BTF_KIND_INT:
>   		return btf_equal_int(cand_type, canon_type);
>   
>   	case BTF_KIND_ENUM:
> -		return btf_equal_enum(cand_type, canon_type);
> +		if (d->opts.dont_resolve_fwds)
> +			return btf_equal_enum(cand_type, canon_type);
> +		else
> +			return btf_compat_enum(cand_type, canon_type);
>   
>   	case BTF_KIND_FWD:
>   		return btf_equal_common(cand_type, canon_type);
> @@ -2103,6 +2126,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
>   	case BTF_KIND_PTR:
>   	case BTF_KIND_TYPEDEF:
>   	case BTF_KIND_FUNC:
> +		if (cand_type->info != canon_type->info)
> +			return 0;
>   		return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
>   
>   	case BTF_KIND_ARRAY: {
>
Andrii Nakryiko March 11, 2019, 4:34 p.m. UTC | #2
On Mon, Mar 11, 2019 at 12:02 AM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 3/10/19 5:44 PM, Andrii Nakryiko wrote:
> > GCC and clang support enum forward declarations as an extension. Such
> > forward-declared enums will be represented as normal BTF_KIND_ENUM types with
> > vlen=0. This patch adds ability to resolve such enums to their corresponding
>
> Thanks Andrii for reporting and trying to fix forward-declared enum
> issue. The current approach to represent forward enum with
> BTF_KIND_ENUM with vlen = 0 does work. "enum A {};" is illegal for C,
> so vlen=0 must be a forward declaration.
>
> But this is probably not the best since we have BTF_KIND_FWD to make
> forward declaration explicit.

Yeah, that's true. See below, though, why for enum it's a bit inconvenient.

In retrospect, I'm wondering if having a special "declaration vs
definition" bit in btf_type->info for struct/union would be better
approach, kind of like DWARF's DW_AT_declaration attribute. But it's a
bit late now.

>
> Currently BTF_KIND_FWD uses (btf_type.info >> 31) to distinguish
> struct/union. If btf_type.info >> 31 == 0, it is a struct, otherwise
> it is a union.
>
> We could extend this to cover forward enum as well with backward
> compability. We can extend by using both bit 30 and bit 31:
>    btf_type.info >> 31   (btf_type.info >> 30) & 0x1
>          0                  0                  <== struct
>          1                  0                  <== union
>          0                  1                  <== enum
>
> Maybe this may be reasonable?

The only thing we should consider is that enums can have different
sizes. And enum size is part of enum's forward declaration. So unlike
struct/union fwd, enum's fwd has extra info. I don't think it's
possible to specify in C (enum is always 4 bytes), but C++ definitely
allows to specify custom underlying integer type.

So I don't know, do you think we should just ignore size and go with
BTF_KIND_FWD?

>
> This will require pahole and llvm change to support, besides kernel
> support. I will try to prototype llvm support tomorrow.
>
>
> > fully defined enums. This helps to avoid duplicated BTF type graphs which only
> > differ by some types referencing forward-declared enum vs full enum.
> >
> > One such example in kernel is enum irqchip_irq_state, defined in
> > include/linux/interrupt.h and forward-declared in include/linux/irq.h. This
> > causes entire struct task_struct and all referenced types to be duplicated in
> > btf_dedup output. This patch eliminates such duplication cases.
> >
> > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> > ---
> >   tools/lib/bpf/btf.c | 51 +++++++++++++++++++++++++++++++++------------
> >   1 file changed, 38 insertions(+), 13 deletions(-)
> >
> > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> > index 1b8d8cdd3575..87e3020ac1bc 100644
> > --- a/tools/lib/bpf/btf.c
> > +++ b/tools/lib/bpf/btf.c
> > @@ -1602,16 +1602,12 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
> >   /* Calculate type signature hash of ENUM. */
> >   static __u32 btf_hash_enum(struct btf_type *t)
> >   {
> > -     struct btf_enum *member = (struct btf_enum *)(t + 1);
> > -     __u32 vlen = BTF_INFO_VLEN(t->info);
> > -     __u32 h = btf_hash_common(t);
> > -     int i;
> > +     __u32 h;
> >
> > -     for (i = 0; i < vlen; i++) {
> > -             h = hash_combine(h, member->name_off);
> > -             h = hash_combine(h, member->val);
> > -             member++;
> > -     }
> > +     /* don't hash vlen and enum members to support enum fwd resolving */
> > +     h = hash_combine(0, t->name_off);
> > +     h = hash_combine(h, t->info & ~0xffff);
> > +     h = hash_combine(h, t->size);
> >       return h;
> >   }
> >
> > @@ -1637,6 +1633,22 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
> >       return true;
> >   }
> >
> > +static inline bool btf_is_enum_fwd(struct btf_type *t)
> > +{
> > +     return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM &&
> > +            BTF_INFO_VLEN(t->info) == 0;
> > +}
> > +
> > +static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
> > +{
> > +     if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
> > +             return btf_equal_enum(t1, t2);
> > +     /* ignore vlen when comparing */
> > +     return t1->name_off == t2->name_off &&
> > +            (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
> > +            t1->size == t2->size;
> > +}
> > +
> >   /*
> >    * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
> >    * as referenced type IDs equivalence is established separately during type
> > @@ -1860,6 +1872,17 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
> >                               new_id = cand_node->type_id;
> >                               break;
> >                       }
> > +                     if (d->opts.dont_resolve_fwds)
> > +                             continue;
> > +                     if (btf_compat_enum(t, cand)) {
> > +                             if (btf_is_enum_fwd(t)) {
> > +                                     /* resolve fwd to full enum */
> > +                                     new_id = cand_node->type_id;
> > +                                     break;
> > +                             }
> > +                             /* resolve canonical enum fwd to full enum */
> > +                             d->map[cand_node->type_id] = type_id;
> > +                     }
> >               }
> >               break;
> >
> > @@ -2084,15 +2107,15 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
> >               return fwd_kind == real_kind;
> >       }
> >
> > -     if (cand_type->info != canon_type->info)
> > -             return 0;
> > -
> >       switch (cand_kind) {
> >       case BTF_KIND_INT:
> >               return btf_equal_int(cand_type, canon_type);
> >
> >       case BTF_KIND_ENUM:
> > -             return btf_equal_enum(cand_type, canon_type);
> > +             if (d->opts.dont_resolve_fwds)
> > +                     return btf_equal_enum(cand_type, canon_type);
> > +             else
> > +                     return btf_compat_enum(cand_type, canon_type);
> >
> >       case BTF_KIND_FWD:
> >               return btf_equal_common(cand_type, canon_type);
> > @@ -2103,6 +2126,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
> >       case BTF_KIND_PTR:
> >       case BTF_KIND_TYPEDEF:
> >       case BTF_KIND_FUNC:
> > +             if (cand_type->info != canon_type->info)
> > +                     return 0;
> >               return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
> >
> >       case BTF_KIND_ARRAY: {
> >
Edward Cree March 11, 2019, 5:27 p.m. UTC | #3
On 11/03/19 16:34, Andrii Nakryiko wrote:
> The only thing we should consider is that enums can have different
> sizes. And enum size is part of enum's forward declaration. So unlike
> struct/union fwd, enum's fwd has extra info. I don't think it's
> possible to specify in C (enum is always 4 bytes),
Not always, see e.g. N1458 §6.7.2.2.4:
> Each enumerated type shall be compatible with char, a signed integer
> type, or an unsigned integer type. The choice of type is implementation-
> defined,¹²⁸⁾ but shall be capable of representing the values of all the
> members of the enumeration. [...]
The enumeration _constants_ have type int (§6.7.2.2.3), but the enum type
itself has implementation-defined size, per the above.
C doesn't allow to forward-declare enumerators (§6.7.2.3.3), nor even to
reference them from the enumerator-list in the declaration, since the
enumerator type is incomplete until the closing brace (§6.7.2.2.4).
Footnote 128 adds that "An implementation may delay the choice of which
integer type until all enumeration constants have been seen."
It appears, in any case, that no forward-declaration could be required in
BTF, since an enum type's BTF record does not reference other types.  With
something like
    enum foo {
        BAR = sizeof(enum foo *),
    };
which is not valid C thanks to §6.7.2.3.3 (but many compilers will accept
it, e.g. gcc without -pedantic), the BTF record would read
    kind=enum name_off=&"foo" vlen=1
        name_off=&"BAR" val=8
(assuming 64-bit) which does not require any forward-declaring.

I don't know about the C++ situation, though.

-Ed

PS: It's not really clear to me what the rationale for §6.7.2.3.3 is,
since all the problems with incomplete enum types also arise with other
incomplete types; why 'enum foo *' can't be treated like 'struct quux *'
I don't know.  But this isn't comp.std.c...
Andrii Nakryiko March 11, 2019, 5:39 p.m. UTC | #4
On Mon, Mar 11, 2019 at 10:27 AM Edward Cree <ecree@solarflare.com> wrote:
>
> On 11/03/19 16:34, Andrii Nakryiko wrote:
> > The only thing we should consider is that enums can have different
> > sizes. And enum size is part of enum's forward declaration. So unlike
> > struct/union fwd, enum's fwd has extra info. I don't think it's
> > possible to specify in C (enum is always 4 bytes),
> Not always, see e.g. N1458 §6.7.2.2.4:

Yeah, you are right, sorry. I was mostly trying to point out that C
itself doesn't allow programmer to specify size, while C++ does.


> > Each enumerated type shall be compatible with char, a signed integer
> > type, or an unsigned integer type. The choice of type is implementation-
> > defined,¹²⁸⁾ but shall be capable of representing the values of all the
> > members of the enumeration. [...]
> The enumeration _constants_ have type int (§6.7.2.2.3), but the enum type
> itself has implementation-defined size, per the above.
> C doesn't allow to forward-declare enumerators (§6.7.2.3.3), nor even to

There is a GCC extension that allows to forward-declare enums:
https://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html#Incomplete-Enums

Kernel actually has examples of using this (see patch description
regrading irqchip_irq_state).

> reference them from the enumerator-list in the declaration, since the
> enumerator type is incomplete until the closing brace (§6.7.2.2.4).
> Footnote 128 adds that "An implementation may delay the choice of which
> integer type until all enumeration constants have been seen."
> It appears, in any case, that no forward-declaration could be required in
> BTF, since an enum type's BTF record does not reference other types.  With
> something like
>     enum foo {
>         BAR = sizeof(enum foo *),
>     };
> which is not valid C thanks to §6.7.2.3.3 (but many compilers will accept
> it, e.g. gcc without -pedantic), the BTF record would read
>     kind=enum name_off=&"foo" vlen=1
>         name_off=&"BAR" val=8
> (assuming 64-bit) which does not require any forward-declaring.
>
> I don't know about the C++ situation, though.

C++ allows to forward-declare enums only if as part of declaration you
also specify underlying integer type.

>
> -Ed
>
> PS: It's not really clear to me what the rationale for §6.7.2.3.3 is,
> since all the problems with incomplete enum types also arise with other
> incomplete types; why 'enum foo *' can't be treated like 'struct quux *'

Seems like GCC's extension allows exactly this.

> I don't know.  But this isn't comp.std.c...
Alexei Starovoitov March 14, 2019, 9:01 p.m. UTC | #5
On Mon, Mar 11, 2019 at 10:39:37AM -0700, Andrii Nakryiko wrote:
> 
> There is a GCC extension that allows to forward-declare enums:
> https://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html#Incomplete-Enums
> 
> Kernel actually has examples of using this (see patch description
> regrading irqchip_irq_state).

we've considered several different ways of either extending or changing BTF api
to accommodate this unforeseen C langauge extension.
All of them require patches for the kernel 5.1 and 5.0 and/or llvm,
so we've decided to proceed with the existing kernel code and llvm
and only tweak btf dedup logic the way this patch does.

I've applied this set to bpf tree.
diff mbox series

Patch

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 1b8d8cdd3575..87e3020ac1bc 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1602,16 +1602,12 @@  static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
 /* Calculate type signature hash of ENUM. */
 static __u32 btf_hash_enum(struct btf_type *t)
 {
-	struct btf_enum *member = (struct btf_enum *)(t + 1);
-	__u32 vlen = BTF_INFO_VLEN(t->info);
-	__u32 h = btf_hash_common(t);
-	int i;
+	__u32 h;
 
-	for (i = 0; i < vlen; i++) {
-		h = hash_combine(h, member->name_off);
-		h = hash_combine(h, member->val);
-		member++;
-	}
+	/* don't hash vlen and enum members to support enum fwd resolving */
+	h = hash_combine(0, t->name_off);
+	h = hash_combine(h, t->info & ~0xffff);
+	h = hash_combine(h, t->size);
 	return h;
 }
 
@@ -1637,6 +1633,22 @@  static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
 	return true;
 }
 
+static inline bool btf_is_enum_fwd(struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM &&
+	       BTF_INFO_VLEN(t->info) == 0;
+}
+
+static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
+{
+	if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
+		return btf_equal_enum(t1, t2);
+	/* ignore vlen when comparing */
+	return t1->name_off == t2->name_off &&
+	       (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
+	       t1->size == t2->size;
+}
+
 /*
  * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
  * as referenced type IDs equivalence is established separately during type
@@ -1860,6 +1872,17 @@  static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 				new_id = cand_node->type_id;
 				break;
 			}
+			if (d->opts.dont_resolve_fwds)
+				continue;
+			if (btf_compat_enum(t, cand)) {
+				if (btf_is_enum_fwd(t)) {
+					/* resolve fwd to full enum */
+					new_id = cand_node->type_id;
+					break;
+				}
+				/* resolve canonical enum fwd to full enum */
+				d->map[cand_node->type_id] = type_id;
+			}
 		}
 		break;
 
@@ -2084,15 +2107,15 @@  static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 		return fwd_kind == real_kind;
 	}
 
-	if (cand_type->info != canon_type->info)
-		return 0;
-
 	switch (cand_kind) {
 	case BTF_KIND_INT:
 		return btf_equal_int(cand_type, canon_type);
 
 	case BTF_KIND_ENUM:
-		return btf_equal_enum(cand_type, canon_type);
+		if (d->opts.dont_resolve_fwds)
+			return btf_equal_enum(cand_type, canon_type);
+		else
+			return btf_compat_enum(cand_type, canon_type);
 
 	case BTF_KIND_FWD:
 		return btf_equal_common(cand_type, canon_type);
@@ -2103,6 +2126,8 @@  static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 	case BTF_KIND_PTR:
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
+		if (cand_type->info != canon_type->info)
+			return 0;
 		return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
 
 	case BTF_KIND_ARRAY: {