diff mbox series

[PATCHv5,bpf] bpf: Move iterator functions into special init section

Message ID 20201109185754.377373-1-jolsa@kernel.org
State Not Applicable
Delegated to: BPF Maintainers
Headers show
Series [PATCHv5,bpf] bpf: Move iterator functions into special init section | expand

Checks

Context Check Description
jkicinski/cover_letter success Link
jkicinski/fixes_present fail Series targets non-next tree, but doesn't contain any Fixes tags
jkicinski/patch_count success Link
jkicinski/tree_selection success Clearly marked for bpf
jkicinski/subject_prefix success Link
jkicinski/source_inline success Was 0 now: 0
jkicinski/verify_signedoff success Link
jkicinski/module_param success Was 0 now: 0
jkicinski/build_32bit success Errors and warnings before: 31331 this patch: 31331
jkicinski/kdoc success Errors and warnings before: 0 this patch: 0
jkicinski/verify_fixes success Link
jkicinski/checkpatch warning CHECK: spaces preferred around that '*' (ctx:VxB) WARNING: From:/Signed-off-by: email address mismatch: 'From: Jiri Olsa <jolsa@kernel.org>' != 'Signed-off-by: Jiri Olsa <jolsa@redhat.com>'
jkicinski/build_allmodconfig_warn success Errors and warnings before: 29392 this patch: 29392
jkicinski/header_inline success Link
jkicinski/stable success Stable not CCed

Commit Message

Jiri Olsa Nov. 9, 2020, 6:57 p.m. UTC
Upcoming changes to pahole alter how, and which, kernel functions
are stored in BTF data, so we need a way to recognize iterator
functions.

Iterator functions need to be in BTF data, but have no real
body and are currently placed in .init.text section, so they
are freed after kernel init and are filtered out of BTF data
because of that.

The solution is to place these functions under new section:
  .init.bpf.preserve_type

And add 2 new symbols to mark that area:
  __init_bpf_preserve_type_begin
  __init_bpf_preserve_type_end

The code in pahole responsible for picking up the functions will
be able to recognize functions from this section and add them to
the BTF data and filter out all other .init.text functions.

Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Suggested-by: Yonghong Song <yhs@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
---
v5 changes:
  - use "" in __section macro due to:
    33def8498fdde180 ("treewide: Convert macro and uses of __section(foo) to __section("foo")")
    [Arnaldo]

v4: https://lore.kernel.org/bpf/20201106222512.52454-1-jolsa@kernel.org/

 include/asm-generic/vmlinux.lds.h | 16 +++++++++++++++-
 include/linux/bpf.h               |  8 +++++++-
 include/linux/init.h              |  1 +
 3 files changed, 23 insertions(+), 2 deletions(-)

Comments

Daniel Borkmann Nov. 9, 2020, 10:04 p.m. UTC | #1
On 11/9/20 7:57 PM, Jiri Olsa wrote:
> With upcoming changes to pahole, that change the way how and
> which kernel functions are stored in BTF data, we need a way
> to recognize iterator functions.
> 
> Iterator functions need to be in BTF data, but have no real
> body and are currently placed in .init.text section, so they
> are freed after kernel init and are filtered out of BTF data
> because of that.
> 
> The solution is to place these functions under new section:
>    .init.bpf.preserve_type
> 
> And add 2 new symbols to mark that area:
>    __init_bpf_preserve_type_begin
>    __init_bpf_preserve_type_end
> 
> The code in pahole responsible for picking up the functions will
> be able to recognize functions from this section and add them to
> the BTF data and filter out all other .init.text functions.
> 
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Suggested-by: Yonghong Song <yhs@fb.com>
> Acked-by: Song Liu <songliubraving@fb.com>
> Acked-by: Andrii Nakryiko <andrii@kernel.org>
> Signed-off-by: Jiri Olsa <jolsa@redhat.com>
> ---
> v5 changes:
>    - use "" in __section macro due to:
>      33def8498fdde180 ("treewide: Convert macro and uses of __section(foo) to __section("foo")")
>      [Arnaldo]
> 
> v4: https://lore.kernel.org/bpf/20201106222512.52454-1-jolsa@kernel.org/
> 
>   include/asm-generic/vmlinux.lds.h | 16 +++++++++++++++-
>   include/linux/bpf.h               |  8 +++++++-
>   include/linux/init.h              |  1 +
>   3 files changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index b2b3d81b1535..f91029b3443b 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -685,8 +685,21 @@
>   	.BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) {			\
>   		*(.BTF_ids)						\
>   	}
> +
> +/*
> + * .init.bpf.preserve_type
> + *
> + * This section store special BPF function and marks them
> + * with begin/end symbols pair for the sake of pahole tool.
> + */
> +#define INIT_BPF_PRESERVE_TYPE						\
> +	__init_bpf_preserve_type_begin = .;                             \
> +	*(.init.bpf.preserve_type)                                      \
> +	__init_bpf_preserve_type_end = .;				\
> +	MEM_DISCARD(init.bpf.preserve_type)
>   #else
>   #define BTF
> +#define INIT_BPF_PRESERVE_TYPE
>   #endif
>   
>   /*
> @@ -741,7 +754,8 @@
>   #define INIT_TEXT							\
>   	*(.init.text .init.text.*)					\
>   	*(.text.startup)						\
> -	MEM_DISCARD(init.text*)
> +	MEM_DISCARD(init.text*)						\
> +	INIT_BPF_PRESERVE_TYPE
>   
>   #define EXIT_DATA							\
>   	*(.exit.data .exit.data.*)					\
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 2b16bf48aab6..73e8ededde3e 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1276,10 +1276,16 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
>   int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
>   int bpf_obj_get_user(const char __user *pathname, int flags);
>   
> +#ifdef CONFIG_DEBUG_INFO_BTF
> +#define BPF_INIT __init_bpf_preserve_type
> +#else
> +#define BPF_INIT __init
> +#endif
> +
>   #define BPF_ITER_FUNC_PREFIX "bpf_iter_"
>   #define DEFINE_BPF_ITER_FUNC(target, args...)			\
>   	extern int bpf_iter_ ## target(args);			\
> -	int __init bpf_iter_ ## target(args) { return 0; }
> +	int BPF_INIT bpf_iter_ ## target(args) { return 0; }
>   
>   struct bpf_iter_aux_info {
>   	struct bpf_map *map;
> diff --git a/include/linux/init.h b/include/linux/init.h
> index 7b53cb3092ee..a7c71e3b5f9a 100644
> --- a/include/linux/init.h
> +++ b/include/linux/init.h
> @@ -52,6 +52,7 @@
>   #define __initconst	__section(".init.rodata")
>   #define __exitdata	__section(".exit.data")
>   #define __exit_call	__used __section(".exitcall.exit")
> +#define __init_bpf_preserve_type __section(".init.bpf.preserve_type")

Small nit, why this detour via BPF_INIT define? Couldn't we just:

#ifdef CONFIG_DEBUG_INFO_BTF
#define __init_bpf_preserve_type   __section(".init.bpf.preserve_type")
#else
#define __init_bpf_preserve_type   __init
#endif

Also, the comment above the existing defines says '/* These are for everybody (although
not all archs will actually discard it in modules) */' ... We should probably not add
the __init_bpf_preserve_type right under this listing as-is in your patch, but instead
'separate' it by adding a small comment on top of its definition by explaining its
purpose more clearly for others.

>   /*
>    * modpost check for section mismatches during the kernel build.
>
Jiri Olsa Nov. 10, 2020, 10:35 a.m. UTC | #2
On Mon, Nov 09, 2020 at 11:04:34PM +0100, Daniel Borkmann wrote:

SNIP

> > index 7b53cb3092ee..a7c71e3b5f9a 100644
> > --- a/include/linux/init.h
> > +++ b/include/linux/init.h
> > @@ -52,6 +52,7 @@
> >   #define __initconst	__section(".init.rodata")
> >   #define __exitdata	__section(".exit.data")
> >   #define __exit_call	__used __section(".exitcall.exit")
> > +#define __init_bpf_preserve_type __section(".init.bpf.preserve_type")
> 
> Small nit, why this detour via BPF_INIT define? Couldn't we just:
> 
> #ifdef CONFIG_DEBUG_INFO_BTF
> #define __init_bpf_preserve_type   __section(".init.bpf.preserve_type")
> #else
> #define __init_bpf_preserve_type   __init
> #endif
> 
> Also, the comment above the existing defines says '/* These are for everybody (although
> not all archs will actually discard it in modules) */' ... We should probably not add
> the __init_bpf_preserve_type right under this listing as-is in your patch, but instead
> 'separate' it by adding a small comment on top of its definition by explaining its
> purpose more clearly for others.

ok, for some reason I thought I needed to add it to init.h,
but as it's bpf specific, perhaps we can omit init.h change
completely.. how about the change below?

thanks,
jirka


---
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index b2b3d81b1535..f91029b3443b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -685,8 +685,21 @@
 	.BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) {			\
 		*(.BTF_ids)						\
 	}
+
+/*
+ * .init.bpf.preserve_type
+ *
+ * This section stores special BPF functions and marks them
+ * with a begin/end symbol pair for the sake of the pahole tool.
+ */
+#define INIT_BPF_PRESERVE_TYPE						\
+	__init_bpf_preserve_type_begin = .;                             \
+	*(.init.bpf.preserve_type)                                      \
+	__init_bpf_preserve_type_end = .;				\
+	MEM_DISCARD(init.bpf.preserve_type)
 #else
 #define BTF
+#define INIT_BPF_PRESERVE_TYPE
 #endif
 
 /*
@@ -741,7 +754,8 @@
 #define INIT_TEXT							\
 	*(.init.text .init.text.*)					\
 	*(.text.startup)						\
-	MEM_DISCARD(init.text*)
+	MEM_DISCARD(init.text*)						\
+	INIT_BPF_PRESERVE_TYPE
 
 #define EXIT_DATA							\
 	*(.exit.data .exit.data.*)					\
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2b16bf48aab6..1739a92516ed 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1276,10 +1276,20 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
+/* When BTF data is generated, all iterator functions must be grouped
+ * into a special init section so that pahole can track them.
+ * Otherwise the plain __init section is enough.
+ */
+#ifdef CONFIG_DEBUG_INFO_BTF
+#define __init_bpf_preserve_type __section(".init.bpf.preserve_type")
+#else
+#define __init_bpf_preserve_type __init
+#endif
+
 #define BPF_ITER_FUNC_PREFIX "bpf_iter_"
 #define DEFINE_BPF_ITER_FUNC(target, args...)			\
 	extern int bpf_iter_ ## target(args);			\
-	int __init bpf_iter_ ## target(args) { return 0; }
+	int __init_bpf_preserve_type bpf_iter_ ## target(args) { return 0; }
 
 struct bpf_iter_aux_info {
 	struct bpf_map *map;
Daniel Borkmann Nov. 10, 2020, 11:30 p.m. UTC | #3
On 11/10/20 11:35 AM, Jiri Olsa wrote:
> On Mon, Nov 09, 2020 at 11:04:34PM +0100, Daniel Borkmann wrote:
> 
> SNIP
> 
>>> index 7b53cb3092ee..a7c71e3b5f9a 100644
>>> --- a/include/linux/init.h
>>> +++ b/include/linux/init.h
>>> @@ -52,6 +52,7 @@
>>>    #define __initconst	__section(".init.rodata")
>>>    #define __exitdata	__section(".exit.data")
>>>    #define __exit_call	__used __section(".exitcall.exit")
>>> +#define __init_bpf_preserve_type __section(".init.bpf.preserve_type")
>>
>> Small nit, why this detour via BPF_INIT define? Couldn't we just:
>>
>> #ifdef CONFIG_DEBUG_INFO_BTF
>> #define __init_bpf_preserve_type   __section(".init.bpf.preserve_type")
>> #else
>> #define __init_bpf_preserve_type   __init
>> #endif
>>
>> Also, the comment above the existing defines says '/* These are for everybody (although
>> not all archs will actually discard it in modules) */' ... We should probably not add
>> the __init_bpf_preserve_type right under this listing as-is in your patch, but instead
>> 'separate' it by adding a small comment on top of its definition by explaining its
>> purpose more clearly for others.
> 
> ok, for some reason I thought I needed to add it to init.h,
> but as it's bpf specific, perhaps we can omit init.h change
> completely.. how about the change below?

Agree, that looks much better, thanks!
diff mbox series

Patch

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index b2b3d81b1535..f91029b3443b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -685,8 +685,21 @@ 
 	.BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) {			\
 		*(.BTF_ids)						\
 	}
+
+/*
+ * .init.bpf.preserve_type
+ *
+ * This section stores special BPF functions and marks them
+ * with a begin/end symbol pair for the sake of the pahole tool.
+ */
+#define INIT_BPF_PRESERVE_TYPE						\
+	__init_bpf_preserve_type_begin = .;                             \
+	*(.init.bpf.preserve_type)                                      \
+	__init_bpf_preserve_type_end = .;				\
+	MEM_DISCARD(init.bpf.preserve_type)
 #else
 #define BTF
+#define INIT_BPF_PRESERVE_TYPE
 #endif
 
 /*
@@ -741,7 +754,8 @@ 
 #define INIT_TEXT							\
 	*(.init.text .init.text.*)					\
 	*(.text.startup)						\
-	MEM_DISCARD(init.text*)
+	MEM_DISCARD(init.text*)						\
+	INIT_BPF_PRESERVE_TYPE
 
 #define EXIT_DATA							\
 	*(.exit.data .exit.data.*)					\
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2b16bf48aab6..73e8ededde3e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1276,10 +1276,16 @@  struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
+#ifdef CONFIG_DEBUG_INFO_BTF
+#define BPF_INIT __init_bpf_preserve_type
+#else
+#define BPF_INIT __init
+#endif
+
 #define BPF_ITER_FUNC_PREFIX "bpf_iter_"
 #define DEFINE_BPF_ITER_FUNC(target, args...)			\
 	extern int bpf_iter_ ## target(args);			\
-	int __init bpf_iter_ ## target(args) { return 0; }
+	int BPF_INIT bpf_iter_ ## target(args) { return 0; }
 
 struct bpf_iter_aux_info {
 	struct bpf_map *map;
diff --git a/include/linux/init.h b/include/linux/init.h
index 7b53cb3092ee..a7c71e3b5f9a 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -52,6 +52,7 @@ 
 #define __initconst	__section(".init.rodata")
 #define __exitdata	__section(".exit.data")
 #define __exit_call	__used __section(".exitcall.exit")
+#define __init_bpf_preserve_type __section(".init.bpf.preserve_type")
 
 /*
  * modpost check for section mismatches during the kernel build.