diff mbox series

[v2,bpf-next,6/7] bpf: add support for %pT format specifier for bpf_trace_printk() helper

Message ID 1589263005-7887-7-git-send-email-alan.maguire@oracle.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf, printk: add BTF-based type printing | expand

Commit Message

Alan Maguire May 12, 2020, 5:56 a.m. UTC
Allow %pT[cNx0] format specifier for BTF-based display of data associated
with pointer.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
 include/uapi/linux/bpf.h       | 27 ++++++++++++++++++++++-----
 kernel/trace/bpf_trace.c       | 21 ++++++++++++++++++---
 tools/include/uapi/linux/bpf.h | 27 ++++++++++++++++++++++-----
 3 files changed, 62 insertions(+), 13 deletions(-)

Comments

Yonghong Song May 14, 2020, 12:53 a.m. UTC | #1
On 5/11/20 10:56 PM, Alan Maguire wrote:
> Allow %pT[cNx0] format specifier for BTF-based display of data associated
> with pointer.
> 
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>   include/uapi/linux/bpf.h       | 27 ++++++++++++++++++++++-----
>   kernel/trace/bpf_trace.c       | 21 ++++++++++++++++++---
>   tools/include/uapi/linux/bpf.h | 27 ++++++++++++++++++++++-----
>   3 files changed, 62 insertions(+), 13 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 9d1932e..ab3c86c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -695,7 +695,12 @@ struct bpf_stack_build_id {
>    * 		to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
>    * 		available. It can take up to three additional **u64**
>    * 		arguments (as an eBPF helpers, the total number of arguments is
> - * 		limited to five).
> + *		limited to five), and also supports %pT (BTF-based type
> + *		printing), as long as BPF_READ lockdown is not active.
> + *		"%pT" takes a "struct __btf_ptr *" as an argument; it
> + *		consists of a pointer value and specified BTF type string or id
> + *		used to select the type for display.  For more details, see
> + *		Documentation/core-api/printk-formats.rst.
>    *
>    * 		Each time the helper is called, it appends a line to the trace.
>    * 		Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
> @@ -731,10 +736,10 @@ struct bpf_stack_build_id {
>    * 		The conversion specifiers supported by *fmt* are similar, but
>    * 		more limited than for printk(). They are **%d**, **%i**,
>    * 		**%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
> - * 		**%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
> - * 		of field, padding with zeroes, etc.) is available, and the
> - * 		helper will return **-EINVAL** (but print nothing) if it
> - * 		encounters an unknown specifier.
> + *		**%lli**, **%llu**, **%llx**, **%p**, **%pT[cNx0], **%s**.
> + *		Only %pT supports modifiers, and the helper will return
> + *		**-EINVAL** (but print nothing) if it encouters an unknown
> + *		specifier.
>    *
>    * 		Also, note that **bpf_trace_printk**\ () is slow, and should
>    * 		only be used for debugging purposes. For this reason, a notice
> @@ -4058,4 +4063,16 @@ struct bpf_pidns_info {
>   	__u32 pid;
>   	__u32 tgid;
>   };
> +
> +/*
> + * struct __btf_ptr is used for %pT (typed pointer) display; the
> + * additional type string/BTF id are used to render the pointer
> + * data as the appropriate type.
> + */
> +struct __btf_ptr {
> +	void *ptr;
> +	const char *type;
> +	__u32 id;
> +};
> +
>   #endif /* _UAPI__LINUX_BPF_H__ */
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index d961428..c032c58 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -321,9 +321,12 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
>   	return &bpf_probe_write_user_proto;
>   }
>   
> +#define isbtffmt(c)	\
> +	(c == 'T' || c == 'c' || c == 'N' || c == 'x' || c == '0')
> +
>   /*
>    * Only limited trace_printk() conversion specifiers allowed:
> - * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
> + * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pT %s
>    */
>   BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
>   	   u64, arg2, u64, arg3)
> @@ -361,8 +364,20 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
>   			i++;
>   		} else if (fmt[i] == 'p' || fmt[i] == 's') {
>   			mod[fmt_cnt]++;
> -			/* disallow any further format extensions */
> -			if (fmt[i + 1] != 0 &&
> +			/*
> +			 * allow BTF type-based printing, and disallow any
> +			 * further format extensions.
> +			 */
> +			if (fmt[i] == 'p' && fmt[i + 1] == 'T') {
> +				int ret;
> +
> +				ret = security_locked_down(LOCKDOWN_BPF_READ);
> +				if (unlikely(ret < 0))
> +					return ret;
> +				i++;
> +				while (isbtffmt(fmt[i]))
> +					i++;

The pointer passed to the helper may not be valid pointer. I think you
need to do a probe_read_kernel() here. Do an atomic memory allocation
here should be okay as this is mostly for debugging only.


> +			} else if (fmt[i + 1] != 0 &&
>   			    !isspace(fmt[i + 1]) &&
>   			    !ispunct(fmt[i + 1]))
>   				return -EINVAL;
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 9d1932e..ab3c86c 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -695,7 +695,12 @@ struct bpf_stack_build_id {
>    * 		to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
>    * 		available. It can take up to three additional **u64**
>    * 		arguments (as an eBPF helpers, the total number of arguments is
> - * 		limited to five).
> + *		limited to five), and also supports %pT (BTF-based type
> + *		printing), as long as BPF_READ lockdown is not active.
> + *		"%pT" takes a "struct __btf_ptr *" as an argument; it
> + *		consists of a pointer value and specified BTF type string or id
> + *		used to select the type for display.  For more details, see
> + *		Documentation/core-api/printk-formats.rst.
>    *
>    * 		Each time the helper is called, it appends a line to the trace.
>    * 		Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
> @@ -731,10 +736,10 @@ struct bpf_stack_build_id {
>    * 		The conversion specifiers supported by *fmt* are similar, but
>    * 		more limited than for printk(). They are **%d**, **%i**,
>    * 		**%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
> - * 		**%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
> - * 		of field, padding with zeroes, etc.) is available, and the
> - * 		helper will return **-EINVAL** (but print nothing) if it
> - * 		encounters an unknown specifier.
> + *		**%lli**, **%llu**, **%llx**, **%p**, **%pT[cNx0], **%s**.
> + *		Only %pT supports modifiers, and the helper will return
> + *		**-EINVAL** (but print nothing) if it encouters an unknown
> + *		specifier.
>    *
>    * 		Also, note that **bpf_trace_printk**\ () is slow, and should
>    * 		only be used for debugging purposes. For this reason, a notice
> @@ -4058,4 +4063,16 @@ struct bpf_pidns_info {
>   	__u32 pid;
>   	__u32 tgid;
>   };
> +
> +/*
> + * struct __btf_ptr is used for %pT (typed pointer) display; the
> + * additional type string/BTF id are used to render the pointer
> + * data as the appropriate type.
> + */
> +struct __btf_ptr {
> +	void *ptr;
> +	const char *type;
> +	__u32 id;
> +};
> +
>   #endif /* _UAPI__LINUX_BPF_H__ */
>
Alan Maguire May 18, 2020, 9:10 a.m. UTC | #2
On Wed, 13 May 2020, Yonghong Song wrote:

> 
> > +				while (isbtffmt(fmt[i]))
> > +					i++;
> 
> The pointer passed to the helper may not be valid pointer. I think you
> need to do a probe_read_kernel() here. Do an atomic memory allocation
> here should be okay as this is mostly for debugging only.
> 

Are there other examples of doing allocations in program execution
context? I'd hate to be the first to introduce one if not. I was hoping
I could get away with some per-CPU scratch space. Most data structures
will fit within a small per-CPU buffer, but if multiple copies
are required, performance isn't the key concern. It will make traversing
the buffer during display a bit more complex but I think avoiding 
allocation might make that complexity worth it. The other thought I had 
was we could carry out an allocation associated with the attach, 
but that's messy as it's possible run-time might determine the type for
display (and thus the amount of the buffer we need to copy safely).

Great news about LLVM support for __builtin_btf_type_id()!

Thanks!

Alan
Yonghong Song May 18, 2020, 2:47 p.m. UTC | #3
On 5/18/20 2:10 AM, Alan Maguire wrote:
> On Wed, 13 May 2020, Yonghong Song wrote:
> 
>>
>>> +				while (isbtffmt(fmt[i]))
>>> +					i++;
>>
>> The pointer passed to the helper may not be valid pointer. I think you
>> need to do a probe_read_kernel() here. Do an atomic memory allocation
>> here should be okay as this is mostly for debugging only.
>>
> 
> Are there other examples of doing allocations in program execution
> context? I'd hate to be the first to introduce one if not. I was hoping
> I could get away with some per-CPU scratch space. Most data structures
> will fit within a small per-CPU buffer, but if multiple copies
> are required, performance isn't the key concern. It will make traversing
> the buffer during display a bit more complex but I think avoiding
> allocation might make that complexity worth it. The other thought I had
> was we could carry out an allocation associated with the attach,
> but that's messy as it's possible run-time might determine the type for
> display (and thus the amount of the buffer we need to copy safely).

percpu buffer definitely better. In fact, I am using percpu buffer
in bpf_seq_printf() helper. Yes, you will need to handling contention
though. I guess we can do the same thing here, return -EBUSY so bpf
program can react properly (retry, or just print error, etc.)
if there is a contention.

> 
> Great news about LLVM support for __builtin_btf_type_id()!

Thanks. Hopefully this will make implementation easier.

> 
> Thanks!
> 
> Alan
>
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9d1932e..ab3c86c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -695,7 +695,12 @@  struct bpf_stack_build_id {
  * 		to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
  * 		available. It can take up to three additional **u64**
  * 		arguments (as an eBPF helpers, the total number of arguments is
- * 		limited to five).
+ *		limited to five), and also supports %pT (BTF-based type
+ *		printing), as long as BPF_READ lockdown is not active.
+ *		"%pT" takes a "struct __btf_ptr *" as an argument; it
+ *		consists of a pointer value and specified BTF type string or id
+ *		used to select the type for display.  For more details, see
+ *		Documentation/core-api/printk-formats.rst.
  *
  * 		Each time the helper is called, it appends a line to the trace.
  * 		Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
@@ -731,10 +736,10 @@  struct bpf_stack_build_id {
  * 		The conversion specifiers supported by *fmt* are similar, but
  * 		more limited than for printk(). They are **%d**, **%i**,
  * 		**%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
- * 		**%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
- * 		of field, padding with zeroes, etc.) is available, and the
- * 		helper will return **-EINVAL** (but print nothing) if it
- * 		encounters an unknown specifier.
+ *		**%lli**, **%llu**, **%llx**, **%p**, **%pT[cNx0], **%s**.
+ *		Only %pT supports modifiers, and the helper will return
+ *		**-EINVAL** (but print nothing) if it encouters an unknown
+ *		specifier.
  *
  * 		Also, note that **bpf_trace_printk**\ () is slow, and should
  * 		only be used for debugging purposes. For this reason, a notice
@@ -4058,4 +4063,16 @@  struct bpf_pidns_info {
 	__u32 pid;
 	__u32 tgid;
 };
+
+/*
+ * struct __btf_ptr is used for %pT (typed pointer) display; the
+ * additional type string/BTF id are used to render the pointer
+ * data as the appropriate type.
+ */
+struct __btf_ptr {
+	void *ptr;
+	const char *type;
+	__u32 id;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d961428..c032c58 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -321,9 +321,12 @@  static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
 	return &bpf_probe_write_user_proto;
 }
 
+#define isbtffmt(c)	\
+	(c == 'T' || c == 'c' || c == 'N' || c == 'x' || c == '0')
+
 /*
  * Only limited trace_printk() conversion specifiers allowed:
- * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
+ * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pT %s
  */
 BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 	   u64, arg2, u64, arg3)
@@ -361,8 +364,20 @@  static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
 			i++;
 		} else if (fmt[i] == 'p' || fmt[i] == 's') {
 			mod[fmt_cnt]++;
-			/* disallow any further format extensions */
-			if (fmt[i + 1] != 0 &&
+			/*
+			 * allow BTF type-based printing, and disallow any
+			 * further format extensions.
+			 */
+			if (fmt[i] == 'p' && fmt[i + 1] == 'T') {
+				int ret;
+
+				ret = security_locked_down(LOCKDOWN_BPF_READ);
+				if (unlikely(ret < 0))
+					return ret;
+				i++;
+				while (isbtffmt(fmt[i]))
+					i++;
+			} else if (fmt[i + 1] != 0 &&
 			    !isspace(fmt[i + 1]) &&
 			    !ispunct(fmt[i + 1]))
 				return -EINVAL;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9d1932e..ab3c86c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -695,7 +695,12 @@  struct bpf_stack_build_id {
  * 		to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
  * 		available. It can take up to three additional **u64**
  * 		arguments (as an eBPF helpers, the total number of arguments is
- * 		limited to five).
+ *		limited to five), and also supports %pT (BTF-based type
+ *		printing), as long as BPF_READ lockdown is not active.
+ *		"%pT" takes a "struct __btf_ptr *" as an argument; it
+ *		consists of a pointer value and specified BTF type string or id
+ *		used to select the type for display.  For more details, see
+ *		Documentation/core-api/printk-formats.rst.
  *
  * 		Each time the helper is called, it appends a line to the trace.
  * 		Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
@@ -731,10 +736,10 @@  struct bpf_stack_build_id {
  * 		The conversion specifiers supported by *fmt* are similar, but
  * 		more limited than for printk(). They are **%d**, **%i**,
  * 		**%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
- * 		**%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
- * 		of field, padding with zeroes, etc.) is available, and the
- * 		helper will return **-EINVAL** (but print nothing) if it
- * 		encounters an unknown specifier.
+ *		**%lli**, **%llu**, **%llx**, **%p**, **%pT[cNx0], **%s**.
+ *		Only %pT supports modifiers, and the helper will return
+ *		**-EINVAL** (but print nothing) if it encouters an unknown
+ *		specifier.
  *
  * 		Also, note that **bpf_trace_printk**\ () is slow, and should
  * 		only be used for debugging purposes. For this reason, a notice
@@ -4058,4 +4063,16 @@  struct bpf_pidns_info {
 	__u32 pid;
 	__u32 tgid;
 };
+
+/*
+ * struct __btf_ptr is used for %pT (typed pointer) display; the
+ * additional type string/BTF id are used to render the pointer
+ * data as the appropriate type.
+ */
+struct __btf_ptr {
+	void *ptr;
+	const char *type;
+	__u32 id;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */