diff mbox series

[bpf-next,v4,12/21] bpf: add PTR_TO_BTF_ID_OR_NULL support

Message ID 20200509175912.2476576-1-yhs@fb.com
State Accepted
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf iterator for kernel data | expand

Commit Message

Yonghong Song May 9, 2020, 5:59 p.m. UTC
Add bpf_reg_type PTR_TO_BTF_ID_OR_NULL support.
For tracing/iter program, the bpf program context
definition, e.g., for previous bpf_map target, looks like
  struct bpf_iter__bpf_map {
    struct bpf_iter_meta *meta;
    struct bpf_map *map;
  };

The kernel guarantees that meta is not NULL, but
map pointer maybe NULL. The NULL map indicates that all
objects have been traversed, so bpf program can take
proper action, e.g., do final aggregation and/or send
final report to user space.

Add btf_id_or_null_non0_off to prog->aux structure, to
indicate that if the context access offset is not 0,
set to PTR_TO_BTF_ID_OR_NULL instead of PTR_TO_BTF_ID.
This bit is set for tracing/iter program.

Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h   |  2 ++
 kernel/bpf/btf.c      |  5 ++++-
 kernel/bpf/verifier.c | 16 ++++++++++++----
 3 files changed, 18 insertions(+), 5 deletions(-)

Comments

Alexei Starovoitov May 10, 2020, 12:50 a.m. UTC | #1
On Sat, May 09, 2020 at 10:59:12AM -0700, Yonghong Song wrote:
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index a2cfba89a8e1..c490fbde22d4 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -3790,7 +3790,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
>  		return true;
>  
>  	/* this is a pointer to another type */
> -	info->reg_type = PTR_TO_BTF_ID;
> +	if (off != 0 && prog->aux->btf_id_or_null_non0_off)
> +		info->reg_type = PTR_TO_BTF_ID_OR_NULL;
> +	else
> +		info->reg_type = PTR_TO_BTF_ID;

I think the verifier should be smarter than this.
It's too specific and inflexible. All ctx fields of bpf_iter execpt first
will be such ? let's figure out a different way to tell verifier about this.
How about using typedef with specific suffix? Like:
typedef struct bpf_map *bpf_map_or_null;
 struct bpf_iter__bpf_map {
   struct bpf_iter_meta *meta;
   bpf_map_or_null map;
 };
or use a union with specific second member? Like:
 struct bpf_iter__bpf_map {
   struct bpf_iter_meta *meta;
   union {
     struct bpf_map *map;
     long null;
   };
 };
Yonghong Song May 10, 2020, 5:18 a.m. UTC | #2
On 5/9/20 5:50 PM, Alexei Starovoitov wrote:
> On Sat, May 09, 2020 at 10:59:12AM -0700, Yonghong Song wrote:
>> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
>> index a2cfba89a8e1..c490fbde22d4 100644
>> --- a/kernel/bpf/btf.c
>> +++ b/kernel/bpf/btf.c
>> @@ -3790,7 +3790,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
>>   		return true;
>>   
>>   	/* this is a pointer to another type */
>> -	info->reg_type = PTR_TO_BTF_ID;
>> +	if (off != 0 && prog->aux->btf_id_or_null_non0_off)
>> +		info->reg_type = PTR_TO_BTF_ID_OR_NULL;
>> +	else
>> +		info->reg_type = PTR_TO_BTF_ID;
> 
> I think the verifier should be smarter than this.
> It's too specific and inflexible. All ctx fields of bpf_iter execpt first
> will be such ? let's figure out a different way to tell verifier about this.
> How about using typedef with specific suffix? Like:
> typedef struct bpf_map *bpf_map_or_null;
>   struct bpf_iter__bpf_map {
>     struct bpf_iter_meta *meta;
>     bpf_map_or_null map;
>   };
> or use a union with specific second member? Like:
>   struct bpf_iter__bpf_map {
>     struct bpf_iter_meta *meta;
>     union {
>       struct bpf_map *map;
>       long null;
>     };
>   };

I have an alternative approach to refactor this for future
support for map elements as well.

For example, for bpf_map_elements iterator the prog context type
can be
     struct bpf_iter_bpf_map_elem {
  	struct bpf_iter_meta *meta;
	strruct bpf_map *map;
	<key type>  *key;
	<value type> *val;
    };

target will pass the following information to bpf_iter registration:
    arg 1: PTR_TO_BTF_ID
    arg 2: PTR_TO_BTF_ID_OR_NULL
    arg 3: PTR_TO_BUFFER
    arg 4: PTR_TO_BUFFER

verifier will retrieve the reg_type from target.
Alexei Starovoitov May 10, 2020, 4:11 p.m. UTC | #3
On Sat, May 9, 2020 at 10:19 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 5/9/20 5:50 PM, Alexei Starovoitov wrote:
> > On Sat, May 09, 2020 at 10:59:12AM -0700, Yonghong Song wrote:
> >> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> >> index a2cfba89a8e1..c490fbde22d4 100644
> >> --- a/kernel/bpf/btf.c
> >> +++ b/kernel/bpf/btf.c
> >> @@ -3790,7 +3790,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
> >>              return true;
> >>
> >>      /* this is a pointer to another type */
> >> -    info->reg_type = PTR_TO_BTF_ID;
> >> +    if (off != 0 && prog->aux->btf_id_or_null_non0_off)
> >> +            info->reg_type = PTR_TO_BTF_ID_OR_NULL;
> >> +    else
> >> +            info->reg_type = PTR_TO_BTF_ID;
> >
> > I think the verifier should be smarter than this.
> > It's too specific and inflexible. All ctx fields of bpf_iter execpt first
> > will be such ? let's figure out a different way to tell verifier about this.
> > How about using typedef with specific suffix? Like:
> > typedef struct bpf_map *bpf_map_or_null;
> >   struct bpf_iter__bpf_map {
> >     struct bpf_iter_meta *meta;
> >     bpf_map_or_null map;
> >   };
> > or use a union with specific second member? Like:
> >   struct bpf_iter__bpf_map {
> >     struct bpf_iter_meta *meta;
> >     union {
> >       struct bpf_map *map;
> >       long null;
> >     };
> >   };
>
> I have an alternative approach to refactor this for future
> support for map elements as well.
>
> For example, for bpf_map_elements iterator the prog context type
> can be
>      struct bpf_iter_bpf_map_elem {
>         struct bpf_iter_meta *meta;
>         strruct bpf_map *map;
>         <key type>  *key;
>         <value type> *val;
>     };
>
> target will pass the following information to bpf_iter registration:
>     arg 1: PTR_TO_BTF_ID
>     arg 2: PTR_TO_BTF_ID_OR_NULL
>     arg 3: PTR_TO_BUFFER
>     arg 4: PTR_TO_BUFFER
>
> verifier will retrieve the reg_type from target.

you mean to introduce something like 'struct bpf_func_proto'
that describes types of helpers, but instead something similar
to clarify the types in ctx ? That should work. Thanks
Yonghong Song May 10, 2020, 5:05 p.m. UTC | #4
On 5/10/20 9:11 AM, Alexei Starovoitov wrote:
> On Sat, May 9, 2020 at 10:19 PM Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 5/9/20 5:50 PM, Alexei Starovoitov wrote:
>>> On Sat, May 09, 2020 at 10:59:12AM -0700, Yonghong Song wrote:
>>>> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
>>>> index a2cfba89a8e1..c490fbde22d4 100644
>>>> --- a/kernel/bpf/btf.c
>>>> +++ b/kernel/bpf/btf.c
>>>> @@ -3790,7 +3790,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
>>>>               return true;
>>>>
>>>>       /* this is a pointer to another type */
>>>> -    info->reg_type = PTR_TO_BTF_ID;
>>>> +    if (off != 0 && prog->aux->btf_id_or_null_non0_off)
>>>> +            info->reg_type = PTR_TO_BTF_ID_OR_NULL;
>>>> +    else
>>>> +            info->reg_type = PTR_TO_BTF_ID;
>>>
>>> I think the verifier should be smarter than this.
>>> It's too specific and inflexible. All ctx fields of bpf_iter execpt first
>>> will be such ? let's figure out a different way to tell verifier about this.
>>> How about using typedef with specific suffix? Like:
>>> typedef struct bpf_map *bpf_map_or_null;
>>>    struct bpf_iter__bpf_map {
>>>      struct bpf_iter_meta *meta;
>>>      bpf_map_or_null map;
>>>    };
>>> or use a union with specific second member? Like:
>>>    struct bpf_iter__bpf_map {
>>>      struct bpf_iter_meta *meta;
>>>      union {
>>>        struct bpf_map *map;
>>>        long null;
>>>      };
>>>    };
>>
>> I have an alternative approach to refactor this for future
>> support for map elements as well.
>>
>> For example, for bpf_map_elements iterator the prog context type
>> can be
>>       struct bpf_iter_bpf_map_elem {
>>          struct bpf_iter_meta *meta;
>>          strruct bpf_map *map;
>>          <key type>  *key;
>>          <value type> *val;
>>      };
>>
>> target will pass the following information to bpf_iter registration:
>>      arg 1: PTR_TO_BTF_ID
>>      arg 2: PTR_TO_BTF_ID_OR_NULL
>>      arg 3: PTR_TO_BUFFER
>>      arg 4: PTR_TO_BUFFER
>>
>> verifier will retrieve the reg_type from target.
> 
> you mean to introduce something like 'struct bpf_func_proto'
> that describes types of helpers, but instead something similar
> to clarify the types in ctx ? That should work. Thanks

Yes, this is what I think will be extensible.
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 363ab0751967..cf4b6e44f2bc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -320,6 +320,7 @@  enum bpf_reg_type {
 	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
 	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
 	PTR_TO_BTF_ID,		 /* reg points to kernel struct */
+	PTR_TO_BTF_ID_OR_NULL,	 /* reg points to kernel struct or NULL */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -658,6 +659,7 @@  struct bpf_prog_aux {
 	bool offload_requested;
 	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
 	bool func_proto_unreliable;
+	bool btf_id_or_null_non0_off;
 	enum bpf_tramp_prog_type trampoline_prog_type;
 	struct bpf_trampoline *trampoline;
 	struct hlist_node tramp_hlist;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index a2cfba89a8e1..c490fbde22d4 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3790,7 +3790,10 @@  bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		return true;
 
 	/* this is a pointer to another type */
-	info->reg_type = PTR_TO_BTF_ID;
+	if (off != 0 && prog->aux->btf_id_or_null_non0_off)
+		info->reg_type = PTR_TO_BTF_ID_OR_NULL;
+	else
+		info->reg_type = PTR_TO_BTF_ID;
 
 	if (tgt_prog) {
 		ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d725ff7d11db..36b2a38a06fe 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -398,7 +398,8 @@  static bool reg_type_may_be_null(enum bpf_reg_type type)
 	return type == PTR_TO_MAP_VALUE_OR_NULL ||
 	       type == PTR_TO_SOCKET_OR_NULL ||
 	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
-	       type == PTR_TO_TCP_SOCK_OR_NULL;
+	       type == PTR_TO_TCP_SOCK_OR_NULL ||
+	       type == PTR_TO_BTF_ID_OR_NULL;
 }
 
 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
@@ -483,6 +484,7 @@  static const char * const reg_type_str[] = {
 	[PTR_TO_TP_BUFFER]	= "tp_buffer",
 	[PTR_TO_XDP_SOCK]	= "xdp_sock",
 	[PTR_TO_BTF_ID]		= "ptr_",
+	[PTR_TO_BTF_ID_OR_NULL]	= "ptr_or_null_",
 };
 
 static char slot_type_char[] = {
@@ -543,7 +545,7 @@  static void print_verifier_state(struct bpf_verifier_env *env,
 			/* reg->off should be 0 for SCALAR_VALUE */
 			verbose(env, "%lld", reg->var_off.value + reg->off);
 		} else {
-			if (t == PTR_TO_BTF_ID)
+			if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
 				verbose(env, "%s", kernel_type_name(reg->btf_id));
 			verbose(env, "(id=%d", reg->id);
 			if (reg_type_may_be_refcounted_or_null(t))
@@ -2139,6 +2141,7 @@  static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_TCP_SOCK_OR_NULL:
 	case PTR_TO_XDP_SOCK:
 	case PTR_TO_BTF_ID:
+	case PTR_TO_BTF_ID_OR_NULL:
 		return true;
 	default:
 		return false;
@@ -2659,7 +2662,7 @@  static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 		 */
 		*reg_type = info.reg_type;
 
-		if (*reg_type == PTR_TO_BTF_ID)
+		if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
 			*btf_id = info.btf_id;
 		else
 			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
@@ -3243,7 +3246,8 @@  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 				 * a sub-register.
 				 */
 				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
-				if (reg_type == PTR_TO_BTF_ID)
+				if (reg_type == PTR_TO_BTF_ID ||
+				    reg_type == PTR_TO_BTF_ID_OR_NULL)
 					regs[value_regno].btf_id = btf_id;
 			}
 			regs[value_regno].type = reg_type;
@@ -6572,6 +6576,8 @@  static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 			reg->type = PTR_TO_SOCK_COMMON;
 		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
 			reg->type = PTR_TO_TCP_SOCK;
+		} else if (reg->type == PTR_TO_BTF_ID_OR_NULL) {
+			reg->type = PTR_TO_BTF_ID;
 		}
 		if (is_null) {
 			/* We don't need id and ref_obj_id from this point
@@ -8429,6 +8435,7 @@  static bool reg_type_mismatch_ok(enum bpf_reg_type type)
 	case PTR_TO_TCP_SOCK_OR_NULL:
 	case PTR_TO_XDP_SOCK:
 	case PTR_TO_BTF_ID:
+	case PTR_TO_BTF_ID_OR_NULL:
 		return false;
 	default:
 		return true;
@@ -10640,6 +10647,7 @@  static int check_attach_btf_id(struct bpf_verifier_env *env)
 		prog->aux->attach_func_proto = t;
 		if (!bpf_iter_prog_supported(prog))
 			return -EINVAL;
+		prog->aux->btf_id_or_null_non0_off = true;
 		ret = btf_distill_func_proto(&env->log, btf, t,
 					     tname, &fmodel);
 		return ret;