diff mbox series

[bpf-next,v4,02/21] bpf: allow loading of a bpf_iter program

Message ID 20200509175900.2474947-1-yhs@fb.com
State Accepted
Delegated to: BPF Maintainers
Headers show
Series bpf: implement bpf iterator for kernel data | expand

Commit Message

Yonghong Song May 9, 2020, 5:59 p.m. UTC
A bpf_iter program is a tracing program with attach type
BPF_TRACE_ITER. The load attribute
  attach_btf_id
is used by the verifier against a particular kernel function,
which represents a target, e.g., __bpf_iter__bpf_map
for target bpf_map which is implemented later.

The program return value must be 0 or 1 for now.
  0 : successful, except potential seq_file buffer overflow
      which is handled by seq_file reader.
  1 : request to restart the same object

In the future, other return values may be used for filtering or
terminating the iterator.

Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h            |  3 +++
 include/uapi/linux/bpf.h       |  1 +
 kernel/bpf/bpf_iter.c          | 36 ++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c          | 21 ++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  1 +
 5 files changed, 62 insertions(+)

Comments

Alexei Starovoitov May 10, 2020, 12:41 a.m. UTC | #1
On Sat, May 09, 2020 at 10:59:00AM -0700, Yonghong Song wrote:
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 70ad009577f8..d725ff7d11db 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -7101,6 +7101,10 @@ static int check_return_code(struct bpf_verifier_env *env)
>  			return 0;
>  		range = tnum_const(0);
>  		break;
> +	case BPF_PROG_TYPE_TRACING:
> +		if (env->prog->expected_attach_type != BPF_TRACE_ITER)
> +			return 0;
> +		break;

Not related to this set, but I just noticed that I managed to forget to
add this check for fentry/fexit/freplace.
While it's not too late let's enforce return 0 for them ?
Could you follow up with a patch for bpf tree?
Yonghong Song May 10, 2020, 5:07 a.m. UTC | #2
On 5/9/20 5:41 PM, Alexei Starovoitov wrote:
> On Sat, May 09, 2020 at 10:59:00AM -0700, Yonghong Song wrote:
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index 70ad009577f8..d725ff7d11db 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -7101,6 +7101,10 @@ static int check_return_code(struct bpf_verifier_env *env)
>>   			return 0;
>>   		range = tnum_const(0);
>>   		break;
>> +	case BPF_PROG_TYPE_TRACING:
>> +		if (env->prog->expected_attach_type != BPF_TRACE_ITER)
>> +			return 0;
>> +		break;
> 
> Not related to this set, but I just noticed that I managed to forget to
> add this check for fentry/fexit/freplace.
> While it's not too late let's enforce return 0 for them ?
> Could you follow up with a patch for bpf tree?

Sure. I can have a followup patch for this.
Yonghong Song May 12, 2020, 3:41 p.m. UTC | #3
On 5/9/20 5:41 PM, Alexei Starovoitov wrote:
> On Sat, May 09, 2020 at 10:59:00AM -0700, Yonghong Song wrote:
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index 70ad009577f8..d725ff7d11db 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -7101,6 +7101,10 @@ static int check_return_code(struct bpf_verifier_env *env)
>>   			return 0;
>>   		range = tnum_const(0);
>>   		break;
>> +	case BPF_PROG_TYPE_TRACING:
>> +		if (env->prog->expected_attach_type != BPF_TRACE_ITER)
>> +			return 0;
>> +		break;
> 
> Not related to this set, but I just noticed that I managed to forget to
> add this check for fentry/fexit/freplace.
> While it's not too late let's enforce return 0 for them ?
> Could you follow up with a patch for bpf tree?

Just want to double check. In selftests, we have

SEC("fentry/__set_task_comm")
int BPF_PROG(prog4, struct task_struct *tsk, const char *buf, bool exec)
{
         return !tsk;
}

SEC("fexit/__set_task_comm")
int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec)
{
         return !tsk;
}

fentry/fexit may return 1. What is the intention here? Does this mean
we should allow [0, 1] instead of [0, 0]?

For freplace, we have

__u64 test_get_skb_len = 0;
SEC("freplace/get_skb_len")
int new_get_skb_len(struct __sk_buff *skb)
{
         int len = skb->len;

         if (len != 74)
                 return 0;
         test_get_skb_len = 1;
         return 74; /* original get_skb_len() returns skb->len */
}

That means freplace may return arbitrary values depending on what
to replace?
Alexei Starovoitov May 12, 2020, 4:25 p.m. UTC | #4
On Tue, May 12, 2020 at 08:41:19AM -0700, Yonghong Song wrote:
> 
> 
> On 5/9/20 5:41 PM, Alexei Starovoitov wrote:
> > On Sat, May 09, 2020 at 10:59:00AM -0700, Yonghong Song wrote:
> > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > index 70ad009577f8..d725ff7d11db 100644
> > > --- a/kernel/bpf/verifier.c
> > > +++ b/kernel/bpf/verifier.c
> > > @@ -7101,6 +7101,10 @@ static int check_return_code(struct bpf_verifier_env *env)
> > >   			return 0;
> > >   		range = tnum_const(0);
> > >   		break;
> > > +	case BPF_PROG_TYPE_TRACING:
> > > +		if (env->prog->expected_attach_type != BPF_TRACE_ITER)
> > > +			return 0;
> > > +		break;
> > 
> > Not related to this set, but I just noticed that I managed to forget to
> > add this check for fentry/fexit/freplace.
> > While it's not too late let's enforce return 0 for them ?
> > Could you follow up with a patch for bpf tree?
> 
> Just want to double check. In selftests, we have
> 
> SEC("fentry/__set_task_comm")
> int BPF_PROG(prog4, struct task_struct *tsk, const char *buf, bool exec)
> {
>         return !tsk;
> }
> 
> SEC("fexit/__set_task_comm")
> int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec)
> {
>         return !tsk;
> }
> 
> fentry/fexit may returrn 1. What is the intention here? Does this mean
> we should allow [0, 1] instead of [0, 0]?

Argh. I missed that bit when commit ac065870d9282 tweaked the return
value. For fentry/exit the return value is ignored by trampoline.
imo it's misleading to users and should be rejected by the verifier.
so [0,0] for fentry/fexit

> For freplace, we have
> 
> __u64 test_get_skb_len = 0;
> SEC("freplace/get_skb_len")
> int new_get_skb_len(struct __sk_buff *skb)
> {
>         int len = skb->len;
> 
>         if (len != 74)
>                 return 0;
>         test_get_skb_len = 1;
>         return 74; /* original get_skb_len() returns skb->len */
> }
> 
> That means freplace may return arbitrary values depending on what
> to replace?

yes. freplace and fmod_ret can return anything.
Yonghong Song May 12, 2020, 4:29 p.m. UTC | #5
On 5/12/20 9:25 AM, Alexei Starovoitov wrote:
> On Tue, May 12, 2020 at 08:41:19AM -0700, Yonghong Song wrote:
>>
>>
>> On 5/9/20 5:41 PM, Alexei Starovoitov wrote:
>>> On Sat, May 09, 2020 at 10:59:00AM -0700, Yonghong Song wrote:
>>>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>>>> index 70ad009577f8..d725ff7d11db 100644
>>>> --- a/kernel/bpf/verifier.c
>>>> +++ b/kernel/bpf/verifier.c
>>>> @@ -7101,6 +7101,10 @@ static int check_return_code(struct bpf_verifier_env *env)
>>>>    			return 0;
>>>>    		range = tnum_const(0);
>>>>    		break;
>>>> +	case BPF_PROG_TYPE_TRACING:
>>>> +		if (env->prog->expected_attach_type != BPF_TRACE_ITER)
>>>> +			return 0;
>>>> +		break;
>>>
>>> Not related to this set, but I just noticed that I managed to forget to
>>> add this check for fentry/fexit/freplace.
>>> While it's not too late let's enforce return 0 for them ?
>>> Could you follow up with a patch for bpf tree?
>>
>> Just want to double check. In selftests, we have
>>
>> SEC("fentry/__set_task_comm")
>> int BPF_PROG(prog4, struct task_struct *tsk, const char *buf, bool exec)
>> {
>>          return !tsk;
>> }
>>
>> SEC("fexit/__set_task_comm")
>> int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec)
>> {
>>          return !tsk;
>> }
>>
>> fentry/fexit may returrn 1. What is the intention here? Does this mean
>> we should allow [0, 1] instead of [0, 0]?
> 
> Argh. I missed that bit when commit ac065870d9282 tweaked the return
> value. For fentry/exit the return value is ignored by trampoline.
> imo it's misleading to users and should be rejected by the verifier.
> so [0,0] for fentry/fexit

Sounds good. Will craft patch to enforce fentry/fexit with [0,0] then.
Thanks!

> 
>> For freplace, we have
>>
>> __u64 test_get_skb_len = 0;
>> SEC("freplace/get_skb_len")
>> int new_get_skb_len(struct __sk_buff *skb)
>> {
>>          int len = skb->len;
>>
>>          if (len != 74)
>>                  return 0;
>>          test_get_skb_len = 1;
>>          return 74; /* original get_skb_len() returns skb->len */
>> }
>>
>> That means freplace may return arbitrary values depending on what
>> to replace?
> 
> yes. freplace and fmod_ret can return anything.
>
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 40c78b86fe38..f28bdd714754 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1127,6 +1127,8 @@  struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
+#define BPF_ITER_FUNC_PREFIX "__bpf_iter__"
+
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
 typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
 
@@ -1140,6 +1142,7 @@  struct bpf_iter_reg {
 
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const char *target);
+bool bpf_iter_prog_supported(struct bpf_prog *prog);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6e5e7caa3739..c8a5325cc8d0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -218,6 +218,7 @@  enum bpf_attach_type {
 	BPF_TRACE_FEXIT,
 	BPF_MODIFY_RETURN,
 	BPF_LSM_MAC,
+	BPF_TRACE_ITER,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 5a8119d17d14..dec182d8395a 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -12,6 +12,7 @@  struct bpf_iter_target_info {
 	bpf_iter_init_seq_priv_t init_seq_private;
 	bpf_iter_fini_seq_priv_t fini_seq_private;
 	u32 seq_priv_size;
+	u32 btf_id;	/* cached value */
 };
 
 static struct list_head targets = LIST_HEAD_INIT(targets);
@@ -57,3 +58,38 @@  void bpf_iter_unreg_target(const char *target)
 
 	WARN_ON(found == false);
 }
+
+static void cache_btf_id(struct bpf_iter_target_info *tinfo,
+			 struct bpf_prog *prog)
+{
+	tinfo->btf_id = prog->aux->attach_btf_id;
+}
+
+bool bpf_iter_prog_supported(struct bpf_prog *prog)
+{
+	const char *attach_fname = prog->aux->attach_func_name;
+	u32 prog_btf_id = prog->aux->attach_btf_id;
+	const char *prefix = BPF_ITER_FUNC_PREFIX;
+	struct bpf_iter_target_info *tinfo;
+	int prefix_len = strlen(prefix);
+	bool supported = false;
+
+	if (strncmp(attach_fname, prefix, prefix_len))
+		return false;
+
+	mutex_lock(&targets_mutex);
+	list_for_each_entry(tinfo, &targets, list) {
+		if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) {
+			supported = true;
+			break;
+		}
+		if (!strcmp(attach_fname + prefix_len, tinfo->target)) {
+			cache_btf_id(tinfo, prog);
+			supported = true;
+			break;
+		}
+	}
+	mutex_unlock(&targets_mutex);
+
+	return supported;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 70ad009577f8..d725ff7d11db 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7101,6 +7101,10 @@  static int check_return_code(struct bpf_verifier_env *env)
 			return 0;
 		range = tnum_const(0);
 		break;
+	case BPF_PROG_TYPE_TRACING:
+		if (env->prog->expected_attach_type != BPF_TRACE_ITER)
+			return 0;
+		break;
 	default:
 		return 0;
 	}
@@ -10481,6 +10485,7 @@  static int check_attach_btf_id(struct bpf_verifier_env *env)
 	struct bpf_prog *tgt_prog = prog->aux->linked_prog;
 	u32 btf_id = prog->aux->attach_btf_id;
 	const char prefix[] = "btf_trace_";
+	struct btf_func_model fmodel;
 	int ret = 0, subprog = -1, i;
 	struct bpf_trampoline *tr;
 	const struct btf_type *t;
@@ -10622,6 +10627,22 @@  static int check_attach_btf_id(struct bpf_verifier_env *env)
 		prog->aux->attach_func_proto = t;
 		prog->aux->attach_btf_trace = true;
 		return 0;
+	case BPF_TRACE_ITER:
+		if (!btf_type_is_func(t)) {
+			verbose(env, "attach_btf_id %u is not a function\n",
+				btf_id);
+			return -EINVAL;
+		}
+		t = btf_type_by_id(btf, t->type);
+		if (!btf_type_is_func_proto(t))
+			return -EINVAL;
+		prog->aux->attach_func_name = tname;
+		prog->aux->attach_func_proto = t;
+		if (!bpf_iter_prog_supported(prog))
+			return -EINVAL;
+		ret = btf_distill_func_proto(&env->log, btf, t,
+					     tname, &fmodel);
+		return ret;
 	default:
 		if (!prog_extension)
 			return -EINVAL;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6e5e7caa3739..c8a5325cc8d0 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -218,6 +218,7 @@  enum bpf_attach_type {
 	BPF_TRACE_FEXIT,
 	BPF_MODIFY_RETURN,
 	BPF_LSM_MAC,
+	BPF_TRACE_ITER,
 	__MAX_BPF_ATTACH_TYPE
 };