[net-next,v4,2/3] bpf: Add bpf_current_task_under_cgroup helper
diff mbox

Message ID 20160812031454.GA2075@ircssh.c.rugged-nimbus-611.internal
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Sargun Dhillon Aug. 12, 2016, 3:14 a.m. UTC
This adds a bpf helper that's similar to the skb_in_cgroup helper to check
whether the probe is currently executing in the context of a specific
subset of the cgroupsv2 hierarchy. It does this based on a membership test
against a cgroup arraymap. Calling it from an interrupt is invalid and
returns an error. The helper is primarily to be used in debugging
activities for containers, where you may have multiple programs running in
a given top-level "container".

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Tejun Heo <tj@kernel.org>
---
 include/uapi/linux/bpf.h | 11 +++++++++++
 kernel/bpf/arraymap.c    |  2 +-
 kernel/bpf/verifier.c    |  4 +++-
 kernel/trace/bpf_trace.c | 30 ++++++++++++++++++++++++++++++
 4 files changed, 45 insertions(+), 2 deletions(-)

Comments

Alexei Starovoitov Aug. 12, 2016, 4:48 a.m. UTC | #1
On Thu, Aug 11, 2016 at 08:14:56PM -0700, Sargun Dhillon wrote:
> This adds a bpf helper that's similar to the skb_in_cgroup helper to check
> whether the probe is currently executing in the context of a specific
> subset of the cgroupsv2 hierarchy. It does this based on membership test
> for a cgroup arraymap. It is invalid to call this in an interrupt, and
> it'll return an error. The helper is primarily to be used in debugging
> activities for containers, where you may have multiple programs running in
> a given top-level "container".
> 
> Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Tejun Heo <tj@kernel.org>
> ---
> +	/**
> +	 * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
> +	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
> +	 * @index: index of the cgroup in the bpf_map
> +	 * Return:
> +	 *   == 0 current failed the cgroup2 descendant test
> +	 *   == 1 current succeeded the cgroup2 descendant test
> +	 *    < 0 error
> +	 */
> +	BPF_FUNC_current_task_under_cgroup,
..
>  	case BPF_MAP_TYPE_CGROUP_ARRAY:
> -		if (func_id != BPF_FUNC_skb_in_cgroup)
> +		if (func_id != BPF_FUNC_skb_in_cgroup &&
> +		    func_id != BPF_FUNC_current_task_under_cgroup)
>  			goto error;
...
> +	case BPF_FUNC_current_task_under_cgroup:
>  	case BPF_FUNC_skb_in_cgroup:

Tejun,
do you feel strongly about 'under' ?
It just looks inconsistent vs existing skb_in_cgroup...
"in cgroup" - 4k google hits
"under cgroup" - 2k google hits
Sargun Dhillon Aug. 12, 2016, 4:50 a.m. UTC | #2
I realize that in_cgroup is more consistent, but under_cgroup makes
far more sense to me. I think it's more intuitive.

On Thu, Aug 11, 2016 at 9:48 PM, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
> On Thu, Aug 11, 2016 at 08:14:56PM -0700, Sargun Dhillon wrote:
>> This adds a bpf helper that's similar to the skb_in_cgroup helper to check
>> whether the probe is currently executing in the context of a specific
>> subset of the cgroupsv2 hierarchy. It does this based on membership test
>> for a cgroup arraymap. It is invalid to call this in an interrupt, and
>> it'll return an error. The helper is primarily to be used in debugging
>> activities for containers, where you may have multiple programs running in
>> a given top-level "container".
>>
>> Signed-off-by: Sargun Dhillon <sargun@sargun.me>
>> Cc: Alexei Starovoitov <ast@kernel.org>
>> Cc: Daniel Borkmann <daniel@iogearbox.net>
>> Cc: Tejun Heo <tj@kernel.org>
>> ---
>> +     /**
>> +      * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
>> +      * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
>> +      * @index: index of the cgroup in the bpf_map
>> +      * Return:
>> +      *   == 0 current failed the cgroup2 descendant test
>> +      *   == 1 current succeeded the cgroup2 descendant test
>> +      *    < 0 error
>> +      */
>> +     BPF_FUNC_current_task_under_cgroup,
> ..
>>       case BPF_MAP_TYPE_CGROUP_ARRAY:
>> -             if (func_id != BPF_FUNC_skb_in_cgroup)
>> +             if (func_id != BPF_FUNC_skb_in_cgroup &&
>> +                 func_id != BPF_FUNC_current_task_under_cgroup)
>>                       goto error;
> ...
>> +     case BPF_FUNC_current_task_under_cgroup:
>>       case BPF_FUNC_skb_in_cgroup:
>
> Tejun,
> do you feel strongly about 'under' ?
> It just looks inconsistent vs existing skb_in_cgroup...
> "in cgroup" - 4k google hits
> "under cgroup" - 2k google hits
>
Daniel Borkmann Aug. 12, 2016, 7:16 a.m. UTC | #3
On 08/12/2016 06:50 AM, Sargun Dhillon wrote:
> I realize that in_cgroup is more consistent, but under_cgroup makes
> far more sense to me. I think it's more intuitive.
>
> On Thu, Aug 11, 2016 at 9:48 PM, Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
>> On Thu, Aug 11, 2016 at 08:14:56PM -0700, Sargun Dhillon wrote:
>>> This adds a bpf helper that's similar to the skb_in_cgroup helper to check
>>> whether the probe is currently executing in the context of a specific
>>> subset of the cgroupsv2 hierarchy. It does this based on membership test
>>> for a cgroup arraymap. It is invalid to call this in an interrupt, and
>>> it'll return an error. The helper is primarily to be used in debugging
>>> activities for containers, where you may have multiple programs running in
>>> a given top-level "container".
>>>
>>> Signed-off-by: Sargun Dhillon <sargun@sargun.me>
>>> Cc: Alexei Starovoitov <ast@kernel.org>
>>> Cc: Daniel Borkmann <daniel@iogearbox.net>
>>> Cc: Tejun Heo <tj@kernel.org>
>>> ---
>>> +     /**
>>> +      * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
>>> +      * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
>>> +      * @index: index of the cgroup in the bpf_map
>>> +      * Return:
>>> +      *   == 0 current failed the cgroup2 descendant test
>>> +      *   == 1 current succeeded the cgroup2 descendant test
>>> +      *    < 0 error
>>> +      */
>>> +     BPF_FUNC_current_task_under_cgroup,
>> ..
>>>        case BPF_MAP_TYPE_CGROUP_ARRAY:
>>> -             if (func_id != BPF_FUNC_skb_in_cgroup)
>>> +             if (func_id != BPF_FUNC_skb_in_cgroup &&
>>> +                 func_id != BPF_FUNC_current_task_under_cgroup)
>>>                        goto error;
>> ...
>>> +     case BPF_FUNC_current_task_under_cgroup:
>>>        case BPF_FUNC_skb_in_cgroup:
>>
>> Tejun,
>> do you feel strongly about 'under' ?
>> It just looks inconsistent vs existing skb_in_cgroup...
>> "in cgroup" - 4k google hits
>> "under cgroup" - 2k google hits

Alternative could be that we take "BPF_FUNC_current_in_cgroup" as a
helper enum to keep consistency with what we have wrt skb helper, but
for the cgroup header have the suggested task_under_cgroup_hierarchy()
name.
Sargun Dhillon Aug. 12, 2016, 7:22 a.m. UTC | #4
On Fri, Aug 12, 2016 at 09:16:07AM +0200, Daniel Borkmann wrote:
> On 08/12/2016 06:50 AM, Sargun Dhillon wrote:
> >I realize that in_cgroup is more consistent, but under_cgroup makes
> >far more sense to me. I think it's more intuitive.
> >
> >On Thu, Aug 11, 2016 at 9:48 PM, Alexei Starovoitov
> ><alexei.starovoitov@gmail.com> wrote:
> >>On Thu, Aug 11, 2016 at 08:14:56PM -0700, Sargun Dhillon wrote:
> >>>This adds a bpf helper that's similar to the skb_in_cgroup helper to check
> >>>whether the probe is currently executing in the context of a specific
> >>>subset of the cgroupsv2 hierarchy. It does this based on membership test
> >>>for a cgroup arraymap. It is invalid to call this in an interrupt, and
> >>>it'll return an error. The helper is primarily to be used in debugging
> >>>activities for containers, where you may have multiple programs running in
> >>>a given top-level "container".
> >>>
> >>>Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> >>>Cc: Alexei Starovoitov <ast@kernel.org>
> >>>Cc: Daniel Borkmann <daniel@iogearbox.net>
> >>>Cc: Tejun Heo <tj@kernel.org>
> >>>---
> >>>+     /**
> >>>+      * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
> >>>+      * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
> >>>+      * @index: index of the cgroup in the bpf_map
> >>>+      * Return:
> >>>+      *   == 0 current failed the cgroup2 descendant test
> >>>+      *   == 1 current succeeded the cgroup2 descendant test
> >>>+      *    < 0 error
> >>>+      */
> >>>+     BPF_FUNC_current_task_under_cgroup,
> >>..
> >>>       case BPF_MAP_TYPE_CGROUP_ARRAY:
> >>>-             if (func_id != BPF_FUNC_skb_in_cgroup)
> >>>+             if (func_id != BPF_FUNC_skb_in_cgroup &&
> >>>+                 func_id != BPF_FUNC_current_task_under_cgroup)
> >>>                       goto error;
> >>...
> >>>+     case BPF_FUNC_current_task_under_cgroup:
> >>>       case BPF_FUNC_skb_in_cgroup:
> >>
> >>Tejun,
> >>do you feel strongly about 'under' ?
> >>It just looks inconsistent vs existing skb_in_cgroup...
> >>"in cgroup" - 4k google hits
> >>"under cgroup" - 2k google hits
> 
> Alternative could be that we take "BPF_FUNC_current_in_cgroup" as a
> helper enum to keep consistency with what we have wrt skb helper, but
> for the cgroup header have the suggested task_under_cgroup_hierarchy()
> name.

I actually wish we could rename skb_in_cgroup to skb_under_cgroup. If we ever 
introduced a check for absolute membership versus ancestral membership, what 
would we call that?
Daniel Borkmann Aug. 12, 2016, 7:40 a.m. UTC | #5
On 08/12/2016 09:22 AM, Sargun Dhillon wrote:
> On Fri, Aug 12, 2016 at 09:16:07AM +0200, Daniel Borkmann wrote:
>> On 08/12/2016 06:50 AM, Sargun Dhillon wrote:
>>> I realize that in_cgroup is more consistent, but under_cgroup makes
>>> far more sense to me. I think it's more intuitive.
>>>
>>> On Thu, Aug 11, 2016 at 9:48 PM, Alexei Starovoitov
>>> <alexei.starovoitov@gmail.com> wrote:
>>>> On Thu, Aug 11, 2016 at 08:14:56PM -0700, Sargun Dhillon wrote:
>>>>> This adds a bpf helper that's similar to the skb_in_cgroup helper to check
>>>>> whether the probe is currently executing in the context of a specific
>>>>> subset of the cgroupsv2 hierarchy. It does this based on membership test
>>>>> for a cgroup arraymap. It is invalid to call this in an interrupt, and
>>>>> it'll return an error. The helper is primarily to be used in debugging
>>>>> activities for containers, where you may have multiple programs running in
>>>>> a given top-level "container".
>>>>>
>>>>> Signed-off-by: Sargun Dhillon <sargun@sargun.me>
>>>>> Cc: Alexei Starovoitov <ast@kernel.org>
>>>>> Cc: Daniel Borkmann <daniel@iogearbox.net>
>>>>> Cc: Tejun Heo <tj@kernel.org>
>>>>> ---
>>>>> +     /**
>>>>> +      * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
>>>>> +      * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
>>>>> +      * @index: index of the cgroup in the bpf_map
>>>>> +      * Return:
>>>>> +      *   == 0 current failed the cgroup2 descendant test
>>>>> +      *   == 1 current succeeded the cgroup2 descendant test
>>>>> +      *    < 0 error
>>>>> +      */
>>>>> +     BPF_FUNC_current_task_under_cgroup,
>>>> ..
>>>>>        case BPF_MAP_TYPE_CGROUP_ARRAY:
>>>>> -             if (func_id != BPF_FUNC_skb_in_cgroup)
>>>>> +             if (func_id != BPF_FUNC_skb_in_cgroup &&
>>>>> +                 func_id != BPF_FUNC_current_task_under_cgroup)
>>>>>                        goto error;
>>>> ...
>>>>> +     case BPF_FUNC_current_task_under_cgroup:
>>>>>        case BPF_FUNC_skb_in_cgroup:
>>>>
>>>> Tejun,
>>>> do you feel strongly about 'under' ?
>>>> It just looks inconsistent vs existing skb_in_cgroup...
>>>> "in cgroup" - 4k google hits
>>>> "under cgroup" - 2k google hits
>>
>> Alternative could be that we take "BPF_FUNC_current_in_cgroup" as a
>> helper enum to keep consistency with what we have wrt skb helper, but
>> for the cgroup header have the suggested task_under_cgroup_hierarchy()
>> name.
>
> I actually wish we could rename skb_in_cgroup to skb_under_cgroup. If we ever
> introduced a check for absolute membership versus ancestral membership, what
> would we call that?

That option is, by the way, still on the table for -net tree, since 4.8 is not
released yet, so it could still be renamed into BPF_FUNC_skb_under_cgroup.

Then you could make this one here for -net-next as "BPF_FUNC_current_under_cgroup".

Tejun, Alexei?
Tejun Heo Aug. 12, 2016, 1:21 p.m. UTC | #6
On Thu, Aug 11, 2016 at 09:50:48PM -0700, Sargun Dhillon wrote:
> I realize that in_cgroup is more consistent, but under_cgroup makes
> far more sense to me. I think it's more intuitive.

So, I think in_cgroup should mean that the object is in that
particular cgroup, while under_cgroup should mean that it is in the
subhierarchy of that cgroup.  Let's rename the other subhierarchy test
to under too.  I think that'd be a lot less confusing going forward.

Thanks.
Tejun Heo Aug. 12, 2016, 1:23 p.m. UTC | #7
On Fri, Aug 12, 2016 at 09:21:39AM -0400, Tejun Heo wrote:
> On Thu, Aug 11, 2016 at 09:50:48PM -0700, Sargun Dhillon wrote:
> > I realize that in_cgroup is more consistent, but under_cgroup makes
> > far more sense to me. I think it's more intuitive.
> 
> So, I think in_cgroup should mean that the object is in that
> particular cgroup while under_cgroup in the subhierarchy of that
> cgroup.  Let's rename the other subhierarchy test to under too.  I
> think that'd be a lot less confusing going forward.

Ah, I suppose the bpf part is userland visible?  If so, there isn't
much we can do and probably best to stick with in_cgroup for that
part.  Bummer but no big deal.

Thanks.
Tejun Heo Aug. 12, 2016, 1:29 p.m. UTC | #8
Hello,

On Fri, Aug 12, 2016 at 09:40:39AM +0200, Daniel Borkmann wrote:
> > I actually wish we could rename skb_in_cgroup to skb_under_cgroup. If we ever
> > introduced a check for absolute membership versus ancestral membership, what
> > would we call that?
> 
> That option is, by the way, still on the table for -net tree, since 4.8 is not
> released yet, so it could still be renamed into BPF_FUNC_skb_under_cgroup.
> 
> Then you could make this one here for -net-next as "BPF_FUNC_current_under_cgroup".
> 
> Tejun, Alexei?

lol I should have read the whole thread before replying twice.  Sorry
about that.  Yeah, if we can still rename it, let's do "under".  It's
more intuitive and gives us the room to implement the real "in" test
if ever necessary in the future.

Thanks.
Alexei Starovoitov Aug. 12, 2016, 3:21 p.m. UTC | #9
On Fri, Aug 12, 2016 at 09:29:35AM -0400, Tejun Heo wrote:
> Hello,
> 
> On Fri, Aug 12, 2016 at 09:40:39AM +0200, Daniel Borkmann wrote:
> > > I actually wish we could rename skb_in_cgroup to skb_under_cgroup. If we ever
> > > introduced a check for absolute membership versus ancestral membership, what
> > > would we call that?
> > 
> > That option is, by the way, still on the table for -net tree, since 4.8 is not
> > released yet, so it could still be renamed into BPF_FUNC_skb_under_cgroup.
> > 
> > Then you could make this one here for -net-next as "BPF_FUNC_current_under_cgroup".
> > 
> > Tejun, Alexei?
> 
> lol I should have read the whole thread before replying twice.  Sorry
> about that.  Yeah, if we can still rename it, let's do "under".  It's
> more intuitive and gives us the room to implement the real "in" test
> if ever necessary in the future.

agree. Thanks for explaining 'in' vs 'under' terminology.
since we can still rename skb_in_cgroup we should do it.

and since that was my only nit for this patch.
Acked-by: Alexei Starovoitov <ast@kernel.org>

All 3 patches should go via net-next and to avoid conflicts 1/3 can be
in cgroup tree as well (if you think there will be conflicts).
We did that in the past with tip and net-next and it worked out well.
Daniel or Martin, do you mind preparing in->under renaming patch for net?

Thanks!
Tejun Heo Aug. 12, 2016, 3:28 p.m. UTC | #10
Hello, Alexei.

On Fri, Aug 12, 2016 at 08:21:21AM -0700, Alexei Starovoitov wrote:
> > lol I should have read the whole thread before replying twice.  Sorry
> > about that.  Yeah, if we can still rename it, let's do "under".  It's
> > more intuitive and gives us the room to implement the real "in" test
> > if ever necessary in the future.
> 
> agree. Thanks for explaining 'in' vs 'under' terminology.
> since we can still rename skb_in_cgroup we should do it.

Sounds good to me.

> and since that was my only nit for this patch.
> Acked-by: Alexei Starovoitov <ast@kernel.org>

FWIW,

Acked-by: Tejun Heo <tj@kernel.org>

> All 3 patches should go via net-next and to avoid conflicts 1/3 can be
> in cgroup tree as well (if you think there will be conflicts).
> We did that in the past with tip and net-next and it worked out well.

Yeah, just route it through net-next.  If other changes ever need it,
I'll include the commit in cgroup tree.

Thanks.

Patch
diff mbox

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index da218fe..bea0c4e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -375,6 +375,17 @@  enum bpf_func_id {
 	 */
 	BPF_FUNC_probe_write_user,
 
+	/**
+	 * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 current failed the cgroup2 descendant test
+	 *   == 1 current succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_current_task_under_cgroup,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 633a650..a2ac051 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -538,7 +538,7 @@  static int __init register_perf_event_array_map(void)
 }
 late_initcall(register_perf_event_array_map);
 
-#ifdef CONFIG_SOCK_CGROUP_DATA
+#ifdef CONFIG_CGROUPS
 static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
 				     struct file *map_file /* not used */,
 				     int fd)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7094c69..d504722 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1053,7 +1053,8 @@  static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_CGROUP_ARRAY:
-		if (func_id != BPF_FUNC_skb_in_cgroup)
+		if (func_id != BPF_FUNC_skb_in_cgroup &&
+		    func_id != BPF_FUNC_current_task_under_cgroup)
 			goto error;
 		break;
 	default:
@@ -1075,6 +1076,7 @@  static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 			goto error;
 		break;
+	case BPF_FUNC_current_task_under_cgroup:
 	case BPF_FUNC_skb_in_cgroup:
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 			goto error;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b20438f..e85f183 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -376,6 +376,34 @@  static const struct bpf_func_proto bpf_get_current_task_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{	/* Test whether current is under the cgroup stored at map[idx]. */
+	u32 idx = (u32)r2;	/* r2: index into the cgroup array map */
+	struct cgroup *cgrp;
+	struct bpf_map *map = (struct bpf_map *)(long)r1;	/* r1: map pointer */
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+	if (unlikely(in_interrupt()))	/* refused when in_interrupt() is true */
+		return -EINVAL;
+
+	if (unlikely(idx >= array->map.max_entries))	/* index out of range */
+		return -E2BIG;
+
+	cgrp = READ_ONCE(array->ptrs[idx]);	/* read the slot exactly once */
+	if (unlikely(!cgrp))	/* no cgroup pointer stored at this index */
+		return -EAGAIN;
+
+	return task_under_cgroup_hierarchy(current, cgrp);	/* result passed through */
+}
+
+static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
+	.func           = bpf_current_task_under_cgroup,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,	/* test result or negative error */
+	.arg1_type      = ARG_CONST_MAP_PTR,	/* r1: the map pointer */
+	.arg2_type      = ARG_ANYTHING,	/* r2: index, bounds-checked in .func */
+};
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -407,6 +435,8 @@  static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 		return &bpf_perf_event_read_proto;
 	case BPF_FUNC_probe_write_user:
 		return bpf_get_probe_write_proto();
+	case BPF_FUNC_current_task_under_cgroup:
+		return &bpf_current_task_under_cgroup_proto;
 	default:
 		return NULL;
 	}