diff mbox series

[bpf-next,v2,1/2] bpf: avoid iterating duplicated files for task_file iterator

Message ID 20200828053815.817806-1-yhs@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: avoid iterating duplicated files for task_file iterator | expand

Commit Message

Yonghong Song Aug. 28, 2020, 5:38 a.m. UTC
Currently, task_file iterator iterates all files from all tasks.
This may potentially visit a lot of duplicated files if there are
many tasks sharing the same files, e.g., typical pthreads
where these pthreads and the main thread are sharing the same files.

This patch changed task_file iterator to skip a particular task
if that task shares the same files as its group_leader (the task
having the same tgid and also task->tgid == task->pid).
This will preserve the same result, visiting all files from all
tasks, and will reduce runtime cost significantl, e.g., if there are
a lot of pthreads and the process has a lot of open files.

Suggested-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 kernel/bpf/task_iter.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

It would be good if somebody familar with sched code can help check
whether I missed anything or not (e.g., locks, etc.)
for the code change
  task->files == task->group_leader->files

Note the change in this patch might have conflicts with
e60572b8d4c3 ("bpf: Avoid visit same object multiple times")
which is merged into bpf/net sometimes back.

Comments

Josef Bacik Sept. 1, 2020, 4:17 p.m. UTC | #1
On 8/28/20 1:38 AM, Yonghong Song wrote:
> Currently, task_file iterator iterates all files from all tasks.
> This may potentially visit a lot of duplicated files if there are
> many tasks sharing the same files, e.g., typical pthreads
> where these pthreads and the main thread are sharing the same files.
> 
> This patch changed task_file iterator to skip a particular task
> if that task shares the same files as its group_leader (the task
> having the same tgid and also task->tgid == task->pid).
> This will preserve the same result, visiting all files from all
> tasks, and will reduce runtime cost significantl, e.g., if there are
> a lot of pthreads and the process has a lot of open files.
> 
> Suggested-by: Andrii Nakryiko <andriin@fb.com>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>   kernel/bpf/task_iter.c | 14 +++++++++-----
>   1 file changed, 9 insertions(+), 5 deletions(-)
> 
> It would be good if somebody familar with sched code can help check
> whether I missed anything or not (e.g., locks, etc.)
> for the code change
>    task->files == task->group_leader->files
> 
> Note the change in this patch might have conflicts with
> e60572b8d4c3 ("bpf: Avoid visit same object multiple times")
> which is merged into bpf/net sometimes back.
> 
> diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
> index 232df29793e9..0c5c96bb6964 100644
> --- a/kernel/bpf/task_iter.c
> +++ b/kernel/bpf/task_iter.c
> @@ -22,7 +22,8 @@ struct bpf_iter_seq_task_info {
>   };
>   
>   static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
> -					     u32 *tid)
> +					     u32 *tid,
> +					     bool skip_if_dup_files)
>   {
>   	struct task_struct *task = NULL;
>   	struct pid *pid;
> @@ -32,7 +33,10 @@ static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
>   	pid = idr_get_next(&ns->idr, tid);
>   	if (pid) {
>   		task = get_pid_task(pid, PIDTYPE_PID);
> -		if (!task) {
> +		if (!task ||
> +		    (skip_if_dup_files &&
> +		     task->tgid != task->pid &&
> +		     task->files == task->group_leader->files)) {

This is fine, we're not deref'ing files, if we were you'd need 
get_files_struct().  You can deref task->group_leader here because you got the 
task so this is safe.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>

Thanks,

Josef
Josef Bacik Sept. 1, 2020, 5:18 p.m. UTC | #2
On 8/28/20 1:38 AM, Yonghong Song wrote:
> Currently, task_file iterator iterates all files from all tasks.
> This may potentially visit a lot of duplicated files if there are
> many tasks sharing the same files, e.g., typical pthreads
> where these pthreads and the main thread are sharing the same files.
> 
> This patch changed task_file iterator to skip a particular task
> if that task shares the same files as its group_leader (the task
> having the same tgid and also task->tgid == task->pid).
> This will preserve the same result, visiting all files from all
> tasks, and will reduce runtime cost significantl, e.g., if there are
> a lot of pthreads and the process has a lot of open files.
> 
> Suggested-by: Andrii Nakryiko <andriin@fb.com>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>   kernel/bpf/task_iter.c | 14 +++++++++-----
>   1 file changed, 9 insertions(+), 5 deletions(-)
> 
> It would be good if somebody familar with sched code can help check
> whether I missed anything or not (e.g., locks, etc.)
> for the code change
>    task->files == task->group_leader->files
> 
> Note the change in this patch might have conflicts with
> e60572b8d4c3 ("bpf: Avoid visit same object multiple times")
> which is merged into bpf/net sometimes back.
> 
> diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
> index 232df29793e9..0c5c96bb6964 100644
> --- a/kernel/bpf/task_iter.c
> +++ b/kernel/bpf/task_iter.c
> @@ -22,7 +22,8 @@ struct bpf_iter_seq_task_info {
>   };
>   
>   static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
> -					     u32 *tid)
> +					     u32 *tid,
> +					     bool skip_if_dup_files)
>   {
>   	struct task_struct *task = NULL;
>   	struct pid *pid;
> @@ -32,7 +33,10 @@ static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
>   	pid = idr_get_next(&ns->idr, tid);
>   	if (pid) {
>   		task = get_pid_task(pid, PIDTYPE_PID);
> -		if (!task) {
> +		if (!task ||
> +		    (skip_if_dup_files &&
> +		     task->tgid != task->pid &&
> +		     task->files == task->group_leader->files)) {
>   			++*tid;
>   			goto retry;

Sorry I only checked the task->files and task->group_leader thing, I forgot to 
actually pay attention to what the patch itself was doing.

This will leak task structs, you need something like

if (!task) {
	++*tid;
	goto retry;
}
if (skip_if_dup_files && etc) {
	++*tid;
	put_task_struct(task);
	goto retry;
}

otherwise you'll leak tasks.  Thanks,

Josef
Yonghong Song Sept. 1, 2020, 5:38 p.m. UTC | #3
On 9/1/20 10:18 AM, Josef Bacik wrote:
> On 8/28/20 1:38 AM, Yonghong Song wrote:
>> Currently, task_file iterator iterates all files from all tasks.
>> This may potentially visit a lot of duplicated files if there are
>> many tasks sharing the same files, e.g., typical pthreads
>> where these pthreads and the main thread are sharing the same files.
>>
>> This patch changed task_file iterator to skip a particular task
>> if that task shares the same files as its group_leader (the task
>> having the same tgid and also task->tgid == task->pid).
>> This will preserve the same result, visiting all files from all
>> tasks, and will reduce runtime cost significantl, e.g., if there are
>> a lot of pthreads and the process has a lot of open files.
>>
>> Suggested-by: Andrii Nakryiko <andriin@fb.com>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   kernel/bpf/task_iter.c | 14 +++++++++-----
>>   1 file changed, 9 insertions(+), 5 deletions(-)
>>
>> It would be good if somebody familar with sched code can help check
>> whether I missed anything or not (e.g., locks, etc.)
>> for the code change
>>    task->files == task->group_leader->files
>>
>> Note the change in this patch might have conflicts with
>> e60572b8d4c3 ("bpf: Avoid visit same object multiple times")
>> which is merged into bpf/net sometimes back.
>>
>> diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
>> index 232df29793e9..0c5c96bb6964 100644
>> --- a/kernel/bpf/task_iter.c
>> +++ b/kernel/bpf/task_iter.c
>> @@ -22,7 +22,8 @@ struct bpf_iter_seq_task_info {
>>   };
>>   static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
>> -                         u32 *tid)
>> +                         u32 *tid,
>> +                         bool skip_if_dup_files)
>>   {
>>       struct task_struct *task = NULL;
>>       struct pid *pid;
>> @@ -32,7 +33,10 @@ static struct task_struct *task_seq_get_next(struct 
>> pid_namespace *ns,
>>       pid = idr_get_next(&ns->idr, tid);
>>       if (pid) {
>>           task = get_pid_task(pid, PIDTYPE_PID);
>> -        if (!task) {
>> +        if (!task ||
>> +            (skip_if_dup_files &&
>> +             task->tgid != task->pid &&
>> +             task->files == task->group_leader->files)) {
>>               ++*tid;
>>               goto retry;
> 
> Sorry I only checked the task->files and task->group_leader thing, I 
> forgot to actually pay attention to what the patch itself was doing.
> 
> This will leak task structs, you need something like
> 
> if (!task) {
>      ++*tid;
>      goto retry;
> }
> if (skip_if_dup_files && etc) {
>      ++*tid;
>      put_task_struct(task);
>      goto retry;
> }
> 
> otherwise you'll leak tasks.  Thanks,

Or, yes, good catch. Thanks! I too focused on task->files == 
task->group_lead->files and did not think deep on leaking
tasks.

Will send v2.

> 
> Josef
diff mbox series

Patch

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 232df29793e9..0c5c96bb6964 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -22,7 +22,8 @@  struct bpf_iter_seq_task_info {
 };
 
 static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
-					     u32 *tid)
+					     u32 *tid,
+					     bool skip_if_dup_files)
 {
 	struct task_struct *task = NULL;
 	struct pid *pid;
@@ -32,7 +33,10 @@  static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
 	pid = idr_get_next(&ns->idr, tid);
 	if (pid) {
 		task = get_pid_task(pid, PIDTYPE_PID);
-		if (!task) {
+		if (!task ||
+		    (skip_if_dup_files &&
+		     task->tgid != task->pid &&
+		     task->files == task->group_leader->files)) {
 			++*tid;
 			goto retry;
 		}
@@ -47,7 +51,7 @@  static void *task_seq_start(struct seq_file *seq, loff_t *pos)
 	struct bpf_iter_seq_task_info *info = seq->private;
 	struct task_struct *task;
 
-	task = task_seq_get_next(info->common.ns, &info->tid);
+	task = task_seq_get_next(info->common.ns, &info->tid, false);
 	if (!task)
 		return NULL;
 
@@ -64,7 +68,7 @@  static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	++*pos;
 	++info->tid;
 	put_task_struct((struct task_struct *)v);
-	task = task_seq_get_next(info->common.ns, &info->tid);
+	task = task_seq_get_next(info->common.ns, &info->tid, false);
 	if (!task)
 		return NULL;
 
@@ -147,7 +151,7 @@  task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
 		curr_files = *fstruct;
 		curr_fd = info->fd;
 	} else {
-		curr_task = task_seq_get_next(ns, &curr_tid);
+		curr_task = task_seq_get_next(ns, &curr_tid, true);
 		if (!curr_task)
 			return NULL;