diff mbox series

[bpf-next,1/3] bpf: add bpf_progenyof helper

Message ID 20190226223651.3166820-2-javierhonduco@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: add progenyof helper | expand

Commit Message

Javier Honduvilla Coto Feb. 26, 2019, 10:36 p.m. UTC
This patch adds the bpf_progenyof helper, which receives a PID and returns
1 if the process currently being executed is that PID or one of its
descendants in the process hierarchy, or 0 if not.

This is very useful in tracing programs when we want to filter by a
given PID and all the children it might spawn. The current workarounds
most people implement for this purpose have issues:

- Attaching to process spawning syscalls and dynamically adding those PIDs
  to some bpf map that would be used to filter is cumbersome and
  potentially racy.
- Unrolling some loop to perform what this helper is doing consumes lots
  of instructions. That and the impossibility to jump backwards make it
  really hard to be correct in really large process chains.

Signed-off-by: Javier Honduvilla Coto <javierhonduco@fb.com>
---
 include/linux/bpf.h      |  1 +
 include/uapi/linux/bpf.h |  3 ++-
 kernel/bpf/core.c        |  1 +
 kernel/bpf/helpers.c     | 29 +++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c |  2 ++
 5 files changed, 35 insertions(+), 1 deletion(-)

Comments

Martin KaFai Lau Feb. 27, 2019, 6:26 a.m. UTC | #1
On Tue, Feb 26, 2019 at 02:36:49PM -0800, Javier Honduvilla Coto wrote:
> This patch adds the bpf_progenyof helper which receives a PID and returns
What is progenyof?

> 1 if the process currently being executed is in the process hierarchy
> including itself or 0 if not.
> 
> This is very useful in tracing programs when we want to filter by a
> given PID and all the children it might spawn. The current workarounds
> most people implement for this purpose have issues:
> 
> - Attaching to process spawning syscalls and dynamically add those PIDs
>   to some bpf map that would be used to filter is cumbersome and
> potentially racy.
> - Unrolling some loop to perform what this helper is doing consumes lots
>   of instructions. That and the impossibility to jump backwards makes it
> really hard to be correct in really large process chains.
> 
> Signed-off-by: Javier Honduvilla Coto <javierhonduco@fb.com>
> ---
>  include/linux/bpf.h      |  1 +
>  include/uapi/linux/bpf.h |  3 ++-
>  kernel/bpf/core.c        |  1 +
>  kernel/bpf/helpers.c     | 29 +++++++++++++++++++++++++++++
>  kernel/trace/bpf_trace.c |  2 ++
>  5 files changed, 35 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index de18227b3d95..447395ba202b 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -921,6 +921,7 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
>  extern const struct bpf_func_proto bpf_spin_lock_proto;
>  extern const struct bpf_func_proto bpf_spin_unlock_proto;
>  extern const struct bpf_func_proto bpf_get_local_storage_proto;
> +extern const struct bpf_func_proto bpf_progenyof_proto;
It seems only used in bpf_trace.c.  Does it have to be here?

>  
>  /* Shared helpers among cBPF and eBPF. */
>  void bpf_user_rnd_init_once(void);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index bcdd2474eee7..804e4218eb28 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2457,7 +2457,8 @@ union bpf_attr {
>  	FN(spin_lock),			\
>  	FN(spin_unlock),		\
>  	FN(sk_fullsock),		\
> -	FN(tcp_sock),
> +	FN(tcp_sock),			\
> +	FN(progenyof),
Please add doc like other helpers do.

>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index ef88b167959d..69e209fbd128 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2015,6 +2015,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
>  const struct bpf_func_proto bpf_get_current_comm_proto __weak;
>  const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
>  const struct bpf_func_proto bpf_get_local_storage_proto __weak;
> +const struct bpf_func_proto bpf_progenyof_proto __weak;
>  
>  const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
>  {
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index a411fc17d265..3899787e8dbf 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -18,6 +18,7 @@
>  #include <linux/sched.h>
>  #include <linux/uidgid.h>
>  #include <linux/filter.h>
> +#include <linux/init_task.h>
>  
>  /* If kernel subsystem is allowing eBPF programs to call this function,
>   * inside its own verifier_ops->get_func_proto() callback it should return
> @@ -364,3 +365,31 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
>  };
>  #endif
>  #endif
> +
> +BPF_CALL_1(bpf_progenyof, int, pid)
> +{
> +	int result = 0;
> +	struct task_struct *task = current;
> +
> +	if (unlikely(!task))
hmm.... Could current be NULL?

> +		return -EINVAL;
> +
> +	rcu_read_lock();
> +	while (task != &init_task) {
I don't know the details of init_task, so qq:
Could the passed in "pid" be the init_task->pid?
If possible, what is the expected "result"?

> +		if (task->pid == pid) {
> +			result = 1;
> +			break;
> +		}
> +		task = rcu_dereference(task->real_parent);
> +	}
> +	rcu_read_unlock();
> +
> +	return result;
> +}
> +
> +const struct bpf_func_proto bpf_progenyof_proto = {
> +	.func		= bpf_progenyof,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_ANYTHING,
> +};
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index f1a86a0d881d..8602ae83c799 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -600,6 +600,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>  		return &bpf_get_prandom_u32_proto;
>  	case BPF_FUNC_probe_read_str:
>  		return &bpf_probe_read_str_proto;
> +	case BPF_FUNC_progenyof:
> +		return &bpf_progenyof_proto;
>  #ifdef CONFIG_CGROUPS
>  	case BPF_FUNC_get_current_cgroup_id:
>  		return &bpf_get_current_cgroup_id_proto;
> -- 
> 2.17.1
>
Javier Honduvilla Coto March 1, 2019, 5:28 p.m. UTC | #2
On Wed, Feb 27, 2019 at 06:26:41AM +0000, Martin Lau wrote:
> On Tue, Feb 26, 2019 at 02:36:49PM -0800, Javier Honduvilla Coto wrote:
> > This patch adds the bpf_progenyof helper which receives a PID and returns
> What is progenof?
>
> > 1 if the process currently being executed is in the process hierarchy
> > including itself or 0 if not.
> >
> > This is very useful in tracing programs when we want to filter by a
> > given PID and all the children it might spawn. The current workarounds
> > most people implement for this purpose have issues:
> >
> > - Attaching to process spawning syscalls and dynamically add those PIDs
> >   to some bpf map that would be used to filter is cumbersome and
> > potentially racy.
> > - Unrolling some loop to perform what this helper is doing consumes lots
> >   of instructions. That and the impossibility to jump backwards makes it
> > really hard to be correct in really large process chains.
> >
> > Signed-off-by: Javier Honduvilla Coto <javierhonduco@fb.com>
> > ---
> >  include/linux/bpf.h      |  1 +
> >  include/uapi/linux/bpf.h |  3 ++-
> >  kernel/bpf/core.c        |  1 +
> >  kernel/bpf/helpers.c     | 29 +++++++++++++++++++++++++++++
> >  kernel/trace/bpf_trace.c |  2 ++
> >  5 files changed, 35 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index de18227b3d95..447395ba202b 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -921,6 +921,7 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
> >  extern const struct bpf_func_proto bpf_spin_lock_proto;
> >  extern const struct bpf_func_proto bpf_spin_unlock_proto;
> >  extern const struct bpf_func_proto bpf_get_local_storage_proto;
> > +extern const struct bpf_func_proto bpf_progenyof_proto;
> It seems only used in bpf_trace.c.  Does it have to be here?
>
> >
> >  /* Shared helpers among cBPF and eBPF. */
> >  void bpf_user_rnd_init_once(void);
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index bcdd2474eee7..804e4218eb28 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -2457,7 +2457,8 @@ union bpf_attr {
> >  	FN(spin_lock),			\
> >  	FN(spin_unlock),		\
> >  	FN(sk_fullsock),		\
> > -	FN(tcp_sock),
> > +	FN(tcp_sock),			\
> > +	FN(progenyof),
> Please add doc like other helpers do.

Oops, good catch, thanks! Will send v2 soon!!

>
> >
> >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> >   * function eBPF program intends to call
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index ef88b167959d..69e209fbd128 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -2015,6 +2015,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
> >  const struct bpf_func_proto bpf_get_current_comm_proto __weak;
> >  const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
> >  const struct bpf_func_proto bpf_get_local_storage_proto __weak;
> > +const struct bpf_func_proto bpf_progenyof_proto __weak;
> >
> >  const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
> >  {
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index a411fc17d265..3899787e8dbf 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -18,6 +18,7 @@
> >  #include <linux/sched.h>
> >  #include <linux/uidgid.h>
> >  #include <linux/filter.h>
> > +#include <linux/init_task.h>
> >
> >  /* If kernel subsystem is allowing eBPF programs to call this function,
> >   * inside its own verifier_ops->get_func_proto() callback it should return
> > @@ -364,3 +365,31 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
> >  };
> >  #endif
> >  #endif
> > +
> > +BPF_CALL_1(bpf_progenyof, int, pid)
> > +{
> > +	int result = 0;
> > +	struct task_struct *task = current;
> > +
> > +	if (unlikely(!task))
> hmm.... Could current be NULL?

Wasn't sure about this, but I added it because bpf_get_current_pid_tgid,
bpf_get_current_uid_gid, and bpf_get_current_comm check for this. Texted Alexei
about this and he told me this is probably not necessary anymore, but I
guess it doesn't hurt leaving it?

>
> > +		return -EINVAL;
> > +
> > +	rcu_read_lock();
> > +	while (task != &init_task) {
> I don't know the details of init_task, so qq:
> Could the passed in "pid" be the init_task->pid?
> If possible, what is the expected "result"?
>

Yep! init_task doesn't set a pid from what I could see, so I guess it
will be PID=0. The test in the last patch checks bpf_progenyof(0) :)

bpf_progenyof with 0 or 1 will always return 1

> > +		if (task->pid == pid) {
> > +			result = 1;
> > +			break;
> > +		}
> > +		task = rcu_dereference(task->real_parent);
> > +	}
> > +	rcu_read_unlock();
> > +
> > +	return result;
> > +}
> > +
> > +const struct bpf_func_proto bpf_progenyof_proto = {
> > +	.func		= bpf_progenyof,
> > +	.gpl_only	= false,
> > +	.ret_type	= RET_INTEGER,
> > +	.arg1_type	= ARG_ANYTHING,
> > +};
> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > index f1a86a0d881d..8602ae83c799 100644
> > --- a/kernel/trace/bpf_trace.c
> > +++ b/kernel/trace/bpf_trace.c
> > @@ -600,6 +600,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> >  		return &bpf_get_prandom_u32_proto;
> >  	case BPF_FUNC_probe_read_str:
> >  		return &bpf_probe_read_str_proto;
> > +	case BPF_FUNC_progenyof:
> > +		return &bpf_progenyof_proto;
> >  #ifdef CONFIG_CGROUPS
> >  	case BPF_FUNC_get_current_cgroup_id:
> >  		return &bpf_get_current_cgroup_id_proto;
> > --
> > 2.17.1
> >
Javier Honduvilla Coto March 1, 2019, 5:43 p.m. UTC | #3
On Wed, Feb 27, 2019 at 06:26:41AM +0000, Martin Lau wrote:

Sorry, accidentally did not reply to all your comments:
> On Tue, Feb 26, 2019 at 02:36:49PM -0800, Javier Honduvilla Coto wrote:
> > This patch adds the bpf_progenyof helper which receives a PID and returns
> What is progenof?

progenyof is a helper we'd love to have in the kernel, as right now
checking if the current process pid is among the progeny of some
arbitrary pid has several disadvantages. There are more details in the
first commit message.

>
> > 1 if the process currently being executed is in the process hierarchy
> > including itself or 0 if not.
> >
> > This is very useful in tracing programs when we want to filter by a
> > given PID and all the children it might spawn. The current workarounds
> > most people implement for this purpose have issues:
> >
> > - Attaching to process spawning syscalls and dynamically add those PIDs
> >   to some bpf map that would be used to filter is cumbersome and
> > potentially racy.
> > - Unrolling some loop to perform what this helper is doing consumes lots
> >   of instructions. That and the impossibility to jump backwards makes it
> > really hard to be correct in really large process chains.
> >
> > Signed-off-by: Javier Honduvilla Coto <javierhonduco@fb.com>
> > ---
> >  include/linux/bpf.h      |  1 +
> >  include/uapi/linux/bpf.h |  3 ++-
> >  kernel/bpf/core.c        |  1 +
> >  kernel/bpf/helpers.c     | 29 +++++++++++++++++++++++++++++
> >  kernel/trace/bpf_trace.c |  2 ++
> >  5 files changed, 35 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index de18227b3d95..447395ba202b 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -921,6 +921,7 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
> >  extern const struct bpf_func_proto bpf_spin_lock_proto;
> >  extern const struct bpf_func_proto bpf_spin_unlock_proto;
> >  extern const struct bpf_func_proto bpf_get_local_storage_proto;
> > +extern const struct bpf_func_proto bpf_progenyof_proto;
> It seems only used in bpf_trace.c.  Does it have to be here?

Yes, we need it in bpf_trace.c where we are requiring it
Martin KaFai Lau March 2, 2019, 12:01 a.m. UTC | #4
On Fri, Mar 01, 2019 at 09:28:39AM -0800, Javier Honduvilla Coto wrote:
> On Wed, Feb 27, 2019 at 06:26:41AM +0000, Martin Lau wrote:
> > On Tue, Feb 26, 2019 at 02:36:49PM -0800, Javier Honduvilla Coto wrote:
> > > This patch adds the bpf_progenyof helper which receives a PID and returns
> > What is progenof?
> >
> > > 1 if the process currently being executed is in the process hierarchy
> > > including itself or 0 if not.
> > >
> > > This is very useful in tracing programs when we want to filter by a
> > > given PID and all the children it might spawn. The current workarounds
> > > most people implement for this purpose have issues:
> > >
> > > - Attaching to process spawning syscalls and dynamically add those PIDs
> > >   to some bpf map that would be used to filter is cumbersome and
> > > potentially racy.
> > > - Unrolling some loop to perform what this helper is doing consumes lots
> > >   of instructions. That and the impossibility to jump backwards makes it
> > > really hard to be correct in really large process chains.
> > >
> > > Signed-off-by: Javier Honduvilla Coto <javierhonduco@fb.com>
> > > ---
> > >  include/linux/bpf.h      |  1 +
> > >  include/uapi/linux/bpf.h |  3 ++-
> > >  kernel/bpf/core.c        |  1 +
> > >  kernel/bpf/helpers.c     | 29 +++++++++++++++++++++++++++++
> > >  kernel/trace/bpf_trace.c |  2 ++
> > >  5 files changed, 35 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > index de18227b3d95..447395ba202b 100644
> > > --- a/include/linux/bpf.h
> > > +++ b/include/linux/bpf.h
> > > @@ -921,6 +921,7 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
> > >  extern const struct bpf_func_proto bpf_spin_lock_proto;
> > >  extern const struct bpf_func_proto bpf_spin_unlock_proto;
> > >  extern const struct bpf_func_proto bpf_get_local_storage_proto;
> > > +extern const struct bpf_func_proto bpf_progenyof_proto;
> > It seems only used in bpf_trace.c.  Does it have to be here?
> >
> > >
> > >  /* Shared helpers among cBPF and eBPF. */
> > >  void bpf_user_rnd_init_once(void);
> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > index bcdd2474eee7..804e4218eb28 100644
> > > --- a/include/uapi/linux/bpf.h
> > > +++ b/include/uapi/linux/bpf.h
> > > @@ -2457,7 +2457,8 @@ union bpf_attr {
> > >  	FN(spin_lock),			\
> > >  	FN(spin_unlock),		\
> > >  	FN(sk_fullsock),		\
> > > -	FN(tcp_sock),
> > > +	FN(tcp_sock),			\
> > > +	FN(progenyof),
> > Please add doc like other helpers do.
> 
> Oops, good catch, thanks! Will send v2 soon!!
> 
> >
> > >
> > >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> > >   * function eBPF program intends to call
> > > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > > index ef88b167959d..69e209fbd128 100644
> > > --- a/kernel/bpf/core.c
> > > +++ b/kernel/bpf/core.c
> > > @@ -2015,6 +2015,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
> > >  const struct bpf_func_proto bpf_get_current_comm_proto __weak;
> > >  const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
> > >  const struct bpf_func_proto bpf_get_local_storage_proto __weak;
> > > +const struct bpf_func_proto bpf_progenyof_proto __weak;
> > >
> > >  const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
> > >  {
> > > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > > index a411fc17d265..3899787e8dbf 100644
> > > --- a/kernel/bpf/helpers.c
> > > +++ b/kernel/bpf/helpers.c
> > > @@ -18,6 +18,7 @@
> > >  #include <linux/sched.h>
> > >  #include <linux/uidgid.h>
> > >  #include <linux/filter.h>
> > > +#include <linux/init_task.h>
> > >
> > >  /* If kernel subsystem is allowing eBPF programs to call this function,
> > >   * inside its own verifier_ops->get_func_proto() callback it should return
> > > @@ -364,3 +365,31 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
> > >  };
> > >  #endif
> > >  #endif
> > > +
> > > +BPF_CALL_1(bpf_progenyof, int, pid)
> > > +{
> > > +	int result = 0;
> > > +	struct task_struct *task = current;
> > > +
> > > +	if (unlikely(!task))
> > hmm.... Could current be NULL?
> 
> Wasn't sure about this but added as bpf_get_current_pid_tgid,
> bpf_get_current_uid_gid, and bpf_get_current_comm check for this. Texted Alexei
> about this and he told me this is probably not necessary anymore, but I
> guess it doesn't hurt leaving it?
> 
> >
> > > +		return -EINVAL;
> > > +
> > > +	rcu_read_lock();
> > > +	while (task != &init_task) {
> > I don't know the details of init_task, so qq:
> > Could the passed in "pid" be the init_task->pid?
> > If possible, what is the expected "result"?
> >
> 
> Yep! init_task doesn't set a pid for what I could see, so I guess it
> will be PID=0. The test in the last patch check bpf_progenyof(0) :)
> 
> bpf_progenyof with 0 or 1 will always return 1
the test in patch 3 commit message has this though:
"- progenyof(0) == 0"

so the intention for progenyof(0) is to always return 0 or 1?

A random ps output from my vm:
[root@arch-fb-vm1 bpf]# ps -eaf | head -3
UID        PID  PPID  C STIME TTY          TIME CMD
root         1     0  0 11:45 ?        00:00:12 /sbin/init
root         2     0  0 11:45 ?        00:00:00 [kthreadd]

I was asking because,
after reading the loop, it seems all tasks trace back to init_task,
so my intuitive thinking is progenyof(init_task.pid) should always
return 1.  If it is otherwise, some comments and doc would be useful
to explain why init_task.pid is treated differently.

> 
> > > +		if (task->pid == pid) {
> > > +			result = 1;
> > > +			break;
> > > +		}
> > > +		task = rcu_dereference(task->real_parent);
> > > +	}
> > > +	rcu_read_unlock();
> > > +
> > > +	return result;
> > > +}
> > > +
> > > +const struct bpf_func_proto bpf_progenyof_proto = {
> > > +	.func		= bpf_progenyof,
> > > +	.gpl_only	= false,
> > > +	.ret_type	= RET_INTEGER,
> > > +	.arg1_type	= ARG_ANYTHING,
> > > +};
> > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > > index f1a86a0d881d..8602ae83c799 100644
> > > --- a/kernel/trace/bpf_trace.c
> > > +++ b/kernel/trace/bpf_trace.c
> > > @@ -600,6 +600,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> > >  		return &bpf_get_prandom_u32_proto;
> > >  	case BPF_FUNC_probe_read_str:
> > >  		return &bpf_probe_read_str_proto;
> > > +	case BPF_FUNC_progenyof:
> > > +		return &bpf_progenyof_proto;
> > >  #ifdef CONFIG_CGROUPS
> > >  	case BPF_FUNC_get_current_cgroup_id:
> > >  		return &bpf_get_current_cgroup_id_proto;
> > > --
> > > 2.17.1
> > >
Javier Honduvilla Coto March 2, 2019, 1:08 a.m. UTC | #5
On Sat, Mar 02, 2019 at 12:01:14AM +0000, Martin Lau wrote:
> On Fri, Mar 01, 2019 at 09:28:39AM -0800, Javier Honduvilla Coto wrote:
> > On Wed, Feb 27, 2019 at 06:26:41AM +0000, Martin Lau wrote:
> > > On Tue, Feb 26, 2019 at 02:36:49PM -0800, Javier Honduvilla Coto wrote:
> > > > This patch adds the bpf_progenyof helper which receives a PID and returns
> > > What is progenof?
> > >
> > > > 1 if the process currently being executed is in the process hierarchy
> > > > including itself or 0 if not.
> > > >
> > > > This is very useful in tracing programs when we want to filter by a
> > > > given PID and all the children it might spawn. The current workarounds
> > > > most people implement for this purpose have issues:
> > > >
> > > > - Attaching to process spawning syscalls and dynamically add those PIDs
> > > >   to some bpf map that would be used to filter is cumbersome and
> > > > potentially racy.
> > > > - Unrolling some loop to perform what this helper is doing consumes lots
> > > >   of instructions. That and the impossibility to jump backwards makes it
> > > > really hard to be correct in really large process chains.
> > > >
> > > > Signed-off-by: Javier Honduvilla Coto <javierhonduco@fb.com>
> > > > ---
> > > >  include/linux/bpf.h      |  1 +
> > > >  include/uapi/linux/bpf.h |  3 ++-
> > > >  kernel/bpf/core.c        |  1 +
> > > >  kernel/bpf/helpers.c     | 29 +++++++++++++++++++++++++++++
> > > >  kernel/trace/bpf_trace.c |  2 ++
> > > >  5 files changed, 35 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > index de18227b3d95..447395ba202b 100644
> > > > --- a/include/linux/bpf.h
> > > > +++ b/include/linux/bpf.h
> > > > @@ -921,6 +921,7 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
> > > >  extern const struct bpf_func_proto bpf_spin_lock_proto;
> > > >  extern const struct bpf_func_proto bpf_spin_unlock_proto;
> > > >  extern const struct bpf_func_proto bpf_get_local_storage_proto;
> > > > +extern const struct bpf_func_proto bpf_progenyof_proto;
> > > It seems only used in bpf_trace.c.  Does it have to be here?
> > >
> > > >
> > > >  /* Shared helpers among cBPF and eBPF. */
> > > >  void bpf_user_rnd_init_once(void);
> > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > index bcdd2474eee7..804e4218eb28 100644
> > > > --- a/include/uapi/linux/bpf.h
> > > > +++ b/include/uapi/linux/bpf.h
> > > > @@ -2457,7 +2457,8 @@ union bpf_attr {
> > > >  	FN(spin_lock),			\
> > > >  	FN(spin_unlock),		\
> > > >  	FN(sk_fullsock),		\
> > > > -	FN(tcp_sock),
> > > > +	FN(tcp_sock),			\
> > > > +	FN(progenyof),
> > > Please add doc like other helpers do.
> >
> > Oops, good catch, thanks! Will send v2 soon!!
> >
> > >
> > > >
> > > >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> > > >   * function eBPF program intends to call
> > > > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > > > index ef88b167959d..69e209fbd128 100644
> > > > --- a/kernel/bpf/core.c
> > > > +++ b/kernel/bpf/core.c
> > > > @@ -2015,6 +2015,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
> > > >  const struct bpf_func_proto bpf_get_current_comm_proto __weak;
> > > >  const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
> > > >  const struct bpf_func_proto bpf_get_local_storage_proto __weak;
> > > > +const struct bpf_func_proto bpf_progenyof_proto __weak;
> > > >
> > > >  const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
> > > >  {
> > > > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > > > index a411fc17d265..3899787e8dbf 100644
> > > > --- a/kernel/bpf/helpers.c
> > > > +++ b/kernel/bpf/helpers.c
> > > > @@ -18,6 +18,7 @@
> > > >  #include <linux/sched.h>
> > > >  #include <linux/uidgid.h>
> > > >  #include <linux/filter.h>
> > > > +#include <linux/init_task.h>
> > > >
> > > >  /* If kernel subsystem is allowing eBPF programs to call this function,
> > > >   * inside its own verifier_ops->get_func_proto() callback it should return
> > > > @@ -364,3 +365,31 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
> > > >  };
> > > >  #endif
> > > >  #endif
> > > > +
> > > > +BPF_CALL_1(bpf_progenyof, int, pid)
> > > > +{
> > > > +	int result = 0;
> > > > +	struct task_struct *task = current;
> > > > +
> > > > +	if (unlikely(!task))
> > > hmm.... Could current be NULL?
> >
> > Wasn't sure about this but added as bpf_get_current_pid_tgid,
> > bpf_get_current_uid_gid, and bpf_get_current_comm check for this. Texted Alexei
> > about this and he told me this is probably not necessary anymore, but I
> > guess it doesn't hurt leaving it?
> >
> > >
> > > > +		return -EINVAL;
> > > > +
> > > > +	rcu_read_lock();
> > > > +	while (task != &init_task) {
> > > I don't know the details of init_task, so qq:
> > > Could the passed in "pid" be the init_task->pid?
> > > If possible, what is the expected "result"?
> > >
> >
> > Yep! init_task doesn't set a pid for what I could see, so I guess it
> > will be PID=0. The test in the last patch check bpf_progenyof(0) :)
> >
> > bpf_progenyof with 0 or 1 will always return 1
> the test in patch 3 commit message has this though:
> "- progenyof(0) == 0"
>
> so the intention for progenyof(0) is to always return 0 or 1?
>
> A random ps output from my vm:
> [root@arch-fb-vm1 bpf]# ps -eaf | head -3
> UID        PID  PPID  C STIME TTY          TIME CMD
> root         1     0  0 11:45 ?        00:00:12 /sbin/init
> root         2     0  0 11:45 ?        00:00:00 [kthreadd]
>
> I was asking because,
> after reading the loop, it seems all tasks tracing back to init_task.
> so my intuitive thinking is progenyof(init_task.pid) should always
> return 1.  If it is otherwise, some comments and doc would be useful
> to explain why treating init_task.pid differently.
>

My bad, that was a typo. bpf_progenyof(1) returns 1, and bpf_progenyof(0)
returns 0. Sorry for the confusion.

This is a good point! I chose to return 0 for this case because of
init_task being an implementation detail and PID 0 not having much
meaning for most users, but you are right that I should document it as
an exception if we keep it as is

That being said I think changing the behaviour to make progenyof(0) return 1
makes more sense from a semantics perspective!

> >
> > > > +		if (task->pid == pid) {
> > > > +			result = 1;
> > > > +			break;
> > > > +		}
> > > > +		task = rcu_dereference(task->real_parent);
> > > > +	}
> > > > +	rcu_read_unlock();
> > > > +
> > > > +	return result;
> > > > +}
> > > > +
> > > > +const struct bpf_func_proto bpf_progenyof_proto = {
> > > > +	.func		= bpf_progenyof,
> > > > +	.gpl_only	= false,
> > > > +	.ret_type	= RET_INTEGER,
> > > > +	.arg1_type	= ARG_ANYTHING,
> > > > +};
> > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > > > index f1a86a0d881d..8602ae83c799 100644
> > > > --- a/kernel/trace/bpf_trace.c
> > > > +++ b/kernel/trace/bpf_trace.c
> > > > @@ -600,6 +600,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> > > >  		return &bpf_get_prandom_u32_proto;
> > > >  	case BPF_FUNC_probe_read_str:
> > > >  		return &bpf_probe_read_str_proto;
> > > > +	case BPF_FUNC_progenyof:
> > > > +		return &bpf_progenyof_proto;
> > > >  #ifdef CONFIG_CGROUPS
> > > >  	case BPF_FUNC_get_current_cgroup_id:
> > > >  		return &bpf_get_current_cgroup_id_proto;
> > > > --
> > > > 2.17.1
> > > >
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index de18227b3d95..447395ba202b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -921,6 +921,7 @@  extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
 extern const struct bpf_func_proto bpf_spin_lock_proto;
 extern const struct bpf_func_proto bpf_spin_unlock_proto;
 extern const struct bpf_func_proto bpf_get_local_storage_proto;
+extern const struct bpf_func_proto bpf_progenyof_proto;
 
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bcdd2474eee7..804e4218eb28 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2457,7 +2457,8 @@  union bpf_attr {
 	FN(spin_lock),			\
 	FN(spin_unlock),		\
 	FN(sk_fullsock),		\
-	FN(tcp_sock),
+	FN(tcp_sock),			\
+	FN(progenyof),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ef88b167959d..69e209fbd128 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2015,6 +2015,7 @@  const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
 const struct bpf_func_proto bpf_get_current_comm_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_progenyof_proto __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a411fc17d265..3899787e8dbf 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,7 @@ 
 #include <linux/sched.h>
 #include <linux/uidgid.h>
 #include <linux/filter.h>
+#include <linux/init_task.h>
 
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
@@ -364,3 +365,31 @@  const struct bpf_func_proto bpf_get_local_storage_proto = {
 };
 #endif
 #endif
+
+BPF_CALL_1(bpf_progenyof, int, pid)
+{
+	int result = 0;
+	struct task_struct *task = current;
+
+	if (unlikely(!task))
+		return -EINVAL;
+
+	rcu_read_lock();
+	while (task != &init_task) {
+		if (task->pid == pid) {
+			result = 1;
+			break;
+		}
+		task = rcu_dereference(task->real_parent);
+	}
+	rcu_read_unlock();
+
+	return result;
+}
+
+const struct bpf_func_proto bpf_progenyof_proto = {
+	.func		= bpf_progenyof,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+};
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f1a86a0d881d..8602ae83c799 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -600,6 +600,8 @@  tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_prandom_u32_proto;
 	case BPF_FUNC_probe_read_str:
 		return &bpf_probe_read_str_proto;
+	case BPF_FUNC_progenyof:
+		return &bpf_progenyof_proto;
 #ifdef CONFIG_CGROUPS
 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;