diff mbox series

[bpf-next,v11,2/4] bpf: added new helper bpf_get_ns_current_pid_tgid

Message ID 20190924152005.4659-3-cneirabustos@gmail.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series BPF: New helper to obtain namespace data from current task | expand

Commit Message

Carlos Antonio Neira Bustos Sept. 24, 2019, 3:20 p.m. UTC
New bpf helper bpf_get_ns_current_pid_tgid,
This helper will return pid and tgid from current task
which namespace matches dev_t and inode number provided,
this will allows us to instrument a process inside a container.

Signed-off-by: Carlos Neira <cneirabustos@gmail.com>
---
 include/linux/bpf.h      |  1 +
 include/uapi/linux/bpf.h | 18 +++++++++++++++++-
 kernel/bpf/core.c        |  1 +
 kernel/bpf/helpers.c     | 32 ++++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c |  2 ++
 5 files changed, 53 insertions(+), 1 deletion(-)

Comments

Andrii Nakryiko Sept. 27, 2019, 4:15 p.m. UTC | #1
On Thu, Sep 26, 2019 at 1:15 AM Carlos Neira <cneirabustos@gmail.com> wrote:
>
> New bpf helper bpf_get_ns_current_pid_tgid,
> This helper will return pid and tgid from current task
> which namespace matches dev_t and inode number provided,
> this will allows us to instrument a process inside a container.
>
> Signed-off-by: Carlos Neira <cneirabustos@gmail.com>
> ---
>  include/linux/bpf.h      |  1 +
>  include/uapi/linux/bpf.h | 18 +++++++++++++++++-
>  kernel/bpf/core.c        |  1 +
>  kernel/bpf/helpers.c     | 32 ++++++++++++++++++++++++++++++++
>  kernel/trace/bpf_trace.c |  2 ++
>  5 files changed, 53 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 5b9d22338606..231001475504 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1055,6 +1055,7 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
>  extern const struct bpf_func_proto bpf_strtol_proto;
>  extern const struct bpf_func_proto bpf_strtoul_proto;
>  extern const struct bpf_func_proto bpf_tcp_sock_proto;
> +extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto;
>
>  /* Shared helpers among cBPF and eBPF. */
>  void bpf_user_rnd_init_once(void);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 77c6be96d676..9272dc8fb08c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2750,6 +2750,21 @@ union bpf_attr {
>   *             **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
>   *
>   *             **-EPROTONOSUPPORT** IP packet version is not 4 or 6
> + *
> + * int bpf_get_ns_current_pid_tgid(u32 dev, u64 inum)
> + *     Return
> + *             A 64-bit integer containing the current tgid and pid from current task

Function signature doesn't correspond to the actual return type (int vs u64).

> + *              which namespace inode and dev_t matches , and is create as such:
> + *             *current_task*\ **->tgid << 32 \|**
> + *             *current_task*\ **->pid**.
> + *
> + *             On failure, the returned value is one of the following:
> + *
> + *             **-EINVAL** if dev and inum supplied don't match dev_t and inode number
> + *              with nsfs of current task.
> + *
> + *             **-ENOENT** if /proc/self/ns does not exists.
> + *
>   */

[...]

>  #include "../../lib/kstrtox.h"
>
> @@ -487,3 +489,33 @@ const struct bpf_func_proto bpf_strtoul_proto = {
>         .arg4_type      = ARG_PTR_TO_LONG,
>  };
>  #endif
> +
> +BPF_CALL_2(bpf_get_ns_current_pid_tgid, u32, dev, u64, inum)

Just curious, is dev_t officially specified as u32 and is never
supposed to grow bigger? I wonder if accepting u64 might be more
future-proof API here?

> +{
> +       struct task_struct *task = current;
> +       struct pid_namespace *pidns;

[...]
Yonghong Song Sept. 27, 2019, 4:59 p.m. UTC | #2
On 9/27/19 9:15 AM, Andrii Nakryiko wrote:
> On Thu, Sep 26, 2019 at 1:15 AM Carlos Neira <cneirabustos@gmail.com> wrote:
>>
>> New bpf helper bpf_get_ns_current_pid_tgid,
>> This helper will return pid and tgid from current task
>> which namespace matches dev_t and inode number provided,
>> this will allows us to instrument a process inside a container.
>>
>> Signed-off-by: Carlos Neira <cneirabustos@gmail.com>
>> ---
>>   include/linux/bpf.h      |  1 +
>>   include/uapi/linux/bpf.h | 18 +++++++++++++++++-
>>   kernel/bpf/core.c        |  1 +
>>   kernel/bpf/helpers.c     | 32 ++++++++++++++++++++++++++++++++
>>   kernel/trace/bpf_trace.c |  2 ++
>>   5 files changed, 53 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 5b9d22338606..231001475504 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1055,6 +1055,7 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
>>   extern const struct bpf_func_proto bpf_strtol_proto;
>>   extern const struct bpf_func_proto bpf_strtoul_proto;
>>   extern const struct bpf_func_proto bpf_tcp_sock_proto;
>> +extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto;
>>
>>   /* Shared helpers among cBPF and eBPF. */
>>   void bpf_user_rnd_init_once(void);
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 77c6be96d676..9272dc8fb08c 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -2750,6 +2750,21 @@ union bpf_attr {
>>    *             **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
>>    *
>>    *             **-EPROTONOSUPPORT** IP packet version is not 4 or 6
>> + *
>> + * int bpf_get_ns_current_pid_tgid(u32 dev, u64 inum)
>> + *     Return
>> + *             A 64-bit integer containing the current tgid and pid from current task
> 
> Function signature doesn't correspond to the actual return type (int vs u64).
> 
>> + *              which namespace inode and dev_t matches , and is create as such:
>> + *             *current_task*\ **->tgid << 32 \|**
>> + *             *current_task*\ **->pid**.
>> + *
>> + *             On failure, the returned value is one of the following:
>> + *
>> + *             **-EINVAL** if dev and inum supplied don't match dev_t and inode number
>> + *              with nsfs of current task.
>> + *
>> + *             **-ENOENT** if /proc/self/ns does not exists.
>> + *
>>    */
> 
> [...]
> 
>>   #include "../../lib/kstrtox.h"
>>
>> @@ -487,3 +489,33 @@ const struct bpf_func_proto bpf_strtoul_proto = {
>>          .arg4_type      = ARG_PTR_TO_LONG,
>>   };
>>   #endif
>> +
>> +BPF_CALL_2(bpf_get_ns_current_pid_tgid, u32, dev, u64, inum)
> 
> Just curious, is dev_t officially specified as u32 and is never
> supposed to grow bigger? I wonder if accepting u64 might be more
> future-proof API here?

This is what we have now in kernel (include/linux/types.h)
typedef u32 __kernel_dev_t;
typedef __kernel_dev_t          dev_t;

But userspace dev_t (defined at /usr/include/sys/types.h) have
8 bytes.

Agree. Let us just use u64. It won't hurt and also will be fine
if kernel internal dev_t becomes 64bit.

> 
>> +{
>> +       struct task_struct *task = current;
>> +       struct pid_namespace *pidns;
> 
> [...]
>
Andrii Nakryiko Sept. 27, 2019, 5:24 p.m. UTC | #3
On Fri, Sep 27, 2019 at 9:59 AM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 9/27/19 9:15 AM, Andrii Nakryiko wrote:
> > On Thu, Sep 26, 2019 at 1:15 AM Carlos Neira <cneirabustos@gmail.com> wrote:
> >>
> >> New bpf helper bpf_get_ns_current_pid_tgid,
> >> This helper will return pid and tgid from current task
> >> which namespace matches dev_t and inode number provided,
> >> this will allows us to instrument a process inside a container.
> >>
> >> Signed-off-by: Carlos Neira <cneirabustos@gmail.com>
> >> ---
> >>   include/linux/bpf.h      |  1 +
> >>   include/uapi/linux/bpf.h | 18 +++++++++++++++++-
> >>   kernel/bpf/core.c        |  1 +
> >>   kernel/bpf/helpers.c     | 32 ++++++++++++++++++++++++++++++++
> >>   kernel/trace/bpf_trace.c |  2 ++
> >>   5 files changed, 53 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> >> index 5b9d22338606..231001475504 100644
> >> --- a/include/linux/bpf.h
> >> +++ b/include/linux/bpf.h
> >> @@ -1055,6 +1055,7 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
> >>   extern const struct bpf_func_proto bpf_strtol_proto;
> >>   extern const struct bpf_func_proto bpf_strtoul_proto;
> >>   extern const struct bpf_func_proto bpf_tcp_sock_proto;
> >> +extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto;
> >>
> >>   /* Shared helpers among cBPF and eBPF. */
> >>   void bpf_user_rnd_init_once(void);
> >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> >> index 77c6be96d676..9272dc8fb08c 100644
> >> --- a/include/uapi/linux/bpf.h
> >> +++ b/include/uapi/linux/bpf.h
> >> @@ -2750,6 +2750,21 @@ union bpf_attr {
> >>    *             **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
> >>    *
> >>    *             **-EPROTONOSUPPORT** IP packet version is not 4 or 6
> >> + *
> >> + * int bpf_get_ns_current_pid_tgid(u32 dev, u64 inum)
> >> + *     Return
> >> + *             A 64-bit integer containing the current tgid and pid from current task
> >
> > Function signature doesn't correspond to the actual return type (int vs u64).
> >
> >> + *              which namespace inode and dev_t matches , and is create as such:
> >> + *             *current_task*\ **->tgid << 32 \|**
> >> + *             *current_task*\ **->pid**.
> >> + *
> >> + *             On failure, the returned value is one of the following:
> >> + *
> >> + *             **-EINVAL** if dev and inum supplied don't match dev_t and inode number
> >> + *              with nsfs of current task.
> >> + *
> >> + *             **-ENOENT** if /proc/self/ns does not exists.
> >> + *
> >>    */
> >
> > [...]
> >
> >>   #include "../../lib/kstrtox.h"
> >>
> >> @@ -487,3 +489,33 @@ const struct bpf_func_proto bpf_strtoul_proto = {
> >>          .arg4_type      = ARG_PTR_TO_LONG,
> >>   };
> >>   #endif
> >> +
> >> +BPF_CALL_2(bpf_get_ns_current_pid_tgid, u32, dev, u64, inum)
> >
> > Just curious, is dev_t officially specified as u32 and is never
> > supposed to grow bigger? I wonder if accepting u64 might be more
> > future-proof API here?
>
> This is what we have now in kernel (include/linux/types.h)
> typedef u32 __kernel_dev_t;
> typedef __kernel_dev_t          dev_t;
>
> But userspace dev_t (defined at /usr/include/sys/types.h) have
> 8 bytes.
>
> Agree. Let us just use u64. It won't hurt and also will be fine
> if kernel internal dev_t becomes 64bit.

Sounds good. Let's not forget to check that conversion to dev_t
doesn't loose high bits, something like:

if ((u64)(dev_t)dev != dev)
    return -E<something>;

>
> >
> >> +{
> >> +       struct task_struct *task = current;
> >> +       struct pid_namespace *pidns;
> >
> > [...]
> >
Carlos Antonio Neira Bustos Sept. 28, 2019, 1:42 a.m. UTC | #4
On Fri, Sep 27, 2019 at 10:24:46AM -0700, Andrii Nakryiko wrote:
> On Fri, Sep 27, 2019 at 9:59 AM Yonghong Song <yhs@fb.com> wrote:
> >
> >
> >
> > On 9/27/19 9:15 AM, Andrii Nakryiko wrote:
> > > On Thu, Sep 26, 2019 at 1:15 AM Carlos Neira <cneirabustos@gmail.com> wrote:
> > >>
> > >> New bpf helper bpf_get_ns_current_pid_tgid,
> > >> This helper will return pid and tgid from current task
> > >> which namespace matches dev_t and inode number provided,
> > >> this will allows us to instrument a process inside a container.
> > >>
> > >> Signed-off-by: Carlos Neira <cneirabustos@gmail.com>
> > >> ---
> > >>   include/linux/bpf.h      |  1 +
> > >>   include/uapi/linux/bpf.h | 18 +++++++++++++++++-
> > >>   kernel/bpf/core.c        |  1 +
> > >>   kernel/bpf/helpers.c     | 32 ++++++++++++++++++++++++++++++++
> > >>   kernel/trace/bpf_trace.c |  2 ++
> > >>   5 files changed, 53 insertions(+), 1 deletion(-)
> > >>
> > >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > >> index 5b9d22338606..231001475504 100644
> > >> --- a/include/linux/bpf.h
> > >> +++ b/include/linux/bpf.h
> > >> @@ -1055,6 +1055,7 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
> > >>   extern const struct bpf_func_proto bpf_strtol_proto;
> > >>   extern const struct bpf_func_proto bpf_strtoul_proto;
> > >>   extern const struct bpf_func_proto bpf_tcp_sock_proto;
> > >> +extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto;
> > >>
> > >>   /* Shared helpers among cBPF and eBPF. */
> > >>   void bpf_user_rnd_init_once(void);
> > >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > >> index 77c6be96d676..9272dc8fb08c 100644
> > >> --- a/include/uapi/linux/bpf.h
> > >> +++ b/include/uapi/linux/bpf.h
> > >> @@ -2750,6 +2750,21 @@ union bpf_attr {
> > >>    *             **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
> > >>    *
> > >>    *             **-EPROTONOSUPPORT** IP packet version is not 4 or 6
> > >> + *
> > >> + * int bpf_get_ns_current_pid_tgid(u32 dev, u64 inum)
> > >> + *     Return
> > >> + *             A 64-bit integer containing the current tgid and pid from current task
> > >
> > > Function signature doesn't correspond to the actual return type (int vs u64).
> > >
> > >> + *              which namespace inode and dev_t matches , and is create as such:
> > >> + *             *current_task*\ **->tgid << 32 \|**
> > >> + *             *current_task*\ **->pid**.
> > >> + *
> > >> + *             On failure, the returned value is one of the following:
> > >> + *
> > >> + *             **-EINVAL** if dev and inum supplied don't match dev_t and inode number
> > >> + *              with nsfs of current task.
> > >> + *
> > >> + *             **-ENOENT** if /proc/self/ns does not exists.
> > >> + *
> > >>    */
> > >
> > > [...]
> > >
> > >>   #include "../../lib/kstrtox.h"
> > >>
> > >> @@ -487,3 +489,33 @@ const struct bpf_func_proto bpf_strtoul_proto = {
> > >>          .arg4_type      = ARG_PTR_TO_LONG,
> > >>   };
> > >>   #endif
> > >> +
> > >> +BPF_CALL_2(bpf_get_ns_current_pid_tgid, u32, dev, u64, inum)
> > >
> > > Just curious, is dev_t officially specified as u32 and is never
> > > supposed to grow bigger? I wonder if accepting u64 might be more
> > > future-proof API here?
> >
> > This is what we have now in kernel (include/linux/types.h)
> > typedef u32 __kernel_dev_t;
> > typedef __kernel_dev_t          dev_t;
> >
> > But userspace dev_t (defined at /usr/include/sys/types.h) have
> > 8 bytes.
> >
> > Agree. Let us just use u64. It won't hurt and also will be fine
> > if kernel internal dev_t becomes 64bit.
> 
> Sounds good. Let's not forget to check that conversion to dev_t
> doesn't loose high bits, something like:
> 
> if ((u64)(dev_t)dev != dev)
>     return -E<something>;
> 
> >
> > >
> > >> +{
> > >> +       struct task_struct *task = current;
> > >> +       struct pid_namespace *pidns;
> > >
> > > [...]
> > >

Thanks Yonghong and Andrii,

I'll include these fixes on V12, I'll work on this over the weekend.

Bests
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5b9d22338606..231001475504 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1055,6 +1055,7 @@  extern const struct bpf_func_proto bpf_get_local_storage_proto;
 extern const struct bpf_func_proto bpf_strtol_proto;
 extern const struct bpf_func_proto bpf_strtoul_proto;
 extern const struct bpf_func_proto bpf_tcp_sock_proto;
+extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto;
 
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 77c6be96d676..9272dc8fb08c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2750,6 +2750,21 @@  union bpf_attr {
  *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
  *
  *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_get_ns_current_pid_tgid(u32 dev, u64 inum)
+ *	Return
+ *		A 64-bit integer containing the current tgid and pid from current task
+ *              which namespace inode and dev_t matches , and is create as such:
+ *		*current_task*\ **->tgid << 32 \|**
+ *		*current_task*\ **->pid**.
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EINVAL** if dev and inum supplied don't match dev_t and inode number
+ *              with nsfs of current task.
+ *
+ *		**-ENOENT** if /proc/self/ns does not exists.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2862,7 +2877,8 @@  union bpf_attr {
 	FN(sk_storage_get),		\
 	FN(sk_storage_delete),		\
 	FN(send_signal),		\
-	FN(tcp_gen_syncookie),
+	FN(tcp_gen_syncookie),          \
+	FN(get_ns_current_pid_tgid),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 66088a9e9b9e..b2fd5358f472 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2042,6 +2042,7 @@  const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
 const struct bpf_func_proto bpf_get_current_comm_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5e28718928ca..81a716eae7ed 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -11,6 +11,8 @@ 
 #include <linux/uidgid.h>
 #include <linux/filter.h>
 #include <linux/ctype.h>
+#include <linux/pid_namespace.h>
+#include <linux/proc_ns.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -487,3 +489,33 @@  const struct bpf_func_proto bpf_strtoul_proto = {
 	.arg4_type	= ARG_PTR_TO_LONG,
 };
 #endif
+
+BPF_CALL_2(bpf_get_ns_current_pid_tgid, u32, dev, u64, inum)
+{
+	struct task_struct *task = current;
+	struct pid_namespace *pidns;
+	pid_t pid, tgid;
+
+	if (unlikely(!task))
+		return -EINVAL;
+
+	pidns = task_active_pid_ns(task);
+	if (unlikely(!pidns))
+		return -ENOENT;
+
+	if (!ns_match(&pidns->ns, (dev_t)dev, inum))
+		return -EINVAL;
+
+	pid = task_pid_nr_ns(task, pidns);
+	tgid = task_tgid_nr_ns(task, pidns);
+
+	return (u64) tgid << 32 | pid;
+}
+
+const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
+	.func		= bpf_get_ns_current_pid_tgid,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_ANYTHING,
+};
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ca1255d14576..1d34f1013e78 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -709,6 +709,8 @@  tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #endif
 	case BPF_FUNC_send_signal:
 		return &bpf_send_signal_proto;
+	case BPF_FUNC_get_ns_current_pid_tgid:
+		return &bpf_get_ns_current_pid_tgid_proto;
 	default:
 		return NULL;
 	}