diff mbox series

[v3,bpf-next,1/3] BPF: helpers: New helper to obtain namespace data from current task

Message ID 20190320164920.ha5ne7nlnu7fbx5i@dev00
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series [v3,bpf-next,1/3] BPF: helpers: New helper to obtain namespace data from current task | expand

Commit Message

Carlos Antonio Neira Bustos March 20, 2019, 4:49 p.m. UTC
This is a series of patches to introduce a new helper called bpf_get_current_pidns_info,
this change has been splitted into the following patches:

1- Feature introduction
2- Update tools/.../bpf.h
3- Self tests and samples


From 852a65906122b05b4d1a23af868b2c245d240402 Mon Sep 17 00:00:00 2001
From: Carlos <cneirabustos@gmail.com>
Date: Tue, 19 Mar 2019 19:38:48 -0300
Subject: [PATCH] [PATCH bpf-next 1/3] BPF: New helper to obtain namespace data
 from current task

This helper obtains the active namespace from current and returns pid, tgid,
device and namespace id as seen from that namespace, allowing to instrument
a process inside a container.
Device is read from /proc/self/ns/pid, as in the future it's possible that
different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.
Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace which is
fine when a bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not work
if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering even inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

        u32 pid = bpf_get_current_pid_tgid() >> 32;
        if (pid != <pid_arg_passed_in>)
                return 0;
Could be modified to use bpf_get_current_pidns_info() as follows:

        struct bpf_pidns pidns;
        bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns));
        u32 pid = pidns.tgid;
        u32 nsid = pidns.nsid;
        if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
                return 0;

To find out the name PID namespace id of a process, you could use this command:

$ ps -h -o pidns -p <pid_of_interest>

Or this other command:

$ ls -Li /proc/<pid_of_interest>/ns/pid

Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@gmail.com>
-
---
 include/linux/bpf.h      |  1 +
 include/uapi/linux/bpf.h | 26 ++++++++++++++++++-
 kernel/bpf/core.c        |  1 +
 kernel/bpf/helpers.c     | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c |  2 ++
 5 files changed, 96 insertions(+), 1 deletion(-)

Comments

Carlos Antonio Neira Bustos March 20, 2019, 4:52 p.m. UTC | #1
From b7f5af5c1513b6e89fc5b68ddb1ad65e4fbede02 Mon Sep 17 00:00:00 2001
From: Carlos <cneirabustos@gmail.com>
Date: Mon, 18 Mar 2019 13:09:35 -0300
Subject: [PATCH] [PATCH bpf-next 2/3] BPF: New helper to obtain namespace data
   from current task

This helper obtains the active namespace from current and returns pid, tgid,
device and namespace id as seen from that namespace, allowing to instrument
a process inside a container.
Device is read from /proc/self/ns/pid, as in the future it's possible that
different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.
Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace which is
fine when a bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not work
if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering even inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

        u32 pid = bpf_get_current_pid_tgid() >> 32;
        if (pid != <pid_arg_passed_in>)
                return 0;
Could be modified to use bpf_get_current_pidns_info() as follows:

        struct bpf_pidns pidns;
        bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns));
        u32 pid = pidns.tgid;
        u32 nsid = pidns.nsid;
        if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
                return 0;

To find out the name PID namespace id of a process, you could use this command:

$ ps -h -o pidns -p <pid_of_interest>

Or this other command:

$ ls -Li /proc/<pid_of_interest>/ns/pid

Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@gmail.com>
---
 tools/include/uapi/linux/bpf.h            | 25 ++++++++++++++++++++++++-
 tools/testing/selftests/bpf/bpf_helpers.h |  3 +++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3c38ac9a92a7..5c7e517a59a6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2366,6 +2366,18 @@ union bpf_attr {
  *             current value is ect (ECN capable). Works with IPv6 and IPv4.
  *     Return
  *             1 if set, 0 if not set.
+ *
+ * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ *	Description
+ *		Copies into *pidns* pid, namespace id and tgid as seen by the
+ *		current namespace and also device from /proc/self/ns/pid.
+ *		*size_of_pidns* must be the size of *pidns*
+ *
+ *		This helper is used when pid filtering is needed inside a
+ *		container as bpf_get_current_tgid() helper returns always the
+ *		pid id as seen by the root namespace.
+ *	Return
+ *		0 on success -EINVAL on error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2465,7 +2477,8 @@ union bpf_attr {
 	FN(spin_unlock),		\
 	FN(sk_fullsock),		\
 	FN(tcp_sock),			\
-	FN(skb_ecn_set_ce),
+	FN(skb_ecn_set_ce),		\
+	FN(get_current_pidns_info),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3152,4 +3165,14 @@ struct bpf_line_info {
 struct bpf_spin_lock {
 	__u32	val;
 };
+
+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+ */
+struct bpf_pidns_info {
+	__u32 dev;
+	__u32 nsid;
+	__u32 tgid;
+	__u32 pid;
+};
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index c9433a496d54..8cd630d84c07 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -182,6 +182,9 @@ static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
 	(void *) BPF_FUNC_tcp_sock;
 static int (*bpf_skb_ecn_set_ce)(void *ctx) =
 	(void *) BPF_FUNC_skb_ecn_set_ce;
+static int (*bpf_get_current_pidns_info)(struct bpf_pidns_info *buf,
+					 unsigned int buf_size) =
+	(void *) BPF_FUNC_get_current_pidns_info;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
Carlos Antonio Neira Bustos March 20, 2019, 4:54 p.m. UTC | #2
From 7b7759029fb2bbf6d65a6ac26bb78121e91a30f1 Mon Sep 17 00:00:00 2001
From: Carlos <cneirabustos@gmail.com>
Date: Tue, 19 Mar 2019 18:00:56 -0300
Subject: [PATCH] [PATCH bpf-next 3/3] BPF: New helper to obtain namespace data
 from current task

This helper obtains the active namespace from current and returns pid, tgid,
device and namespace id as seen from that namespace, allowing to instrument
a process inside a container.
Device is read from /proc/self/ns/pid, as in the future it's possible that
different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.
Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace which is
fine when a bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not work
if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering even inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

        u32 pid = bpf_get_current_pid_tgid() >> 32;
        if (pid != <pid_arg_passed_in>)
                return 0;
Could be modified to use bpf_get_current_pidns_info() as follows:

        struct bpf_pidns pidns;
        bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns));
        u32 pid = pidns.tgid;
        u32 nsid = pidns.nsid;
        if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
                return 0;

To find out the name PID namespace id of a process, you could use this command:

$ ps -h -o pidns -p <pid_of_interest>

Or this other command:

$ ls -Li /proc/<pid_of_interest>/ns/pid

Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@gmail.com>
---
---
 samples/bpf/Makefile                               |   3 +
 samples/bpf/trace_ns_info_user.c                   |  35 ++++++
 samples/bpf/trace_ns_info_user_kern.c              |  44 +++++++
 tools/testing/selftests/bpf/Makefile               |   2 +-
 .../testing/selftests/bpf/progs/test_pidns_kern.c  |  48 +++++++
 tools/testing/selftests/bpf/test_pidns.c           | 138 +++++++++++++++++++++
 6 files changed, 269 insertions(+), 1 deletion(-)
 create mode 100644 samples/bpf/trace_ns_info_user.c
 create mode 100644 samples/bpf/trace_ns_info_user_kern.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_pidns_kern.c
 create mode 100644 tools/testing/selftests/bpf/test_pidns.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 65e667bdf979..4af5abc230e5 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -53,6 +53,7 @@ hostprogs-y += xdp_fwd
 hostprogs-y += task_fd_query
 hostprogs-y += xdp_sample_pkts
 hostprogs-y += hbm
+hostprogs-y += trace_ns_info
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -109,6 +110,7 @@ xdp_fwd-objs := xdp_fwd_user.o
 task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
 xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
 hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS)
+trace_ns_info-objs := bpf_load.o trace_ns_info_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -167,6 +169,7 @@ always += xdp_fwd_kern.o
 always += task_fd_query_kern.o
 always += xdp_sample_pkts_kern.o
 always += hbm_out_kern.o
+always += trace_ns_info_user_kern.o
 
 KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
 KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c
new file mode 100644
index 000000000000..e06d08db6f30
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@gmail.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "bpf/libbpf.h"
+#include "bpf_load.h"
+
+/* This code was taken verbatim from tracex1_user.c, it's used
+ * to exercize bpf_get_current_pidns_info() helper call.
+ */
+int main(int ac, char **argv)
+{
+	FILE *f;
+	char filename[256];
+
+	snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
+	printf("loading %s\n", filename);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	f = popen("taskset 1 ping  localhost", "r");
+	(void) f;
+	read_trace_pipe();
+	return 0;
+}
diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c
new file mode 100644
index 000000000000..96675e02b707
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user_kern.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@gmail.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+typedef __u64 u64;
+typedef __u32 u32;
+
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+
+/* This will call bpf_get_current_pidns_info() to display pid and ns values
+ * as seen by the current namespace, on the far left you will see the pid as
+ * seen as by the root namespace.
+ */
+
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	char fmt[] = "nsid:%u, dev: %u,  pid:%u\n";
+	struct bpf_pidns_info nsinfo;
+	int ok = 0;
+
+	ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo));
+	if (ok == 0)
+		bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid,
+				 (u32) nsinfo.dev, (u32)nsinfo.pid);
+
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 518cd587cd63..170d0f650318 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
 	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
 	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
-	test_netcnt test_tcpnotify_user test_sock_fields
+	test_netcnt test_tcpnotify_user test_sock_fields test_pidns
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
diff --git a/tools/testing/selftests/bpf/progs/test_pidns_kern.c b/tools/testing/selftests/bpf/progs/test_pidns_kern.c
new file mode 100644
index 000000000000..524a84f596e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pidns_kern.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@gmail.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <errno.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") nsidmap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") pidmap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+};
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int trace(void *ctx)
+{
+	struct bpf_pidns_info nsinfo;
+	__u32 key = 0, *expected_pid, *val;
+
+	if(bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo)))
+		return EINVAL;
+
+	expected_pid = bpf_map_lookup_elem(&pidmap, &key);
+	if (!expected_pid || *expected_pid != nsinfo.pid)
+		return 0;
+
+	val = bpf_map_lookup_elem(&nsidmap, &key);
+	if (val)
+		*val = nsinfo.nsid;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_pidns.c b/tools/testing/selftests/bpf/test_pidns.c
new file mode 100644
index 000000000000..5d62f6414397
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_pidns.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@gmail.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+
+#define CHECK(condition, tag, format...) ({		\
+	int __ret = !!(condition);			\
+	if (__ret) {					\
+		printf("%s:FAIL:%s ", __func__, tag);	\
+		printf(format);				\
+	} else {					\
+		printf("%s:PASS:%s\n", __func__, tag);	\
+	}						\
+	__ret;						\
+})
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+			const char *name)
+{
+	struct bpf_map *map;
+
+	map = bpf_object__find_map_by_name(obj, name);
+	if (!map)
+		return -1;
+	return bpf_map__fd(map);
+}
+
+
+int main(int argc, char **argv)
+{
+	const char *probe_name = "syscalls/sys_enter_nanosleep";
+	const char *file = "test_pidns_kern.o";
+	int err, bytes, efd, prog_fd, pmu_fd;
+	int pidmap_fd, nsidmap_fd;
+	struct perf_event_attr attr = {};
+	struct bpf_object *obj;
+	__u32 knsid = 0;
+	__u32 key = 0, pid;
+	int exit_code = 1;
+	struct stat st;
+	char buf[256];
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+		goto cleanup_cgroup_env;
+
+	nsidmap_fd = bpf_find_map(__func__, obj, "nsidmap");
+	if (CHECK(nsidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+		  nsidmap_fd, errno))
+		goto close_prog;
+
+	pidmap_fd = bpf_find_map(__func__, obj, "pidmap");
+	if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+		  pidmap_fd, errno))
+		goto close_prog;
+
+	pid = getpid();
+	bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
+
+	snprintf(buf, sizeof(buf),
+		 "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+	efd = open(buf, O_RDONLY, 0);
+	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+		  "bytes %d errno %d\n", bytes, errno))
+		goto close_prog;
+
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+		  errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	/* trigger some syscalls */
+	sleep(1);
+
+	err = bpf_map_lookup_elem(nsidmap_fd, &key, &knsid);
+	if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
+		goto close_pmu;
+
+	if(stat("/proc/self/ns/pid",&st))
+		goto close_pmu;
+
+	if (CHECK(knsid != (__u32) st.st_ino, "compare_namespace_id",
+		  "kern knsid %u user unsid %u\n", knsid, (__u32) st.st_ino))
+		goto close_pmu;
+
+	exit_code = 0;
+	printf("%s:PASS\n", argv[0]);
+
+close_pmu:
+	close(pmu_fd);
+close_prog:
+	bpf_object__close(obj);
+cleanup_cgroup_env:
+	return exit_code;
+}
Alexei Starovoitov March 21, 2019, 1:23 a.m. UTC | #3
On Wed, Mar 20, 2019 at 01:49:22PM -0300, Carlos Antonio Neira Bustos wrote:
> 
> This is a series of patches to introduce a new helper called bpf_get_current_pidns_info,
> this change has been splitted into the following patches:
> 
> 1- Feature introduction
> 2- Update tools/.../bpf.h
> 3- Self tests and samples
> 
> 
> From 852a65906122b05b4d1a23af868b2c245d240402 Mon Sep 17 00:00:00 2001
> From: Carlos <cneirabustos@gmail.com>
> Date: Tue, 19 Mar 2019 19:38:48 -0300
> Subject: [PATCH] [PATCH bpf-next 1/3] BPF: New helper to obtain namespace data
>  from current task
> 
> This helper obtains the active namespace from current and returns pid, tgid,
> device and namespace id as seen from that namespace, allowing to instrument
> a process inside a container.
> Device is read from /proc/self/ns/pid, as in the future it's possible that
> different pid_ns files may belong to different devices, according
> to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
> conference.
> Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
> scripts but this helper returns the pid as seen by the root namespace which is
> fine when a bcc script is not executed inside a container.
> When the process of interest is inside a container, pid filtering will not work
> if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
> returning the pid as it's seen by the current namespace where the script is
> executing.
> 
> This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
> used to do pid filtering even inside a container.
> 
> For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):
> 
>         u32 pid = bpf_get_current_pid_tgid() >> 32;
>         if (pid != <pid_arg_passed_in>)
>                 return 0;
> Could be modified to use bpf_get_current_pidns_info() as follows:
> 
>         struct bpf_pidns pidns;
>         bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns));
>         u32 pid = pidns.tgid;
>         u32 nsid = pidns.nsid;
>         if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
>                 return 0;
> 
> To find out the name PID namespace id of a process, you could use this command:
> 
> $ ps -h -o pidns -p <pid_of_interest>
> 
> Or this other command:
> 
> $ ls -Li /proc/<pid_of_interest>/ns/pid
> 
> Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@gmail.com>
> -
> ---
>  include/linux/bpf.h      |  1 +
>  include/uapi/linux/bpf.h | 26 ++++++++++++++++++-
>  kernel/bpf/core.c        |  1 +
>  kernel/bpf/helpers.c     | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
>  kernel/trace/bpf_trace.c |  2 ++
>  5 files changed, 96 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index a2132e09dc1c..a77f5bd77bd8 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -930,6 +930,7 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
>  extern const struct bpf_func_proto bpf_spin_lock_proto;
>  extern const struct bpf_func_proto bpf_spin_unlock_proto;
>  extern const struct bpf_func_proto bpf_get_local_storage_proto;
> +extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
>  
>  /* Shared helpers among cBPF and eBPF. */
>  void bpf_user_rnd_init_once(void);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 3c38ac9a92a7..facc701c7873 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2366,6 +2366,18 @@ union bpf_attr {
>   *             current value is ect (ECN capable). Works with IPv6 and IPv4.
>   *     Return
>   *             1 if set, 0 if not set.
> + *
> + * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
> + *	Description
> + *		Copies into *pidns* pid, namespace id and tgid as seen by the
> + *		current namespace and also device from /proc/self/ns/pid.
> + *		*size_of_pidns* must be the size of *pidns*
> + *
> + *		This helper is used when pid filtering is needed inside a
> + *		container as bpf_get_current_tgid() helper returns always the
> + *		pid id as seen by the root namespace.
> + *	Return
> + *		0 on success -EINVAL on error.
>   */
>  #define __BPF_FUNC_MAPPER(FN)		\
>  	FN(unspec),			\
> @@ -2465,7 +2477,8 @@ union bpf_attr {
>  	FN(spin_unlock),		\
>  	FN(sk_fullsock),		\
>  	FN(tcp_sock),			\
> -	FN(skb_ecn_set_ce),
> +	FN(skb_ecn_set_ce),		\
> +	FN(get_current_pidns_info),
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> @@ -3152,4 +3165,15 @@ struct bpf_line_info {
>  struct bpf_spin_lock {
>  	__u32	val;
>  };
> +
> +/* helper bpf_get_current_pidns_info will store the following
> + * data, dev will contain major/minor from /proc/self/pid.
> +*/
> +struct bpf_pidns_info {
> +	__u32 dev;
> +	__u32 nsid;
> +	__u32 tgid;
> +	__u32 pid;
> +};
> +
>  #endif /* _UAPI__LINUX_BPF_H__ */
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 3f08c257858e..06329fbed95f 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2044,6 +2044,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
>  const struct bpf_func_proto bpf_get_current_comm_proto __weak;
>  const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
>  const struct bpf_func_proto bpf_get_local_storage_proto __weak;
> +const struct bpf_func_proto bpf_get_current_pidns_info __weak;
>  
>  const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
>  {
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index a411fc17d265..95c3780a6ba7 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -18,6 +18,11 @@
>  #include <linux/sched.h>
>  #include <linux/uidgid.h>
>  #include <linux/filter.h>
> +#include <linux/pid_namespace.h>
> +#include <linux/major.h>
> +#include <linux/stat.h>
> +#include <linux/namei.h>
> +#include <linux/version.h>
>  
>  /* If kernel subsystem is allowing eBPF programs to call this function,
>   * inside its own verifier_ops->get_func_proto() callback it should return
> @@ -364,3 +369,65 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
>  };
>  #endif
>  #endif
> +
> +BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
> +	 size)
> +{
> +	const char *nspid = "/proc/self/ns/pid";
> +	struct pid_namespace *pidns = NULL;
> +	struct kstat ks;
> +	struct path kp;
> +	pid_t tgid = 0;
> +	pid_t pid = 0;
> +	int res = 0;
> +
> +	if (unlikely(size != sizeof(struct bpf_pidns_info)))
> +		goto clear;
> +
> +	pidns = task_active_pid_ns(current);
> +
> +	if (unlikely(!pidns))
> +		goto clear;
> +
> +	pidns_info->nsid =  pidns->ns.inum;
> +	pid = task_pid_nr_ns(current, pidns);
> +
> +	if (unlikely(!pid))
> +		goto clear;
> +
> +	tgid = task_tgid_nr_ns(current, pidns);
> +
> +	if (unlikely(!tgid))
> +		goto clear;
> +
> +	pidns_info->tgid = (u32) tgid;
> +	pidns_info->pid = (u32) pid;
> +
> +        kern_path(nspid, 0, &kp);
> +
> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,11,0)
> +    res = vfs_getattr(&kp, &ks, STATX_ALL, 0);
> +#else
> +    res = vfs_getattr(&kp, &ks);
> +#endif

Please access namespaces directly.
I suspect bpf helpers cannot do vfs_getattr. Something inside might sleep
while bpf progs are non preemptible.

Also please run ./scripts/checkpatch.pl on your patches
and read Documentation/bpf/bpf_devel_QA.rst.
Carlos Antonio Neira Bustos March 21, 2019, 2:40 p.m. UTC | #4
On Wed, Mar 20, 2019 at 06:23:20PM -0700, Alexei Starovoitov wrote:
> On Wed, Mar 20, 2019 at 01:49:22PM -0300, Carlos Antonio Neira Bustos wrote:
> > 
> > This is a series of patches to introduce a new helper called bpf_get_current_pidns_info,
> > this change has been splitted into the following patches:
> > 
> > 1- Feature introduction
> > 2- Update tools/.../bpf.h
> > 3- Self tests and samples
> > 
> > 
> > From 852a65906122b05b4d1a23af868b2c245d240402 Mon Sep 17 00:00:00 2001
> > From: Carlos <cneirabustos@gmail.com>
> > Date: Tue, 19 Mar 2019 19:38:48 -0300
> > Subject: [PATCH] [PATCH bpf-next 1/3] BPF: New helper to obtain namespace data
> >  from current task
> > 
> > This helper obtains the active namespace from current and returns pid, tgid,
> > device and namespace id as seen from that namespace, allowing to instrument
> > a process inside a container.
> > Device is read from /proc/self/ns/pid, as in the future it's possible that
> > different pid_ns files may belong to different devices, according
> > to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
> > conference.
> > Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
> > scripts but this helper returns the pid as seen by the root namespace which is
> > fine when a bcc script is not executed inside a container.
> > When the process of interest is inside a container, pid filtering will not work
> > if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
> > returning the pid as it's seen by the current namespace where the script is
> > executing.
> > 
> > This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
> > used to do pid filtering even inside a container.
> > 
> > For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):
> > 
> >         u32 pid = bpf_get_current_pid_tgid() >> 32;
> >         if (pid != <pid_arg_passed_in>)
> >                 return 0;
> > Could be modified to use bpf_get_current_pidns_info() as follows:
> > 
> >         struct bpf_pidns pidns;
> >         bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns));
> >         u32 pid = pidns.tgid;
> >         u32 nsid = pidns.nsid;
> >         if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
> >                 return 0;
> > 
> > To find out the name PID namespace id of a process, you could use this command:
> > 
> > $ ps -h -o pidns -p <pid_of_interest>
> > 
> > Or this other command:
> > 
> > $ ls -Li /proc/<pid_of_interest>/ns/pid
> > 
> > Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@gmail.com>
> > -
> > ---
> >  include/linux/bpf.h      |  1 +
> >  include/uapi/linux/bpf.h | 26 ++++++++++++++++++-
> >  kernel/bpf/core.c        |  1 +
> >  kernel/bpf/helpers.c     | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  kernel/trace/bpf_trace.c |  2 ++
> >  5 files changed, 96 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index a2132e09dc1c..a77f5bd77bd8 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -930,6 +930,7 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
> >  extern const struct bpf_func_proto bpf_spin_lock_proto;
> >  extern const struct bpf_func_proto bpf_spin_unlock_proto;
> >  extern const struct bpf_func_proto bpf_get_local_storage_proto;
> > +extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
> >  
> >  /* Shared helpers among cBPF and eBPF. */
> >  void bpf_user_rnd_init_once(void);
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 3c38ac9a92a7..facc701c7873 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -2366,6 +2366,18 @@ union bpf_attr {
> >   *             current value is ect (ECN capable). Works with IPv6 and IPv4.
> >   *     Return
> >   *             1 if set, 0 if not set.
> > + *
> > + * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
> > + *	Description
> > + *		Copies into *pidns* pid, namespace id and tgid as seen by the
> > + *		current namespace and also device from /proc/self/ns/pid.
> > + *		*size_of_pidns* must be the size of *pidns*
> > + *
> > + *		This helper is used when pid filtering is needed inside a
> > + *		container as bpf_get_current_tgid() helper returns always the
> > + *		pid id as seen by the root namespace.
> > + *	Return
> > + *		0 on success -EINVAL on error.
> >   */
> >  #define __BPF_FUNC_MAPPER(FN)		\
> >  	FN(unspec),			\
> > @@ -2465,7 +2477,8 @@ union bpf_attr {
> >  	FN(spin_unlock),		\
> >  	FN(sk_fullsock),		\
> >  	FN(tcp_sock),			\
> > -	FN(skb_ecn_set_ce),
> > +	FN(skb_ecn_set_ce),		\
> > +	FN(get_current_pidns_info),
> >  
> >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> >   * function eBPF program intends to call
> > @@ -3152,4 +3165,15 @@ struct bpf_line_info {
> >  struct bpf_spin_lock {
> >  	__u32	val;
> >  };
> > +
> > +/* helper bpf_get_current_pidns_info will store the following
> > + * data, dev will contain major/minor from /proc/self/pid.
> > +*/
> > +struct bpf_pidns_info {
> > +	__u32 dev;
> > +	__u32 nsid;
> > +	__u32 tgid;
> > +	__u32 pid;
> > +};
> > +
> >  #endif /* _UAPI__LINUX_BPF_H__ */
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index 3f08c257858e..06329fbed95f 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -2044,6 +2044,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
> >  const struct bpf_func_proto bpf_get_current_comm_proto __weak;
> >  const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
> >  const struct bpf_func_proto bpf_get_local_storage_proto __weak;
> > +const struct bpf_func_proto bpf_get_current_pidns_info __weak;
> >  
> >  const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
> >  {
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index a411fc17d265..95c3780a6ba7 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -18,6 +18,11 @@
> >  #include <linux/sched.h>
> >  #include <linux/uidgid.h>
> >  #include <linux/filter.h>
> > +#include <linux/pid_namespace.h>
> > +#include <linux/major.h>
> > +#include <linux/stat.h>
> > +#include <linux/namei.h>
> > +#include <linux/version.h>
> >  
> >  /* If kernel subsystem is allowing eBPF programs to call this function,
> >   * inside its own verifier_ops->get_func_proto() callback it should return
> > @@ -364,3 +369,65 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
> >  };
> >  #endif
> >  #endif
> > +
> > +BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
> > +	 size)
> > +{
> > +	const char *nspid = "/proc/self/ns/pid";
> > +	struct pid_namespace *pidns = NULL;
> > +	struct kstat ks;
> > +	struct path kp;
> > +	pid_t tgid = 0;
> > +	pid_t pid = 0;
> > +	int res = 0;
> > +
> > +	if (unlikely(size != sizeof(struct bpf_pidns_info)))
> > +		goto clear;
> > +
> > +	pidns = task_active_pid_ns(current);
> > +
> > +	if (unlikely(!pidns))
> > +		goto clear;
> > +
> > +	pidns_info->nsid =  pidns->ns.inum;
> > +	pid = task_pid_nr_ns(current, pidns);
> > +
> > +	if (unlikely(!pid))
> > +		goto clear;
> > +
> > +	tgid = task_tgid_nr_ns(current, pidns);
> > +
> > +	if (unlikely(!tgid))
> > +		goto clear;
> > +
> > +	pidns_info->tgid = (u32) tgid;
> > +	pidns_info->pid = (u32) pid;
> > +
> > +        kern_path(nspid, 0, &kp);
> > +
> > +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,11,0)
> > +    res = vfs_getattr(&kp, &ks, STATX_ALL, 0);
> > +#else
> > +    res = vfs_getattr(&kp, &ks);
> > +#endif
> 
> Please access namespaces directly.
> I suspect bpf helpers cannot do vfs_getattr. Something inside might sleep
> while bpf progs are non preemptible.
> 
> Also please run ./scripts/checkpatch.pl on your patches
> and read Documentation/bpf/bpf_devel_QA.rst.
> 
Hello Alexei, 

Thanks for checking this out and also thanks for the documentation link.
At this point I have a couple of questions:

* What is the reason to not use the current namespace api instead of directly
  accessing namespaces?.

* Regarding bpf programs not being preemptible. If we add spin_locks to the
  vfs_getattr call, would that be an acceptable solution?

   spin_lock(&bpf_lock);
   res = vfs_getattr(&kp, &ks);
   spin_unlock(&bpf_lock);

Is there another way to interact with the vfs layer within a bpf helper?.

Bests
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a2132e09dc1c..a77f5bd77bd8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -930,6 +930,7 @@  extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
 extern const struct bpf_func_proto bpf_spin_lock_proto;
 extern const struct bpf_func_proto bpf_spin_unlock_proto;
 extern const struct bpf_func_proto bpf_get_local_storage_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
 
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3c38ac9a92a7..facc701c7873 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2366,6 +2366,18 @@  union bpf_attr {
  *             current value is ect (ECN capable). Works with IPv6 and IPv4.
  *     Return
  *             1 if set, 0 if not set.
+ *
+ * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ *	Description
+ *		Copies into *pidns* pid, namespace id and tgid as seen by the
+ *		current namespace and also device from /proc/self/ns/pid.
+ *		*size_of_pidns* must be the size of *pidns*
+ *
+ *		This helper is used when pid filtering is needed inside a
+ *		container as bpf_get_current_tgid() helper returns always the
+ *		pid id as seen by the root namespace.
+ *	Return
+ *		0 on success -EINVAL on error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2465,7 +2477,8 @@  union bpf_attr {
 	FN(spin_unlock),		\
 	FN(sk_fullsock),		\
 	FN(tcp_sock),			\
-	FN(skb_ecn_set_ce),
+	FN(skb_ecn_set_ce),		\
+	FN(get_current_pidns_info),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3152,4 +3165,15 @@  struct bpf_line_info {
 struct bpf_spin_lock {
 	__u32	val;
 };
+
+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/pid.
+*/
+struct bpf_pidns_info {
+	__u32 dev;
+	__u32 nsid;
+	__u32 tgid;
+	__u32 pid;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3f08c257858e..06329fbed95f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2044,6 +2044,7 @@  const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
 const struct bpf_func_proto bpf_get_current_comm_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a411fc17d265..95c3780a6ba7 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,11 @@ 
 #include <linux/sched.h>
 #include <linux/uidgid.h>
 #include <linux/filter.h>
+#include <linux/pid_namespace.h>
+#include <linux/major.h>
+#include <linux/stat.h>
+#include <linux/namei.h>
+#include <linux/version.h>
 
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
@@ -364,3 +369,65 @@  const struct bpf_func_proto bpf_get_local_storage_proto = {
 };
 #endif
 #endif
+
+BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
+	 size)
+{
+	const char *nspid = "/proc/self/ns/pid";
+	struct pid_namespace *pidns = NULL;
+	struct kstat ks;
+	struct path kp;
+	pid_t tgid = 0;
+	pid_t pid = 0;
+	int res = 0;
+
+	if (unlikely(size != sizeof(struct bpf_pidns_info)))
+		goto clear;
+
+	pidns = task_active_pid_ns(current);
+
+	if (unlikely(!pidns))
+		goto clear;
+
+	pidns_info->nsid =  pidns->ns.inum;
+	pid = task_pid_nr_ns(current, pidns);
+
+	if (unlikely(!pid))
+		goto clear;
+
+	tgid = task_tgid_nr_ns(current, pidns);
+
+	if (unlikely(!tgid))
+		goto clear;
+
+	pidns_info->tgid = (u32) tgid;
+	pidns_info->pid = (u32) pid;
+
+        kern_path(nspid, 0, &kp);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,11,0)
+    res = vfs_getattr(&kp, &ks, STATX_ALL, 0);
+#else
+    res = vfs_getattr(&kp, &ks);
+#endif
+       if (unlikely(res))
+               goto clear;
+
+       pidns_info->dev =  ks.dev;
+
+       return 0;
+
+       clear:
+	if (pidns_info)
+		memset((void *)pidns, 0, (size_t) size);
+
+	return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+	.func	= bpf_get_current_pidns_info,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+};
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f1a86a0d881d..48c86bc8680f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -604,6 +604,8 @@  tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;
 #endif
+	case BPF_FUNC_get_current_pidns_info:
+		return &bpf_get_current_pidns_info_proto;
 	default:
 		return NULL;
 	}