diff mbox series

[RFC,bpf-next,1/5] bpf: RCU protect used_maps array and count

Message ID 7e37411ca33ae89e2a98dd95707a35caf2fd542e.1597427271.git.zhuyifei@google.com
State RFC
Delegated to: BPF Maintainers
Headers show
Series Allow storage of flexible metadata information for eBPF programs | expand

Commit Message

YiFei Zhu Aug. 14, 2020, 7:15 p.m. UTC
From: YiFei Zhu <zhuyifei@google.com>

To support modifying the used_maps array, we use RCU to protect the
use of the counter and the array. A mutex is used as the write-side
lock, and it is initialized in the verifier where the rcu struct is
allocated.

Most uses are non-sleeping and very straightforward, just holding
rcu_read_lock. bpf_check_tail_call can be called for a cBPF map
(by bpf_prog_select_runtime, bpf_migrate_filter) so an extra check
is added for the case when the program did not pass through the
eBPF verifier and the used_maps pointer is NULL.

The Netronome driver does allocate memory while reading the used_maps
array so for simplicity it is made to hold the write-side lock.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
---
 .../net/ethernet/netronome/nfp/bpf/offload.c  | 33 +++++++++++------
 include/linux/bpf.h                           | 11 +++++-
 kernel/bpf/core.c                             | 25 ++++++++++---
 kernel/bpf/syscall.c                          | 19 ++++++++--
 kernel/bpf/verifier.c                         | 37 +++++++++++++------
 net/core/dev.c                                | 12 ++++--
 6 files changed, 100 insertions(+), 37 deletions(-)

Comments

Alexei Starovoitov Aug. 19, 2020, 12:48 a.m. UTC | #1
On Fri, Aug 14, 2020 at 02:15:54PM -0500, YiFei Zhu wrote:
> @@ -3263,6 +3268,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
>  	struct bpf_prog_stats stats;
>  	char __user *uinsns;
>  	u32 ulen;
> +	const struct bpf_used_maps *used_maps;
>  	int err;
>  
>  	err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
> @@ -3284,19 +3290,24 @@ static int bpf_prog_get_info_by_fd(struct file *file,
>  	memcpy(info.tag, prog->tag, sizeof(prog->tag));
>  	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
>  
> +	rcu_read_lock();
> +	used_maps = rcu_dereference(prog->aux->used_maps);
> +
>  	ulen = info.nr_map_ids;
> -	info.nr_map_ids = prog->aux->used_map_cnt;
> +	info.nr_map_ids = used_maps->cnt;
>  	ulen = min_t(u32, info.nr_map_ids, ulen);
>  	if (ulen) {
>  		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
>  		u32 i;
>  
>  		for (i = 0; i < ulen; i++)
> -			if (put_user(prog->aux->used_maps[i]->id,
> +			if (put_user(used_maps->arr[i]->id,

put_user() under rcu_read_lock() is not allowed.
You should see a splat like this:
[    2.297943] BUG: sleeping function called from invalid context at kernel/bpf/syscall.c:3305
[    2.305554] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 81, name: test_progs
[    2.312802] 1 lock held by test_progs/81:
[    2.316545]  #0: ffffffff82265760 (rcu_read_lock){....}-{1:2}, at: bpf_prog_get_info_by_fd.isra.31+0xb9/0xdf0
[    2.325446] Preemption disabled at:
[    2.325454] [<ffffffff812aec64>] __fd_install+0x24/0x2b0
[    2.333571] CPU: 1 PID: 81 Comm: test_progs Not tainted 5.8.0-ga96d3fdf9 #1
[    2.339818] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
[    2.347795] Call Trace:
[    2.350082]  dump_stack+0x78/0xab
[    2.353109]  ___might_sleep+0x17d/0x260
[    2.356554]  __might_fault+0x34/0x90
[    2.359873]  bpf_prog_get_info_by_fd.isra.31+0x21b/0xdf0
[    2.364674]  ? __lock_acquire+0x2e4/0x1df0
[    2.368377]  ? bpf_obj_get_info_by_fd+0x20f/0x3e0
[    2.372621]  bpf_obj_get_info_by_fd+0x20f/0x3e0

Please test your patches with kernel debug flags
like CONFIG_DEBUG_ATOMIC_SLEEP=y at a minimum.
kasan and lockdep are good to have as well.

In general I think rcu here is overkill.
Why not use a mutex everywhere? It's not like it's in a critical path.

Also I think better name is needed. ADD_MAP is too specific.
When we get around to implementing Daniel's suggestion of replacing
one map with another, ADD_MAP as a command won't fit.
Single purpose commands are ok, but this one feels too narrow.
May be BPF_PROG_BIND_MAP ?
Then later adding target_map_fd field to prog_bind_map struct would do it.
I thought about BPF_PROG_ATTACH_MAP name, but we use the word "attach"
in too many places and it could be confusing.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index ac02369174a9..74ed42b678e2 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -111,34 +111,45 @@  static int
 nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
 		    struct bpf_prog *prog)
 {
-	int i, cnt, err;
+	struct bpf_used_maps *used_maps;
+	int i, cnt, err = 0;
+
+	/* We are calling sleepable functions while reading used_maps array */
+	mutex_lock(&prog->aux->used_maps_mutex);
+
+	used_maps = rcu_dereference_protected(prog->aux->used_maps,
+			lockdep_is_held(&prog->aux->used_maps_mutex));
 
 	/* Quickly count the maps we will have to remember */
 	cnt = 0;
-	for (i = 0; i < prog->aux->used_map_cnt; i++)
-		if (bpf_map_offload_neutral(prog->aux->used_maps[i]))
+	for (i = 0; i < used_maps->cnt; i++)
+		if (bpf_map_offload_neutral(used_maps->arr[i]))
 			cnt++;
 	if (!cnt)
-		return 0;
+		goto out;
 
 	nfp_prog->map_records = kmalloc_array(cnt,
 					      sizeof(nfp_prog->map_records[0]),
 					      GFP_KERNEL);
-	if (!nfp_prog->map_records)
-		return -ENOMEM;
+	if (!nfp_prog->map_records) {
+		err = -ENOMEM;
+		goto out;
+	}
 
-	for (i = 0; i < prog->aux->used_map_cnt; i++)
-		if (bpf_map_offload_neutral(prog->aux->used_maps[i])) {
+	for (i = 0; i < used_maps->cnt; i++)
+		if (bpf_map_offload_neutral(used_maps->arr[i])) {
 			err = nfp_map_ptr_record(bpf, nfp_prog,
-						 prog->aux->used_maps[i]);
+						 used_maps->arr[i]);
 			if (err) {
 				nfp_map_ptrs_forget(bpf, nfp_prog);
-				return err;
+				goto out;
 			}
 		}
 	WARN_ON(cnt != nfp_prog->map_records_cnt);
 
-	return 0;
+out:
+	mutex_unlock(&prog->aux->used_maps_mutex);
+	return err;
 }
 
 static int
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cef4ef0d2b4e..417189b4061d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -689,9 +689,15 @@  struct bpf_ctx_arg_aux {
 	u32 btf_id;
 };
 
+/* rcu struct for prog used_maps */
+struct bpf_used_maps {
+	u32 cnt;
+	struct bpf_map **arr;
+	struct rcu_head rcu;
+};
+
 struct bpf_prog_aux {
 	atomic64_t refcnt;
-	u32 used_map_cnt;
 	u32 max_ctx_offset;
 	u32 max_pkt_offset;
 	u32 max_tp_access;
@@ -722,7 +728,8 @@  struct bpf_prog_aux {
 	u32 size_poke_tab;
 	struct bpf_ksym ksym;
 	const struct bpf_prog_ops *ops;
-	struct bpf_map **used_maps;
+	struct mutex used_maps_mutex; /* write-side mutex for used_maps */
+	struct bpf_used_maps __rcu *used_maps;
 	struct bpf_prog *prog;
 	struct user_struct *user;
 	u64 load_time; /* ns since boottime */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index bde93344164d..9766aa0337d9 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1746,11 +1746,16 @@  bool bpf_prog_array_compatible(struct bpf_array *array,
 
 static int bpf_check_tail_call(const struct bpf_prog *fp)
 {
-	struct bpf_prog_aux *aux = fp->aux;
+	const struct bpf_used_maps *used_maps;
 	int i;
 
-	for (i = 0; i < aux->used_map_cnt; i++) {
-		struct bpf_map *map = aux->used_maps[i];
+	rcu_read_lock();
+	used_maps = rcu_dereference(fp->aux->used_maps);
+	if (!used_maps)
+		goto out;
+
+	for (i = 0; i < used_maps->cnt; i++) {
+		struct bpf_map *map = used_maps->arr[i];
 		struct bpf_array *array;
 
 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
@@ -1761,6 +1766,8 @@  static int bpf_check_tail_call(const struct bpf_prog *fp)
 			return -EINVAL;
 	}
 
+out:
+	rcu_read_unlock();
 	return 0;
 }
 
@@ -2113,8 +2120,16 @@  void __bpf_free_used_maps(struct bpf_prog_aux *aux,
 
 static void bpf_free_used_maps(struct bpf_prog_aux *aux)
 {
-	__bpf_free_used_maps(aux, aux->used_maps, aux->used_map_cnt);
-	kfree(aux->used_maps);
+	struct bpf_used_maps *used_maps = aux->used_maps;
+
+	if (!used_maps)
+		return;
+
+	__bpf_free_used_maps(aux, used_maps->arr, used_maps->cnt);
+	kfree(used_maps->arr);
+	kfree(used_maps);
+
+	mutex_destroy(&aux->used_maps_mutex);
 }
 
 static void bpf_prog_free_deferred(struct work_struct *work)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2f343ce15747..3fde9dc4b595 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3149,11 +3149,15 @@  static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
 					      unsigned long addr, u32 *off,
 					      u32 *type)
 {
+	const struct bpf_used_maps *used_maps;
 	const struct bpf_map *map;
 	int i;
 
-	for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
-		map = prog->aux->used_maps[i];
+	rcu_read_lock();
+	used_maps = rcu_dereference(prog->aux->used_maps);
+
+	for (i = 0, *off = 0; i < used_maps->cnt; i++) {
+		map = used_maps->arr[i];
 		if (map == (void *)addr) {
 			*type = BPF_PSEUDO_MAP_FD;
 			return map;
@@ -3166,6 +3170,7 @@  static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
 		}
 	}
 
+	rcu_read_unlock();
 	return NULL;
 }
 
@@ -3263,6 +3268,7 @@  static int bpf_prog_get_info_by_fd(struct file *file,
 	struct bpf_prog_stats stats;
 	char __user *uinsns;
 	u32 ulen;
+	const struct bpf_used_maps *used_maps;
 	int err;
 
 	err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
@@ -3284,19 +3290,24 @@  static int bpf_prog_get_info_by_fd(struct file *file,
 	memcpy(info.tag, prog->tag, sizeof(prog->tag));
 	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
 
+	rcu_read_lock();
+	used_maps = rcu_dereference(prog->aux->used_maps);
+
 	ulen = info.nr_map_ids;
-	info.nr_map_ids = prog->aux->used_map_cnt;
+	info.nr_map_ids = used_maps->cnt;
 	ulen = min_t(u32, info.nr_map_ids, ulen);
 	if (ulen) {
 		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
 		u32 i;
 
 		for (i = 0; i < ulen; i++)
-			if (put_user(prog->aux->used_maps[i]->id,
+			if (put_user(used_maps->arr[i]->id,
 				     &user_map_ids[i]))
 				return -EFAULT;
 	}
 
+	rcu_read_unlock();
+
 	err = set_info_rec_size(&info);
 	if (err)
 		return err;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b6ccfce3bf4c..9a6ca16667c7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11232,25 +11232,38 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 		goto err_release_maps;
 	}
 
-	if (ret == 0 && env->used_map_cnt) {
+	if (ret == 0) {
 		/* if program passed verifier, update used_maps in bpf_prog_info */
-		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
-							  sizeof(env->used_maps[0]),
+		struct bpf_used_maps *used_maps = kzalloc(sizeof(*used_maps),
 							  GFP_KERNEL);
-
-		if (!env->prog->aux->used_maps) {
+		if (!used_maps) {
 			ret = -ENOMEM;
 			goto err_release_maps;
 		}
 
-		memcpy(env->prog->aux->used_maps, env->used_maps,
-		       sizeof(env->used_maps[0]) * env->used_map_cnt);
-		env->prog->aux->used_map_cnt = env->used_map_cnt;
+		if (env->used_map_cnt) {
+			used_maps->arr = kmalloc_array(env->used_map_cnt,
+						       sizeof(env->used_maps[0]),
+						       GFP_KERNEL);
 
-		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
-		 * bpf_ld_imm64 instructions
-		 */
-		convert_pseudo_ld_imm64(env);
+			if (!used_maps->arr) {
+				kfree(used_maps);
+				ret = -ENOMEM;
+				goto err_release_maps;
+			}
+
+			memcpy(used_maps->arr, env->used_maps,
+			       sizeof(env->used_maps[0]) * env->used_map_cnt);
+			used_maps->cnt = env->used_map_cnt;
+
+			/* program is valid. Convert pseudo bpf_ld_imm64 into generic
+			 * bpf_ld_imm64 instructions
+			 */
+			convert_pseudo_ld_imm64(env);
+		}
+
+		env->prog->aux->used_maps = used_maps;
+		mutex_init(&env->prog->aux->used_maps_mutex);
 	}
 
 	if (ret == 0)
diff --git a/net/core/dev.c b/net/core/dev.c
index 7df6c9617321..bebbf8abd9a7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5439,17 +5439,23 @@  static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 	int ret = 0;
 
 	if (new) {
+		const struct bpf_used_maps *used_maps;
 		u32 i;
 
+		rcu_read_lock();
+		used_maps = rcu_dereference(new->aux->used_maps);
+
 		/* generic XDP does not work with DEVMAPs that can
 		 * have a bpf_prog installed on an entry
 		 */
-		for (i = 0; i < new->aux->used_map_cnt; i++) {
-			if (dev_map_can_have_prog(new->aux->used_maps[i]))
+		for (i = 0; i < used_maps->cnt; i++) {
+			if (dev_map_can_have_prog(used_maps->arr[i]))
 				return -EINVAL;
-			if (cpu_map_prog_allowed(new->aux->used_maps[i]))
+			if (cpu_map_prog_allowed(used_maps->arr[i]))
 				return -EINVAL;
 		}
+
+		rcu_read_unlock();
 	}
 
 	switch (xdp->command) {