diff mbox series

[V2,16/20] bpf: Replace open coded recursion prevention

Message ID 20200220204618.913228118@linutronix.de
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: Make BPF and PREEMPT_RT co-exist | expand

Commit Message

Thomas Gleixner Feb. 20, 2020, 8:45 p.m. UTC
The required protection is that the caller cannot be migrated to a
different CPU as these functions end up in places which take either a hash
bucket lock or might trigger a kprobe inside the memory allocator. Both
scenarios can lead to deadlocks. The deadlock prevention is per CPU by
incrementing a per CPU variable which temporarily blocks the invocation of
BPF programs from perf and kprobes.

Replace the open coded preempt_[dis|en]able and __this_cpu_[inc|dec] pairs
with the new helper functions. These functions are already prepared to
make BPF work on PREEMPT_RT enabled kernels.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V2: New patch
---
 kernel/bpf/hashtab.c |   12 ++++--------
 kernel/bpf/syscall.c |   27 ++++++++-------------------
 2 files changed, 12 insertions(+), 27 deletions(-)
diff mbox series

Patch

--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1319,8 +1319,7 @@  static int
 	}
 
 again:
-	migrate_disable();
-	this_cpu_inc(bpf_prog_active);
+	bpf_disable_instrumentation();
 	rcu_read_lock();
 again_nocopy:
 	dst_key = keys;
@@ -1338,8 +1337,7 @@  static int
 			ret = -ENOSPC;
 		raw_spin_unlock_irqrestore(&b->lock, flags);
 		rcu_read_unlock();
-		this_cpu_dec(bpf_prog_active);
-		migrate_enable();
+		bpf_enable_instrumentation();
 		goto after_loop;
 	}
 
@@ -1347,8 +1345,7 @@  static int
 		bucket_size = bucket_cnt;
 		raw_spin_unlock_irqrestore(&b->lock, flags);
 		rcu_read_unlock();
-		this_cpu_dec(bpf_prog_active);
-		migrate_enable();
+		bpf_enable_instrumentation();
 		kvfree(keys);
 		kvfree(values);
 		goto alloc;
@@ -1397,8 +1394,7 @@  static int
 	}
 
 	rcu_read_unlock();
-	this_cpu_dec(bpf_prog_active);
-	migrate_enable();
+	bpf_enable_instrumentation();
 	if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
 	    key_size * bucket_cnt) ||
 	    copy_to_user(uvalues + total * value_size, values,
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -171,11 +171,7 @@  static int bpf_map_update_value(struct b
 						    flags);
 	}
 
-	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
-	 * inside bpf map update or delete otherwise deadlocks are possible
-	 */
-	preempt_disable();
-	__this_cpu_inc(bpf_prog_active);
+	bpf_disable_instrumentation();
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 		err = bpf_percpu_hash_update(map, key, value, flags);
@@ -206,8 +202,7 @@  static int bpf_map_update_value(struct b
 		err = map->ops->map_update_elem(map, key, value, flags);
 		rcu_read_unlock();
 	}
-	__this_cpu_dec(bpf_prog_active);
-	preempt_enable();
+	bpf_enable_instrumentation();
 	maybe_wait_bpf_programs(map);
 
 	return err;
@@ -222,8 +217,7 @@  static int bpf_map_copy_value(struct bpf
 	if (bpf_map_is_dev_bound(map))
 		return bpf_map_offload_lookup_elem(map, key, value);
 
-	preempt_disable();
-	this_cpu_inc(bpf_prog_active);
+	bpf_disable_instrumentation();
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 		err = bpf_percpu_hash_copy(map, key, value);
@@ -268,8 +262,7 @@  static int bpf_map_copy_value(struct bpf
 		rcu_read_unlock();
 	}
 
-	this_cpu_dec(bpf_prog_active);
-	preempt_enable();
+	bpf_enable_instrumentation();
 	maybe_wait_bpf_programs(map);
 
 	return err;
@@ -1136,13 +1129,11 @@  static int map_delete_elem(union bpf_att
 		goto out;
 	}
 
-	preempt_disable();
-	__this_cpu_inc(bpf_prog_active);
+	bpf_disable_instrumentation();
 	rcu_read_lock();
 	err = map->ops->map_delete_elem(map, key);
 	rcu_read_unlock();
-	__this_cpu_dec(bpf_prog_active);
-	preempt_enable();
+	bpf_enable_instrumentation();
 	maybe_wait_bpf_programs(map);
 out:
 	kfree(key);
@@ -1254,13 +1245,11 @@  int generic_map_delete_batch(struct bpf_
 			break;
 		}
 
-		preempt_disable();
-		__this_cpu_inc(bpf_prog_active);
+		bpf_disable_instrumentation();
 		rcu_read_lock();
 		err = map->ops->map_delete_elem(map, key);
 		rcu_read_unlock();
-		__this_cpu_dec(bpf_prog_active);
-		preempt_enable();
+		bpf_enable_instrumentation();
 		maybe_wait_bpf_programs(map);
 		if (err)
 			break;