[bpf-next,2/8] xdp: simplify cpumap cleanup

Message ID: 20191218105400.2895-3-bjorn.topel@gmail.com
State: Changes Requested
Delegated to: BPF Maintainers
Series: Simplify xdp_do_redirect_map()/xdp_do_flush_map() and XDP maps

Commit Message

Björn Töpel Dec. 18, 2019, 10:53 a.m. UTC
From: Björn Töpel <bjorn.topel@intel.com>

After the RCU flavor consolidation [1], call_rcu() and
synchronize_rcu() wait for preempt-disable regions (NAPI) in addition
to the read-side critical sections. As a result, the cleanup code in
cpumap can be simplified:

* There is no longer a need to flush in __cpu_map_entry_free, since we
  know that this has been done when the call_rcu() callback is
  triggered.

* When freeing the map, there is no need to explicitly wait for a
  flush. It's guaranteed to be done after the synchronize_rcu() call
  in cpu_map_free().

[1] https://lwn.net/Articles/777036/

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 kernel/bpf/cpumap.c | 33 +++++----------------------------
 1 file changed, 5 insertions(+), 28 deletions(-)
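
To make the ordering argument concrete, here is a minimal sketch in
pseudo-kernel C. It is an illustration only, not code from the patch:
cpu_map_entry_unpublish() is a made-up name, and the structures are
reduced to what the argument needs.

/* A NAPI poll cycle runs with preemption disabled, and both the
 * enqueue to and the flush of the percpu bulk queue happen inside
 * that region:
 *
 *   napi_poll()                       // preemption disabled
 *     bpf_prog_run_xdp()
 *       cpu_map_enqueue(rcpu, xdpf)   // fills rcpu->bulkq
 *     xdp_do_flush_map()              // drains rcpu->bulkq
 *
 * After the RCU flavor consolidation, that preempt-disabled region
 * counts as an RCU read-side critical section.
 */
static void cpu_map_entry_unpublish(struct bpf_cpu_map *cmap, u32 key)
{
	struct bpf_cpu_map_entry *rcpu;

	/* Unpublish the entry; new NAPI cycles can no longer see it. */
	rcpu = xchg(&cmap->cpu_map[key], NULL);
	if (!rcpu)
		return;

	/* The callback fires only after every NAPI cycle that could
	 * still see rcpu has completed, and each such cycle flushed
	 * the bulk queue before re-enabling preemption. Hence
	 * __cpu_map_entry_free() finds the queues already drained,
	 * and cpu_map_free() only needs synchronize_rcu() rather
	 * than busy-waiting on the percpu flush lists.
	 */
	call_rcu(&rcpu->rcu, __cpu_map_entry_free);
}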

Comments

Toke Høiland-Jørgensen Dec. 18, 2019, 11:15 a.m. UTC | #1
Björn Töpel <bjorn.topel@gmail.com> writes:

> From: Björn Töpel <bjorn.topel@intel.com>
>
> After the RCU flavor consolidation [1], call_rcu() and
> synchronize_rcu() wait for preempt-disable regions (NAPI) in addition
> to the read-side critical sections. As a result, the cleanup code in
> cpumap can be simplified:
>
> * There is no longer a need to flush in __cpu_map_entry_free, since we
>   know that this has been done when the call_rcu() callback is
>   triggered.
>
> * When freeing the map, there is no need to explicitly wait for a
>   flush. It's guaranteed to be done after the synchronize_rcu() call
>   in cpu_map_free().
>
> [1] https://lwn.net/Articles/777036/
>
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>

Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Jakub Kicinski Dec. 18, 2019, 5:47 p.m. UTC | #2
On Wed, 18 Dec 2019 11:53:54 +0100, Björn Töpel wrote:
> From: Björn Töpel <bjorn.topel@intel.com>
> 
> After the RCU flavor consolidation [1], call_rcu() and
> synchronize_rcu() wait for preempt-disable regions (NAPI) in addition
> to the read-side critical sections. As a result, the cleanup code in
> cpumap can be simplified:
> 
> * There is no longer a need to flush in __cpu_map_entry_free, since we
>   know that this has been done when the call_rcu() callback is
>   triggered.
> 
> * When freeing the map, there is no need to explicitly wait for a
>   flush. It's guaranteed to be done after the synchronize_rcu() call
>   in cpu_map_free().
> 
> [1] https://lwn.net/Articles/777036/
> 
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>

Probably transient but:

../kernel/bpf/cpumap.c: In function 'cpu_map_free':
../kernel/bpf/cpumap.c:502:6: warning: unused variable 'cpu' [-Wunused-variable]
  502 |  int cpu;
      |      ^~~

I think there are also warnings in patch 4.
Björn Töpel Dec. 18, 2019, 5:48 p.m. UTC | #3
On 2019-12-18 18:47, Jakub Kicinski wrote:
> On Wed, 18 Dec 2019 11:53:54 +0100, Björn Töpel wrote:
>> From: Björn Töpel <bjorn.topel@intel.com>
>>
>> After the RCU flavor consolidation [1], call_rcu() and
>> synchronize_rcu() wait for preempt-disable regions (NAPI) in addition
>> to the read-side critical sections. As a result, the cleanup code in
>> cpumap can be simplified:
>>
>> * There is no longer a need to flush in __cpu_map_entry_free, since we
>>    know that this has been done when the call_rcu() callback is
>>    triggered.
>>
>> * When freeing the map, there is no need to explicitly wait for a
>>    flush. It's guaranteed to be done after the synchronize_rcu() call
>>    in cpu_map_free().
>>
>> [1] https://lwn.net/Articles/777036/
>>
>> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
> 
> Probably transient but:
> 
> ../kernel/bpf/cpumap.c: In function 'cpu_map_free':
> ../kernel/bpf/cpumap.c:502:6: warning: unused variable 'cpu' [-Wunused-variable]
>    502 |  int cpu;
>        |      ^~~
> 
> I think there are also warnings in patch 4.
> 

Ugh. Thanks, I'll respin!


Björn
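
The warning maps directly back to the diff: the for_each_online_cpu()
loop removed from cpu_map_free() was the only user of the local
variable cpu, but the declaration sits outside the hunk's context and
survives. The respin presumably just extends that hunk to drop the
declaration, along these lines (context lines approximate, not the
actual v2):

--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ static void cpu_map_free(struct bpf_map *map)
 {
 	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
-	int cpu;
 	u32 i;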

Patch

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index ef49e17ae47c..fbf176e0a2ab 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -75,7 +75,7 @@  struct bpf_cpu_map {
 	struct list_head __percpu *flush_list;
 };
 
-static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx);
+static int bq_flush_to_queue(struct xdp_bulk_queue *bq);
 
 static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 {
@@ -399,7 +399,6 @@  static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
 static void __cpu_map_entry_free(struct rcu_head *rcu)
 {
 	struct bpf_cpu_map_entry *rcpu;
-	int cpu;
 
 	/* This cpu_map_entry have been disconnected from map and one
 	 * RCU graze-period have elapsed.  Thus, XDP cannot queue any
@@ -408,13 +407,6 @@  static void __cpu_map_entry_free(struct rcu_head *rcu)
 	 */
 	rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu);
 
-	/* Flush remaining packets in percpu bulkq */
-	for_each_online_cpu(cpu) {
-		struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
-
-		/* No concurrent bq_enqueue can run at this point */
-		bq_flush_to_queue(bq, false);
-	}
 	free_percpu(rcpu->bulkq);
 	/* Cannot kthread_stop() here, last put free rcpu resources */
 	put_cpu_map_entry(rcpu);
@@ -522,18 +514,6 @@  static void cpu_map_free(struct bpf_map *map)
 	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
-	/* To ensure all pending flush operations have completed wait for flush
-	 * list be empty on _all_ cpus. Because the above synchronize_rcu()
-	 * ensures the map is disconnected from the program we can assume no new
-	 * items will be added to the list.
-	 */
-	for_each_online_cpu(cpu) {
-		struct list_head *flush_list = per_cpu_ptr(cmap->flush_list, cpu);
-
-		while (!list_empty(flush_list))
-			cond_resched();
-	}
-
 	/* For cpu_map the remote CPUs can still be using the entries
 	 * (struct bpf_cpu_map_entry).
 	 */
@@ -599,7 +579,7 @@  const struct bpf_map_ops cpu_map_ops = {
 	.map_check_btf		= map_check_no_btf,
 };
 
-static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx)
+static int bq_flush_to_queue(struct xdp_bulk_queue *bq)
 {
 	struct bpf_cpu_map_entry *rcpu = bq->obj;
 	unsigned int processed = 0, drops = 0;
@@ -620,10 +600,7 @@  static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx)
 		err = __ptr_ring_produce(q, xdpf);
 		if (err) {
 			drops++;
-			if (likely(in_napi_ctx))
-				xdp_return_frame_rx_napi(xdpf);
-			else
-				xdp_return_frame(xdpf);
+			xdp_return_frame_rx_napi(xdpf);
 		}
 		processed++;
 	}
@@ -646,7 +623,7 @@  static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
 	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
-		bq_flush_to_queue(bq, true);
+		bq_flush_to_queue(bq);
 
 	/* Notice, xdp_buff/page MUST be queued here, long enough for
 	 * driver to code invoking us to finished, due to driver
@@ -688,7 +665,7 @@  void __cpu_map_flush(struct bpf_map *map)
 	struct xdp_bulk_queue *bq, *tmp;
 
 	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
-		bq_flush_to_queue(bq, true);
+		bq_flush_to_queue(bq);
 
 		/* If already running, costs spin_lock_irqsave + smb_mb */
 		wake_up_process(bq->obj->kthread);
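
Taken together, the hunks leave bq_flush_to_queue() with callers that
all run in NAPI context (bq_enqueue() and __cpu_map_flush()), which is
why the rx_napi frame return can become unconditional. Reconstructed
from the hunks above, with the stats counters and tracepoint elided,
the simplified function reads roughly:

/* Sketch reconstructed from the diff; not a verbatim copy of the
 * post-patch file.
 */
static int bq_flush_to_queue(struct xdp_bulk_queue *bq)
{
	struct bpf_cpu_map_entry *rcpu = bq->obj;
	struct ptr_ring *q = rcpu->queue;
	int i;

	spin_lock(&q->producer_lock);
	for (i = 0; i < bq->count; i++) {
		struct xdp_frame *xdpf = bq->q[i];

		/* Ring full: drop the frame. The NAPI fast-path
		 * return is safe unconditionally, since every caller
		 * now runs with preemption disabled in NAPI context.
		 */
		if (__ptr_ring_produce(q, xdpf))
			xdp_return_frame_rx_napi(xdpf);
	}
	bq->count = 0;
	spin_unlock(&q->producer_lock);
	return 0;
}

Dropping the in_napi_ctx flag is not just cosmetic: callers no longer
have to advertise their execution context, and the branch disappears
from the per-packet drop path.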