diff mbox series

[bpf] xsk: fix umem cleanup bug at socket destruct

Message ID 1605873219-21629-1-git-send-email-magnus.karlsson@gmail.com
State Superseded
Headers show
Series [bpf] xsk: fix umem cleanup bug at socket destruct | expand

Commit Message

Magnus Karlsson Nov. 20, 2020, 11:53 a.m. UTC
From: Magnus Karlsson <magnus.karlsson@intel.com>

Fix a bug that is triggered when a partially setup socket is
destroyed. For a fully setup socket, a socket that has been bound to a
device, the cleanup of the umem is performed at the end of the buffer
pool's cleanup work queue item. This has to be performed in a work
queue, and not in RCU cleanup, as it is doing a vunmap that cannot
execute in interrupt context. However, when a socket has only been
partially set up so that a umem has been created but the buffer pool
has not, the code erroneously directly calls the umem cleanup function
instead of using a work queue, and this leads to a BUG_ON() in
vunmap().

As there in this case is no buffer pool, we cannot use its work queue,
so we need to introduce a work queue for the umem and schedule this for
the cleanup. So in the case there is no pool, we are going to use the
umem's own work queue to schedule the cleanup. But if there is a
pool, the cleanup of the umem is still being performed by the pool's
work queue, as it is important that the umem is cleaned up after the
pool.

Fixes: e5e1a4bc916d ("xsk: Fix possible memory leak at socket close")
Reported-by: Marek Majtyka <marekx.majtyka@intel.com>
Tested-by: Marek Majtyka <marekx.majtyka@intel.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
---
 include/net/xdp_sock.h  |  1 +
 net/xdp/xdp_umem.c      | 19 ++++++++++++++++---
 net/xdp/xdp_umem.h      |  2 +-
 net/xdp/xsk.c           |  2 +-
 net/xdp/xsk_buff_pool.c |  2 +-
 5 files changed, 20 insertions(+), 6 deletions(-)

Comments

patchwork-bot+netdevbpf@kernel.org Nov. 20, 2020, 3 p.m. UTC | #1
Hello:

This patch was applied to bpf/bpf.git (refs/heads/master):

On Fri, 20 Nov 2020 12:53:39 +0100 you wrote:
> From: Magnus Karlsson <magnus.karlsson@intel.com>
> 
> Fix a bug that is triggered when a partially setup socket is
> destroyed. For a fully setup socket, a socket that has been bound to a
> device, the cleanup of the umem is performed at the end of the buffer
> pool's cleanup work queue item. This has to be performed in a work
> queue, and not in RCU cleanup, as it is doing a vunmap that cannot
> execute in interrupt context. However, when a socket has only been
> partially set up so that a umem has been created but the buffer pool
> has not, the code erroneously directly calls the umem cleanup function
> instead of using a work queue, and this leads to a BUG_ON() in
> vunmap().
> 
> [...]

Here is the summary with links:
  - [bpf] xsk: fix umem cleanup bug at socket destruct
    https://git.kernel.org/bpf/bpf/c/537cf4e3cc2f

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
diff mbox series

Patch

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 1a9559c..4f4e93b 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -31,6 +31,7 @@  struct xdp_umem {
 	struct page **pgs;
 	int id;
 	struct list_head xsk_dma_list;
+	struct work_struct work;
 };
 
 struct xsk_map {
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 56d052b..56a28a6 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -66,18 +66,31 @@  static void xdp_umem_release(struct xdp_umem *umem)
 	kfree(umem);
 }
 
+static void xdp_umem_release_deferred(struct work_struct *work)
+{
+	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);
+
+	xdp_umem_release(umem);
+}
+
 void xdp_get_umem(struct xdp_umem *umem)
 {
 	refcount_inc(&umem->users);
 }
 
-void xdp_put_umem(struct xdp_umem *umem)
+void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
 {
 	if (!umem)
 		return;
 
-	if (refcount_dec_and_test(&umem->users))
-		xdp_umem_release(umem);
+	if (refcount_dec_and_test(&umem->users)) {
+		if (defer_cleanup) {
+			INIT_WORK(&umem->work, xdp_umem_release_deferred);
+			schedule_work(&umem->work);
+		} else {
+			xdp_umem_release(umem);
+		}
+	}
 }
 
 static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index 181fdda..aa9fe27 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -9,7 +9,7 @@ 
 #include <net/xdp_sock_drv.h>
 
 void xdp_get_umem(struct xdp_umem *umem);
-void xdp_put_umem(struct xdp_umem *umem);
+void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup);
 struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
 
 #endif /* XDP_UMEM_H_ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index cfbec39..5a6cdf7 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1147,7 +1147,7 @@  static void xsk_destruct(struct sock *sk)
 		return;
 
 	if (!xp_put_pool(xs->pool))
-		xdp_put_umem(xs->umem);
+		xdp_put_umem(xs->umem, !xs->pool);
 
 	sk_refcnt_debug_dec(sk);
 }
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 8a3bf4e..3c5a142 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -242,7 +242,7 @@  static void xp_release_deferred(struct work_struct *work)
 		pool->cq = NULL;
 	}
 
-	xdp_put_umem(pool->umem);
+	xdp_put_umem(pool->umem, false);
 	xp_destroy(pool);
 }