diff mbox series

[bpf-next,4/6] bpf, sockmap: remove dropped data on errors in redirect case

Message ID 160221868511.12042.12285689875540180401.stgit@john-Precision-5820-Tower
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series sockmap/sk_skb program memory acct fixes | expand

Commit Message

John Fastabend Oct. 9, 2020, 4:44 a.m. UTC
In the sk_skb redirect case we didn't handle the case where we overrun
the sk_rmem_alloc entry on ingress redirect or sk_wmem_alloc on egress.
Because we didn't have anything implemented we simply dropped the skb.
This meant data could be dropped if socket memory accounting was in
place.

This fixes the above dropped data case by moving the memory checks
later in the code where we actually do the send or recv. This pushes
those checks into the workqueue and allows us to return an EAGAIN error
which in turn allows us to try again later from the workqueue.

Fixes: 51199405f9672 ("bpf: skb_verdict, support SK_PASS on RX BPF path")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 net/core/skmsg.c |   26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

Comments

kernel test robot Oct. 9, 2020, 7:27 a.m. UTC | #1
Hi John,

I love your patch! Perhaps something to improve:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/John-Fastabend/sockmap-sk_skb-program-memory-acct-fixes/20201009-124625
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: riscv-allyesconfig (attached as .config)
compiler: riscv64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/3a8d6f22c5f47a013278031170cb559d1f6d1e69
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review John-Fastabend/sockmap-sk_skb-program-memory-acct-fixes/20201009-124625
        git checkout 3a8d6f22c5f47a013278031170cb559d1f6d1e69
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=riscv 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   net/core/skmsg.c: In function 'sk_psock_skb_redirect':
>> net/core/skmsg.c:714:7: warning: variable 'ingress' set but not used [-Wunused-but-set-variable]
     714 |  bool ingress;
         |       ^~~~~~~

vim +/ingress +714 net/core/skmsg.c

604326b41a6fb9 Daniel Borkmann 2018-10-13  709  
93dd5f185916b0 John Fastabend  2020-06-25  710  static void sk_psock_skb_redirect(struct sk_buff *skb)
604326b41a6fb9 Daniel Borkmann 2018-10-13  711  {
604326b41a6fb9 Daniel Borkmann 2018-10-13  712  	struct sk_psock *psock_other;
604326b41a6fb9 Daniel Borkmann 2018-10-13  713  	struct sock *sk_other;
604326b41a6fb9 Daniel Borkmann 2018-10-13 @714  	bool ingress;
604326b41a6fb9 Daniel Borkmann 2018-10-13  715  
ca2f5f21dbbd5e John Fastabend  2020-05-29  716  	sk_other = tcp_skb_bpf_redirect_fetch(skb);
3a8d6f22c5f47a John Fastabend  2020-10-08  717  	/* This error is a buggy BPF program, it returned a redirect
3a8d6f22c5f47a John Fastabend  2020-10-08  718  	 * return code, but then didn't set a redirect interface.
3a8d6f22c5f47a John Fastabend  2020-10-08  719  	 */
ca2f5f21dbbd5e John Fastabend  2020-05-29  720  	if (unlikely(!sk_other)) {
ca2f5f21dbbd5e John Fastabend  2020-05-29  721  		kfree_skb(skb);
ca2f5f21dbbd5e John Fastabend  2020-05-29  722  		return;
ca2f5f21dbbd5e John Fastabend  2020-05-29  723  	}
ca2f5f21dbbd5e John Fastabend  2020-05-29  724  	psock_other = sk_psock(sk_other);
3a8d6f22c5f47a John Fastabend  2020-10-08  725  	/* This error indicates the socket is being torn down or had another
3a8d6f22c5f47a John Fastabend  2020-10-08  726  	 * error that caused the pipe to break. We can't send a packet on
3a8d6f22c5f47a John Fastabend  2020-10-08  727  	 * a socket that is in this state so we drop the skb.
3a8d6f22c5f47a John Fastabend  2020-10-08  728  	 */
ca2f5f21dbbd5e John Fastabend  2020-05-29  729  	if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
ca2f5f21dbbd5e John Fastabend  2020-05-29  730  	    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
ca2f5f21dbbd5e John Fastabend  2020-05-29  731  		kfree_skb(skb);
ca2f5f21dbbd5e John Fastabend  2020-05-29  732  		return;
ca2f5f21dbbd5e John Fastabend  2020-05-29  733  	}
ca2f5f21dbbd5e John Fastabend  2020-05-29  734  
ca2f5f21dbbd5e John Fastabend  2020-05-29  735  	ingress = tcp_skb_bpf_ingress(skb);
ca2f5f21dbbd5e John Fastabend  2020-05-29  736  	skb_queue_tail(&psock_other->ingress_skb, skb);
ca2f5f21dbbd5e John Fastabend  2020-05-29  737  	schedule_work(&psock_other->work);
ca2f5f21dbbd5e John Fastabend  2020-05-29  738  }
ca2f5f21dbbd5e John Fastabend  2020-05-29  739  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index b60768951de2..0bc8679e8033 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -433,10 +433,12 @@  static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 			       u32 off, u32 len, bool ingress)
 {
-	if (ingress)
-		return sk_psock_skb_ingress(psock, skb);
-	else
+	if (!ingress) {
+		if (!sock_writeable(psock->sk))
+			return -EAGAIN;
 		return skb_send_sock_locked(psock->sk, skb, off, len);
+	}
+	return sk_psock_skb_ingress(psock, skb);
 }
 
 static void sk_psock_backlog(struct work_struct *work)
@@ -712,11 +714,18 @@  static void sk_psock_skb_redirect(struct sk_buff *skb)
 	bool ingress;
 
 	sk_other = tcp_skb_bpf_redirect_fetch(skb);
+	/* This error is a buggy BPF program, it returned a redirect
+	 * return code, but then didn't set a redirect interface.
+	 */
 	if (unlikely(!sk_other)) {
 		kfree_skb(skb);
 		return;
 	}
 	psock_other = sk_psock(sk_other);
+	/* This error indicates the socket is being torn down or had another
+	 * error that caused the pipe to break. We can't send a packet on
+	 * a socket that is in this state so we drop the skb.
+	 */
 	if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
 	    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
 		kfree_skb(skb);
@@ -724,15 +733,8 @@  static void sk_psock_skb_redirect(struct sk_buff *skb)
 	}
 
 	ingress = tcp_skb_bpf_ingress(skb);
-	if ((!ingress && sock_writeable(sk_other)) ||
-	    (ingress &&
-	     atomic_read(&sk_other->sk_rmem_alloc) <=
-	     sk_other->sk_rcvbuf)) {
-		skb_queue_tail(&psock_other->ingress_skb, skb);
-		schedule_work(&psock_other->work);
-	} else {
-		kfree_skb(skb);
-	}
+	skb_queue_tail(&psock_other->ingress_skb, skb);
+	schedule_work(&psock_other->work);
 }
 
 static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict)