diff mbox series

[bpf-next,RFCv3,4/6] veth: add zero-copy AF_XDP TX support.

Message ID 1545856073-8680-5-git-send-email-u9012063@gmail.com
State RFC, archived
Delegated to: BPF Maintainers
Headers show
Series AF_XDP support for veth. | expand

Commit Message

William Tu Dec. 26, 2018, 8:27 p.m. UTC
Remove the extra copy when doing AF_XDP TX.  The xdp frame now comes
directly from the umem element and is passed to the receiving logic.
Also, depending only on async_xmit to kick the napi poll isn't fast
enough, so re-schedule the napi at the end of the poll so that ksoftirqd
can keep processing packets.  Performance increases from 1.1Mpps to
1.4Mpps when running zero-copy xdpsock as the sender with XDP_DROP at
the receiver side.

Signed-off-by: William Tu <u9012063@gmail.com>
---
 drivers/net/veth.c | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 10cf9ded59f1..551444195398 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -761,45 +761,34 @@  static int veth_xsk_poll(struct napi_struct *napi, int budget)
 	while (peer_rq->xsk_umem && budget--) {
 		unsigned int inner_xdp_xmit = 0;
 		unsigned int metasize = 0;
-		struct xdp_frame *xdpf;
+		struct xdp_frame xdpf;
 		bool dropped = false;
 		struct sk_buff *skb;
 		struct page *page;
 		void *vaddr;
-		void *addr;
 		u32 len;
 
 		if (!xsk_umem_consume_tx_virtual(peer_rq->xsk_umem, &vaddr, &len))
 			break;
 
-		page = dev_alloc_page();
-		if (!page) {
-			xsk_umem_complete_tx(peer_rq->xsk_umem, 1);
-			xsk_umem_consume_tx_done(peer_rq->xsk_umem);
-			return -ENOMEM;
-		}
-
-		addr = page_to_virt(page);
-		xdpf = addr;
-		memset(xdpf, 0, sizeof(*xdpf));
-
-		addr += sizeof(*xdpf);
-		memcpy(addr, vaddr, len);
+		xdpf.data = vaddr + metasize;
+		xdpf.len = len;
+		xdpf.headroom = 0;
+		xdpf.metasize = metasize;
+		xdpf.mem.type = MEM_TYPE_ZERO_COPY_VDEV;
 
-		xdpf->data = addr + metasize;
-		xdpf->len = len;
-		xdpf->headroom = 0;
-		xdpf->metasize = metasize;
-		xdpf->mem.type = MEM_TYPE_PAGE_SHARED;
+		page = virt_to_head_page(vaddr);
+		if (page->mem_cgroup)
+			page->mem_cgroup = NULL;
 
 		/* put into rq */
-		skb = veth_xdp_rcv_one(rq, xdpf, &inner_xdp_xmit);
+		skb = veth_xdp_rcv_one(rq, &xdpf, &inner_xdp_xmit);
 		if (!skb) {
 			/* Peer side has XDP program attached */
 			if (inner_xdp_xmit & VETH_XDP_TX) {
 				/* Not supported */
 				pr_warn("veth: peer XDP_TX not supported\n");
-				xdp_return_frame(xdpf);
+				xdp_return_frame(&xdpf);
 				dropped = true;
 				goto skip_tx;
 			} else if (inner_xdp_xmit & VETH_XDP_REDIR) {
@@ -808,7 +797,8 @@  static int veth_xsk_poll(struct napi_struct *napi, int budget)
 				dropped = true;
 			}
 		} else {
-			napi_gro_receive(&rq->xdp_napi, skb);
+			napi_gro_receive(&rq->xdp_napi, skb_copy(skb, GFP_KERNEL));
+			kfree(skb);
 		}
 skip_tx:
 		xsk_umem_complete_tx(peer_rq->xsk_umem, 1);
@@ -856,6 +846,11 @@  static int veth_poll(struct napi_struct *napi, int budget)
 		xdp_do_flush_map();
 	xdp_clear_return_frame_no_direct();
 
+	/* schedule again so the CPU can keep receiving
+	 * at higher rate
+	 */
+	napi_schedule(&rq->xdp_napi);
+
 	return done > budget ? budget : done;
 }