diff mbox series

[RFC,7/9] veth: Add XDP TX and REDIRECT

Message ID 20180424143923.26519-8-toshiaki.makita1@gmail.com
State RFC, archived
Delegated to: David Miller
Headers show
Series veth: Driver XDP | expand

Commit Message

Toshiaki Makita April 24, 2018, 2:39 p.m. UTC
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>

This allows xdp_frames to be redirected further, for example:

 NIC   -> veth--veth -> veth--veth
 (XDP)          (XDP)         (XDP)

The intermediate XDP program, which redirects packets from the NIC to the
other veth, reuses the xdp_mem info from the NIC so that the NIC's page
recycling still works in the destination veth's XDP.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
---
 drivers/net/veth.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 85 insertions(+), 9 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index b1d591be0eba..98fc91a64e29 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -43,6 +43,7 @@  struct veth_priv {
 	struct bpf_prog __rcu	*xdp_prog;
 	struct net_device __rcu	*peer;
 	atomic64_t		dropped;
+	struct xdp_mem_info	xdp_mem;
 	unsigned		requested_headroom;
 	bool			rx_notify_masked;
 	struct ptr_ring		xdp_ring;
@@ -346,9 +347,21 @@  static void veth_xdp_flush(struct net_device *dev)
 	rcu_read_unlock();
 }
 
+static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
+{
+	struct xdp_frame *frame = convert_to_xdp_frame(xdp);
+
+	if (unlikely(!frame))
+		return -EOVERFLOW;
+
+	return veth_xdp_xmit(dev, frame);
+}
+
 static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
-					struct xdp_frame *frame)
+					struct xdp_frame *frame, bool *xdp_xmit,
+					bool *xdp_redir)
 {
+	struct xdp_frame orig_frame;
 	struct bpf_prog *xdp_prog;
 	unsigned int headroom;
 	struct sk_buff *skb;
@@ -372,6 +385,29 @@  static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
 		case XDP_PASS:
 			delta = frame->data - xdp.data;
 			break;
+		case XDP_TX:
+			orig_frame = *frame;
+			xdp.data_hard_start = frame;
+			xdp.rxq->mem = frame->mem;
+			if (unlikely(veth_xdp_tx(priv->dev, &xdp))) {
+				trace_xdp_exception(priv->dev, xdp_prog, act);
+				frame = &orig_frame;
+				goto err_xdp;
+			}
+			*xdp_xmit = true;
+			rcu_read_unlock();
+			goto xdp_xmit;
+		case XDP_REDIRECT:
+			orig_frame = *frame;
+			xdp.data_hard_start = frame;
+			xdp.rxq->mem = frame->mem;
+			if (xdp_do_redirect(priv->dev, &xdp, xdp_prog)) {
+				frame = &orig_frame;
+				goto err_xdp;
+			}
+			*xdp_redir = true;
+			rcu_read_unlock();
+			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
 		case XDP_ABORTED:
@@ -396,12 +432,13 @@  static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
 err_xdp:
 	rcu_read_unlock();
 	xdp_return_frame(frame);
-
+xdp_xmit:
 	return NULL;
 }
 
 static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
-					struct sk_buff *skb)
+					struct sk_buff *skb, bool *xdp_xmit,
+					bool *xdp_redir)
 {
 	u32 pktlen, headroom, act, metalen;
 	int size, mac_len, delta, off;
@@ -469,6 +506,26 @@  static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
 	switch (act) {
 	case XDP_PASS:
 		break;
+	case XDP_TX:
+		get_page(virt_to_page(xdp.data));
+		dev_consume_skb_any(skb);
+		xdp.rxq->mem = priv->xdp_mem;
+		if (unlikely(veth_xdp_tx(priv->dev, &xdp))) {
+			trace_xdp_exception(priv->dev, xdp_prog, act);
+			goto err_xdp;
+		}
+		*xdp_xmit = true;
+		rcu_read_unlock();
+		goto xdp_xmit;
+	case XDP_REDIRECT:
+		get_page(virt_to_page(xdp.data));
+		dev_consume_skb_any(skb);
+		xdp.rxq->mem = priv->xdp_mem;
+		if (xdp_do_redirect(priv->dev, &xdp, xdp_prog))
+			goto err_xdp;
+		*xdp_redir = true;
+		rcu_read_unlock();
+		goto xdp_xmit;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 	case XDP_ABORTED:
@@ -496,9 +553,15 @@  static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
 	rcu_read_unlock();
 	dev_kfree_skb_any(skb);
 	return NULL;
+err_xdp:
+	rcu_read_unlock();
+	page_frag_free(xdp.data);
+xdp_xmit:
+	return NULL;
 }
 
-static int veth_xdp_rcv(struct veth_priv *priv, int budget)
+static int veth_xdp_rcv(struct veth_priv *priv, int budget, bool *xdp_xmit,
+			bool *xdp_redir)
 {
 	int i, done = 0;
 
@@ -509,10 +572,12 @@  static int veth_xdp_rcv(struct veth_priv *priv, int budget)
 		if (!ptr)
 			break;
 
-		if (veth_is_xdp_frame(ptr))
-			skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr));
-		else
-			skb = veth_xdp_rcv_skb(priv, ptr);
+		if (veth_is_xdp_frame(ptr)) {
+			skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr),
+					       xdp_xmit, xdp_redir);
+		} else {
+			skb = veth_xdp_rcv_skb(priv, ptr, xdp_xmit, xdp_redir);
+		}
 
 		if (skb)
 			napi_gro_receive(&priv->xdp_napi, skb);
@@ -527,9 +592,11 @@  static int veth_poll(struct napi_struct *napi, int budget)
 {
 	struct veth_priv *priv =
 		container_of(napi, struct veth_priv, xdp_napi);
+	bool xdp_xmit = false;
+	bool xdp_redir = false;
 	int done;
 
-	done = veth_xdp_rcv(priv, budget);
+	done = veth_xdp_rcv(priv, budget, &xdp_xmit, &xdp_redir);
 
 	if (done < budget && napi_complete_done(napi, done)) {
 		/* Write rx_notify_masked before reading ptr_ring */
@@ -540,6 +607,11 @@  static int veth_poll(struct napi_struct *napi, int budget)
 		}
 	}
 
+	if (xdp_xmit)
+		veth_xdp_flush(priv->dev);
+	if (xdp_redir)
+		xdp_do_flush_map();
+
 	return done;
 }
 
@@ -585,6 +657,9 @@  static int veth_open(struct net_device *dev)
 	if (err < 0)
 		goto err_reg_mem;
 
+	/* Save original mem info as it can be overwritten */
+	priv->xdp_mem = priv->xdp_rxq.mem;
+
 	if (rtnl_dereference(priv->xdp_prog)) {
 		err = veth_napi_add(dev);
 		if (err)
@@ -615,6 +690,7 @@  static int veth_close(struct net_device *dev)
 	if (rtnl_dereference(priv->xdp_prog))
 		veth_napi_del(dev);
 
+	priv->xdp_rxq.mem = priv->xdp_mem;
 	xdp_rxq_info_unreg(&priv->xdp_rxq);
 
 	return 0;