diff mbox series

[RFC,06/24] net: wire up xsk support in the XDP_REDIRECT path

Message ID 20180131135356.19134-7-bjorn.topel@gmail.com
State RFC, archived
Delegated to: David Miller
Headers show
Series Introducing AF_XDP support | expand

Commit Message

Björn Töpel Jan. 31, 2018, 1:53 p.m. UTC
From: Björn Töpel <bjorn.topel@intel.com>

In this commit we add support for XDP programs to redirect frames to a
bound AF_XDP socket. The bpf_xdpsk_redirect helper sets an "xsk" flag
in the per-CPU redirect_info, which xdp_do_redirect and
xdp_do_generic_redirect then check in order to hand the frame to
xsk_rcv / xsk_generic_rcv instead of a device or map target. The
generic receive path is reworked to pass the xdp_buff down to the
redirect code, and the generic XDP_REDIRECT case now breaks out of the
switch instead of falling through to submit the skb.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 include/linux/filter.h |  2 +-
 include/net/xdp_sock.h | 28 ++++++++++++++++++++
 net/core/dev.c         | 28 +++++++++++---------
 net/core/filter.c      | 72 ++++++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 111 insertions(+), 19 deletions(-)
 create mode 100644 include/net/xdp_sock.h
diff mbox series

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 276932d75975..43cacfe2cc2a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -747,7 +747,7 @@  struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
  * This does not appear to be a real limitation for existing software.
  */
 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
-			    struct bpf_prog *prog);
+			    struct xdp_buff *xdp, struct bpf_prog *prog);
 int xdp_do_redirect(struct net_device *dev,
 		    struct xdp_buff *xdp,
 		    struct bpf_prog *prog);
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
new file mode 100644
index 000000000000..132489fe0e70
--- /dev/null
+++ b/include/net/xdp_sock.h
@@ -0,0 +1,28 @@ 
+#ifndef _LINUX_AF_XDP_SOCK_H
+#define _LINUX_AF_XDP_SOCK_H
+
+struct xdp_sock;
+struct xdp_buff;
+
+#ifdef CONFIG_XDP_SOCKETS
+int xsk_generic_rcv(struct xdp_buff *xdp);
+struct xdp_sock *xsk_rcv(struct xdp_sock *xsk, struct xdp_buff *xdp);
+void xsk_flush(struct xdp_sock *xsk);
+#else
+static inline int xsk_generic_rcv(struct xdp_buff *xdp)
+{
+	return -ENOTSUPP;
+}
+
+static inline struct xdp_sock *xsk_rcv(struct xdp_sock *xsk,
+				       struct xdp_buff *xdp)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+
+static inline void xsk_flush(struct xdp_sock *xsk)
+{
+}
+#endif /* CONFIG_XDP_SOCKETS */
+
+#endif /* _LINUX_AF_XDP_SOCK_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index dda9d7b9a840..94d2950fc33d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3951,11 +3951,11 @@  static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
 }
 
 static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+				     struct xdp_buff *xdp,
 				     struct bpf_prog *xdp_prog)
 {
 	struct netdev_rx_queue *rxqueue;
 	u32 metalen, act = XDP_DROP;
-	struct xdp_buff xdp;
 	void *orig_data;
 	int hlen, off;
 	u32 mac_len;
@@ -3991,18 +3991,18 @@  static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	 */
 	mac_len = skb->data - skb_mac_header(skb);
 	hlen = skb_headlen(skb) + mac_len;
-	xdp.data = skb->data - mac_len;
-	xdp.data_meta = xdp.data;
-	xdp.data_end = xdp.data + hlen;
-	xdp.data_hard_start = skb->data - skb_headroom(skb);
-	orig_data = xdp.data;
+	xdp->data = skb->data - mac_len;
+	xdp->data_meta = xdp->data;
+	xdp->data_end = xdp->data + hlen;
+	xdp->data_hard_start = skb->data - skb_headroom(skb);
+	orig_data = xdp->data;
 
 	rxqueue = netif_get_rxqueue(skb);
-	xdp.rxq = &rxqueue->xdp_rxq;
+	xdp->rxq = &rxqueue->xdp_rxq;
 
-	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
 
-	off = xdp.data - orig_data;
+	off = xdp->data - orig_data;
 	if (off > 0)
 		__skb_pull(skb, off);
 	else if (off < 0)
@@ -4015,7 +4015,7 @@  static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 		__skb_push(skb, mac_len);
 		break;
 	case XDP_PASS:
-		metalen = xdp.data - xdp.data_meta;
+		metalen = xdp->data - xdp->data_meta;
 		if (metalen)
 			skb_metadata_set(skb, metalen);
 		break;
@@ -4065,17 +4065,19 @@  static struct static_key generic_xdp_needed __read_mostly;
 int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
 {
 	if (xdp_prog) {
-		u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+		struct xdp_buff xdp;
+		u32 act;
 		int err;
 
+		act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
 		if (act != XDP_PASS) {
 			switch (act) {
 			case XDP_REDIRECT:
 				err = xdp_do_generic_redirect(skb->dev, skb,
-							      xdp_prog);
+							      &xdp, xdp_prog);
 				if (err)
 					goto out_redir;
-			/* fallthru to submit skb */
+				break;
 			case XDP_TX:
 				generic_xdp_tx(skb, xdp_prog);
 				break;
diff --git a/net/core/filter.c b/net/core/filter.c
index aedf57489cb5..eab47173bc9e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -57,6 +57,7 @@ 
 #include <net/busy_poll.h>
 #include <net/tcp.h>
 #include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
 
 /**
  *	sk_filter_trim_cap - run a packet through a socket filter
@@ -1809,8 +1810,8 @@  struct redirect_info {
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
 	unsigned long   map_owner;
-	bool to_xsk;
-	/* XXX cache xsk socket here, to avoid lookup? */
+	bool xsk;
+	struct xdp_sock *xsk_to_flush;
 };
 
 static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -2575,6 +2576,7 @@  static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 void xdp_do_flush_map(void)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct xdp_sock *xsk = ri->xsk_to_flush;
 	struct bpf_map *map = ri->map_to_flush;
 
 	ri->map_to_flush = NULL;
@@ -2590,6 +2592,10 @@  void xdp_do_flush_map(void)
 			break;
 		}
 	}
+
+	ri->xsk_to_flush = NULL;
+	if (xsk)
+		xsk_flush(xsk);
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush_map);
 
@@ -2611,6 +2617,29 @@  static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
 	return (unsigned long)xdp_prog->aux != aux;
 }
 
+static int xdp_do_xsk_redirect(struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct xdp_sock *xsk;
+
+	ri->ifindex = 0;
+	ri->map = NULL;
+	ri->map_owner = 0;
+	ri->xsk = false;
+
+	xsk = xsk_rcv(ri->xsk_to_flush, xdp);
+	if (IS_ERR(xsk)) {
+		_trace_xdp_redirect_err(xdp->rxq->dev, xdp_prog, -1,
+					PTR_ERR(xsk));
+		return PTR_ERR(xsk);
+	}
+
+	ri->xsk_to_flush = xsk;
+	_trace_xdp_redirect(xdp->rxq->dev, xdp_prog, -1);
+
+	return 0;
+}
+
 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 			       struct bpf_prog *xdp_prog)
 {
@@ -2624,6 +2653,7 @@  static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 	ri->ifindex = 0;
 	ri->map = NULL;
 	ri->map_owner = 0;
+	ri->xsk = false;
 
 	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
 		err = -EFAULT;
@@ -2659,6 +2689,9 @@  int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 	u32 index = ri->ifindex;
 	int err;
 
+	if (ri->xsk)
+		return xdp_do_xsk_redirect(xdp, xdp_prog);
+
 	if (ri->map)
 		return xdp_do_redirect_map(dev, xdp, xdp_prog);
 
@@ -2681,6 +2714,30 @@  int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
 
+static int xdp_do_generic_xsk_redirect(struct sk_buff *skb,
+				       struct xdp_buff *xdp,
+				       struct bpf_prog *xdp_prog)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	int err;
+
+	ri->ifindex = 0;
+	ri->map = NULL;
+	ri->map_owner = 0;
+	ri->xsk = false;
+
+	err = xsk_generic_rcv(xdp);
+	if (err) {
+		_trace_xdp_redirect_err(xdp->rxq->dev, xdp_prog, -1, err);
+		return err;
+	}
+
+	consume_skb(skb);
+	_trace_xdp_redirect(xdp->rxq->dev, xdp_prog, -1);  /* XXX fix tracing to support xsk */
+
+	return 0;
+}
+
 static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
 {
 	unsigned int len;
@@ -2709,7 +2766,7 @@  static int xdp_do_generic_redirect_map(struct net_device *dev,
 	ri->ifindex = 0;
 	ri->map = NULL;
 	ri->map_owner = 0;
-	ri->to_xsk = false;
+	ri->xsk = false;
 
 	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
 		err = -EFAULT;
@@ -2733,6 +2790,7 @@  static int xdp_do_generic_redirect_map(struct net_device *dev,
 	}
 
 	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+	generic_xdp_tx(skb, xdp_prog);
 	return 0;
 err:
 	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
@@ -2740,13 +2798,16 @@  static int xdp_do_generic_redirect_map(struct net_device *dev,
 }
 
 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
-			    struct bpf_prog *xdp_prog)
+			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 	u32 index = ri->ifindex;
 	struct net_device *fwd;
 	int err = 0;
 
+	if (ri->xsk)
+		return xdp_do_generic_xsk_redirect(skb, xdp, xdp_prog);
+
 	if (ri->map)
 		return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
 
@@ -2762,6 +2823,7 @@  int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 
 	skb->dev = fwd;
 	_trace_xdp_redirect(dev, xdp_prog, index);
+	generic_xdp_tx(skb, xdp_prog);
 	return 0;
 err:
 	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
@@ -2828,7 +2890,7 @@  BPF_CALL_0(bpf_xdpsk_redirect)
 	 * and XDP_ABORTED on failure? Also, then we can populate xsk
 	 * in ri, and don't have to do the lookup multiple times.
 	 */
-	ri->to_xsk = true;
+	ri->xsk = true;
 
 	return XDP_REDIRECT;
 }