
[bpf-next,2/7] xsk: add XDP_ATTACH bind() flag

Message ID: 20181207114431.18038-3-bjorn.topel@gmail.com
State: Changes Requested, archived
Delegated to: BPF Maintainers
Series: Add XDP_ATTACH bind() flag to AF_XDP sockets

Commit Message

Björn Töpel Dec. 7, 2018, 11:44 a.m. UTC
From: Björn Töpel <bjorn.topel@intel.com>

In this commit the XDP_ATTACH bind() flag is introduced. When an XDP
socket is bound with this flag set, the socket is associated with a
specific netdev Rx queue. The idea is that XDP socket users do not
have to deal with the XSKMAP, or even an XDP program. Instead,
XDP_ATTACH "attaches" an XDP socket to a queue and loads a builtin
XDP program that forwards all packets received on the attached queue
to the socket.

An XDP socket bound with this option performs better, since the BPF
program is smaller and the kernel code path executes fewer
instructions.

This commit only introduces the first part of XDP_ATTACH, namely
associating the XDP socket with a netdev Rx queue.
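
For illustration, a minimal user-space sketch of how an application
could request the attach at bind() time (not part of this patch; umem
registration and Rx/Tx ring setup are omitted, and attach_xsk() is a
made-up helper name):

  #include <linux/if_xdp.h>
  #include <net/if.h>
  #include <sys/socket.h>
  #include <unistd.h>

  #ifndef AF_XDP
  #define AF_XDP 44
  #endif

  /* Sketch only: XDP_UMEM_REG, XDP_UMEM_FILL_RING, XDP_RX_RING, etc.
   * must be set up via setsockopt() before bind() and are omitted.
   */
  static int attach_xsk(const char *ifname, __u32 queue_id)
  {
          struct sockaddr_xdp sxdp = {};
          int fd;

          fd = socket(AF_XDP, SOCK_RAW, 0);
          if (fd < 0)
                  return -1;

          sxdp.sxdp_family = AF_XDP;
          sxdp.sxdp_ifindex = if_nametoindex(ifname);
          sxdp.sxdp_queue_id = queue_id;
          /* New in this patch: associate the socket with the Rx queue. */
          sxdp.sxdp_flags = XDP_ATTACH;

          if (bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp))) {
                  close(fd);
                  return -1;
          }

          return fd;
  }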

To redirect XDP frames to an attached socket, the XDP program must
use the bpf_xsk_redirect() helper, which will be introduced in the
next commit.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 include/linux/netdevice.h   |  1 +
 include/net/xdp_sock.h      |  2 ++
 include/uapi/linux/if_xdp.h |  1 +
 net/xdp/xsk.c               | 50 +++++++++++++++++++++++++++++--------
 4 files changed, 43 insertions(+), 11 deletions(-)

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 94fb2e12f117..a6cc68d2504c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -743,6 +743,7 @@  struct netdev_rx_queue {
 	struct xdp_rxq_info		xdp_rxq;
 #ifdef CONFIG_XDP_SOCKETS
 	struct xdp_umem                 *umem;
+	struct xdp_sock			*xsk;
 #endif
 } ____cacheline_aligned_in_smp;
 
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 13acb9803a6d..95315eb0410a 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -72,7 +72,9 @@  struct xdp_sock {
 
 struct xdp_buff;
 #ifdef CONFIG_XDP_SOCKETS
+int xsk_generic_attached_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
+int xsk_attached_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 void xsk_flush(struct xdp_sock *xs);
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index caed8b1614ff..bd76235c2749 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -16,6 +16,7 @@ 
 #define XDP_SHARED_UMEM	(1 << 0)
 #define XDP_COPY	(1 << 1) /* Force copy-mode */
 #define XDP_ZEROCOPY	(1 << 2) /* Force zero-copy mode */
+#define XDP_ATTACH	(1 << 3)
 
 struct sockaddr_xdp {
 	__u16 sxdp_family;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index a03268454a27..1eff7ac8596d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -100,17 +100,20 @@  static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 	return err;
 }
 
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+int xsk_attached_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
-	u32 len;
+	u32 len = xdp->data_end - xdp->data;
+
+	return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
+		__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
+}
 
+int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
 	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
 		return -EINVAL;
 
-	len = xdp->data_end - xdp->data;
-
-	return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
-		__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
+	return xsk_attached_rcv(xs, xdp);
 }
 
 void xsk_flush(struct xdp_sock *xs)
@@ -119,7 +122,7 @@  void xsk_flush(struct xdp_sock *xs)
 	xs->sk.sk_data_ready(&xs->sk);
 }
 
-int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+int xsk_generic_attached_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	u32 metalen = xdp->data - xdp->data_meta;
 	u32 len = xdp->data_end - xdp->data;
@@ -127,9 +130,6 @@  int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	u64 addr;
 	int err;
 
-	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
-		return -EINVAL;
-
 	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
 	    len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
 		xs->rx_dropped++;
@@ -152,6 +152,14 @@  int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	return err;
 }
 
+int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
+		return -EINVAL;
+
+	return xsk_generic_attached_rcv(xs, xdp);
+}
+
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
 {
 	xskq_produce_flush_addr_n(umem->cq, nb_entries);
@@ -339,6 +347,18 @@  static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
 	return 0;
 }
 
+static void xsk_detach(struct xdp_sock *xs)
+{
+	WRITE_ONCE(xs->dev->_rx[xs->queue_id].xsk, NULL);
+}
+
+static int xsk_attach(struct xdp_sock *xs, struct net_device *dev, u16 qid)
+{
+	WRITE_ONCE(dev->_rx[qid].xsk, xs);
+
+	return 0;
+}
+
 static int xsk_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -359,6 +379,7 @@  static int xsk_release(struct socket *sock)
 
 		/* Wait for driver to stop using the xdp socket. */
 		xdp_del_sk_umem(xs->umem, xs);
+		xsk_detach(xs);
 		xs->dev = NULL;
 		synchronize_net();
 		dev_put(dev);
@@ -432,7 +453,8 @@  static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		struct xdp_sock *umem_xs;
 		struct socket *sock;
 
-		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
+		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
+		    (flags & XDP_ATTACH)) {
 			/* Cannot specify flags for shared sockets. */
 			err = -EINVAL;
 			goto out_unlock;
@@ -478,6 +500,12 @@  static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
 		if (err)
 			goto out_unlock;
+
+		if (flags & XDP_ATTACH) {
+			err = xsk_attach(xs, dev, qid);
+			if (err)
+				goto out_unlock;
+		}
 	}
 
 	xs->dev = dev;