diff mbox series

[RFC,bpf-next,05/12] xdp: add MEM_TYPE_ZERO_COPY

Message ID 20180515190615.23099-6-bjorn.topel@gmail.com
State RFC, archived
Delegated to: BPF Maintainers
Headers show
Series AF_XDP, zero-copy support | expand

Commit Message

Björn Töpel May 15, 2018, 7:06 p.m. UTC
From: Björn Töpel <bjorn.topel@intel.com>

Here, a new type of allocator support is added to the XDP return
API. A zero-copy allocated xdp_buff cannot be converted to an
xdp_frame. Instead, the buff has to be copied. This is not supported
at all in this commit.

Also, an opaque "handle" is added to xdp_buff. This can be used as a
context for the zero-copy allocator implementation.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 include/net/xdp.h | 10 ++++++++++
 net/core/xdp.c    | 47 ++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 50 insertions(+), 7 deletions(-)

Comments

Jesper Dangaard Brouer May 17, 2018, 5:57 a.m. UTC | #1
On Tue, 15 May 2018 21:06:08 +0200
Björn Töpel <bjorn.topel@gmail.com> wrote:

> @@ -82,6 +88,10 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
>  	int metasize;
>  	int headroom;
>  
> +	// XXX implement clone, copy, use "native" MEM_TYPE
> +	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
> +		return NULL;
> +

There are going to be significant tradeoffs between AF_XDP zero-copy and
the copy-variant.  The copy-variant still has very attractive
RX-performance, and other benefits like not exposing unrelated packets
to userspace (but limit these to the XDP filter).

Thus, as a user I would like to choose between AF_XDP zero-copy and
the copy-variant. Even if my NIC supports zero-copy, I may be interested
in only enabling the copy-variant. This patchset doesn't let me choose.

How do we expose this to userspace?
(Maybe as simple as an sockaddr_xdp->sxdp_flags flag?)
Björn Töpel May 17, 2018, 7:08 a.m. UTC | #2
2018-05-17 7:57 GMT+02:00 Jesper Dangaard Brouer <brouer@redhat.com>:
> On Tue, 15 May 2018 21:06:08 +0200
> Björn Töpel <bjorn.topel@gmail.com> wrote:
>
>> @@ -82,6 +88,10 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
>>       int metasize;
>>       int headroom;
>>
>> +     // XXX implement clone, copy, use "native" MEM_TYPE
>> +     if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
>> +             return NULL;
>> +
>
> There are going to be significant tradeoffs between AF_XDP zero-copy and
> the copy-variant.  The copy-variant still has very attractive
> RX-performance, and other benefits like not exposing unrelated packets
> to userspace (but limit these to the XDP filter).
>
> Thus, as a user I would like to choose between AF_XDP zero-copy and
> the copy-variant. Even if my NIC supports zero-copy, I may be interested
> in only enabling the copy-variant. This patchset doesn't let me choose.
>
> How do we expose this to userspace?
> (Maybe as simple as an sockaddr_xdp->sxdp_flags flag?)
>

We planned to add these flags later, but I think you're right that
it's better to do that right away.

If we try to follow the behavior of the XDP netlink interface: Pick
the "best mode" when there are no flags. A user would like to
"force" a mode -- meaning that you select, say, copy, and get an
error if that's not supported. Four new flags?

diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 77b88c4efe98..ce1f710847b7 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -22,7 +22,11 @@
 #include <linux/types.h>

 /* Options for the sxdp_flags field */
-#define XDP_SHARED_UMEM 1
+#define XDP_SHARED_UMEM        (1U << 0)
+#define XDP_COPY_TX_UMEM    (1U << 1)
+#define XDP_ZEROCOPY_TX_UMEM    (1U << 2)
+#define XDP_COPY_RX_UMEM    (1U << 3)
+#define XDP_ZEROCOPY_RX_UMEM    (1U << 4)

 struct sockaddr_xdp {
     __u16 sxdp_family;

A better way?




> --
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer
Björn Töpel May 17, 2018, 7:09 a.m. UTC | #3
2018-05-17 9:08 GMT+02:00 Björn Töpel <bjorn.topel@gmail.com>:
> 2018-05-17 7:57 GMT+02:00 Jesper Dangaard Brouer <brouer@redhat.com>:
>> On Tue, 15 May 2018 21:06:08 +0200
>> Björn Töpel <bjorn.topel@gmail.com> wrote:
>>
>>> @@ -82,6 +88,10 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
>>>       int metasize;
>>>       int headroom;
>>>
>>> +     // XXX implement clone, copy, use "native" MEM_TYPE
>>> +     if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
>>> +             return NULL;
>>> +
>>
>> There are going to be significant tradeoffs between AF_XDP zero-copy and
>> the copy-variant.  The copy-variant still has very attractive
>> RX-performance, and other benefits like not exposing unrelated packets
>> to userspace (but limit these to the XDP filter).
>>
>> Thus, as a user I would like to choose between AF_XDP zero-copy and
>> the copy-variant. Even if my NIC supports zero-copy, I may be interested
>> in only enabling the copy-variant. This patchset doesn't let me choose.
>>
>> How do we expose this to userspace?
>> (Maybe as simple as an sockaddr_xdp->sxdp_flags flag?)
>>
>
> We planned to add these flags later, but I think you're right that
> it's better to do that right away.
>
> If we try to follow the behavior of the XDP netlink interface: Pick
> the "best mode" when there are no flags. A user would like to
> "force" a mode -- meaning that you select, say, copy, and get an
> error if that's not supported. Four new flags?
>
> diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
> index 77b88c4efe98..ce1f710847b7 100644
> --- a/include/uapi/linux/if_xdp.h
> +++ b/include/uapi/linux/if_xdp.h
> @@ -22,7 +22,11 @@
>  #include <linux/types.h>
>
>  /* Options for the sxdp_flags field */
> -#define XDP_SHARED_UMEM 1
> +#define XDP_SHARED_UMEM        (1U << 0)
> +#define XDP_COPY_TX_UMEM    (1U << 1)
> +#define XDP_ZEROCOPY_TX_UMEM    (1U << 2)
> +#define XDP_COPY_RX_UMEM    (1U << 3)
> +#define XDP_ZEROCOPY_RX_UMEM    (1U << 4)
>
>  struct sockaddr_xdp {
>      __u16 sxdp_family;
>
> A better way?
>

...but without the _UMEM suffix obviously.

>
>
>
>> --
>> Best regards,
>>   Jesper Dangaard Brouer
>>   MSc.CS, Principal Kernel Engineer at Red Hat
>>   LinkedIn: http://www.linkedin.com/in/brouer
diff mbox series

Patch

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 0b689cf561c7..e9eee37cddd6 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -37,6 +37,7 @@  enum xdp_mem_type {
 	MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */
 	MEM_TYPE_PAGE_ORDER0,     /* Orig XDP full page model */
 	MEM_TYPE_PAGE_POOL,
+	MEM_TYPE_ZERO_COPY,
 	MEM_TYPE_MAX,
 };
 
@@ -47,6 +48,10 @@  struct xdp_mem_info {
 
 struct page_pool;
 
+struct zero_copy_allocator {
+	void (*free)(struct zero_copy_allocator *, unsigned long);
+};
+
 struct xdp_rxq_info {
 	struct net_device *dev;
 	u32 queue_index;
@@ -59,6 +64,7 @@  struct xdp_buff {
 	void *data_end;
 	void *data_meta;
 	void *data_hard_start;
+	unsigned long handle;
 	struct xdp_rxq_info *rxq;
 };
 
@@ -82,6 +88,10 @@  struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 	int metasize;
 	int headroom;
 
+	// XXX implement clone, copy, use "native" MEM_TYPE
+	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
+		return NULL;
+
 	/* Assure headroom is available for storing info */
 	headroom = xdp->data - xdp->data_hard_start;
 	metasize = xdp->data - xdp->data_meta;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index bf6758f74339..4e11895b8cd9 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -31,6 +31,7 @@  struct xdp_mem_allocator {
 	union {
 		void *allocator;
 		struct page_pool *page_pool;
+		struct zero_copy_allocator *zc_alloc;
 	};
 	struct rhash_head node;
 	struct rcu_head rcu;
@@ -261,7 +262,7 @@  int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 	xdp_rxq->mem.type = type;
 
 	if (!allocator) {
-		if (type == MEM_TYPE_PAGE_POOL)
+		if (type == MEM_TYPE_PAGE_POOL || type == MEM_TYPE_ZERO_COPY)
 			return -EINVAL; /* Setup time check page_pool req */
 		return 0;
 	}
@@ -308,9 +309,11 @@  int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
 
-static void xdp_return(void *data, struct xdp_mem_info *mem)
+void xdp_return_frame(struct xdp_frame *xdpf)
 {
+	struct xdp_mem_info *mem = &xdpf->mem;
 	struct xdp_mem_allocator *xa;
+	void *data = xdpf->data;
 	struct page *page;
 
 	switch (mem->type) {
@@ -336,16 +339,46 @@  static void xdp_return(void *data, struct xdp_mem_info *mem)
 		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
 		break;
 	}
-}
 
-void xdp_return_frame(struct xdp_frame *xdpf)
-{
-	xdp_return(xdpf->data, &xdpf->mem);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
 void xdp_return_buff(struct xdp_buff *xdp)
 {
-	xdp_return(xdp->data, &xdp->rxq->mem);
+	struct xdp_mem_info *mem = &xdp->rxq->mem;
+	struct xdp_mem_allocator *xa;
+	void *data = xdp->data;
+	struct page *page;
+
+	switch (mem->type) {
+	case MEM_TYPE_ZERO_COPY:
+		rcu_read_lock();
+		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
+		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+		xa->zc_alloc->free(xa->zc_alloc, xdp->handle);
+		rcu_read_unlock();
+		break;
+	case MEM_TYPE_PAGE_POOL:
+		rcu_read_lock();
+		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
+		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+		page = virt_to_head_page(data);
+		if (xa)
+			page_pool_put_page(xa->page_pool, page);
+		else
+			put_page(page);
+		rcu_read_unlock();
+		break;
+	case MEM_TYPE_PAGE_SHARED:
+		page_frag_free(data);
+		break;
+	case MEM_TYPE_PAGE_ORDER0:
+		page = virt_to_page(data); /* Assumes order0 page*/
+		put_page(page);
+		break;
+	default:
+		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
+		break;
+	}
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);