Patch Detail
get:
Show a patch.
patch:
Update a patch (partial update: only the fields supplied in the request body are changed).
put:
Update a patch (full update: all writable fields are replaced).
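The update endpoints require an authenticated account with maintainer rights on the project. As a minimal sketch of a partial update — assuming a valid per-user API token (sent as an "Authorization: Token ..." header) and that the target state name exists for this project — using Python's requests library:

import requests

headers = {"Authorization": "Token 0123456789abcdef"}  # hypothetical token

# PATCH changes only the fields supplied in the request body.
resp = requests.patch(
    "http://patchwork.ozlabs.org/api/patches/1132623/",
    headers=headers,
    json={"state": "accepted"},  # assumed state name; valid names are project-defined
)
resp.raise_for_status()
print(resp.json()["state"])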
GET /api/patches/1132623/?format=api
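The same request can be issued programmatically; reads need no authentication. A minimal sketch with Python's requests library (dropping format=api, which only selects the browsable HTML rendering, so the default JSON shown below is returned):

import requests

resp = requests.get("http://patchwork.ozlabs.org/api/patches/1132623/")
resp.raise_for_status()
patch = resp.json()

# A few of the fields visible in the response below.
print(patch["name"])   # the patch subject
print(patch["state"])  # e.g. "superseded"
print(patch["mbox"])   # mbox download URL, suitable for piping to `git am`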
{ "id": 1132623, "url": "http://patchwork.ozlabs.org/api/patches/1132623/?format=api", "web_url": "http://patchwork.ozlabs.org/project/intel-wired-lan/patch/20190716030637.5634-4-kevin.laatz@intel.com/", "project": { "id": 46, "url": "http://patchwork.ozlabs.org/api/projects/46/?format=api", "name": "Intel Wired Ethernet development", "link_name": "intel-wired-lan", "list_id": "intel-wired-lan.osuosl.org", "list_email": "intel-wired-lan@osuosl.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20190716030637.5634-4-kevin.laatz@intel.com>", "list_archive_url": null, "date": "2019-07-16T03:06:30", "name": "[v2,03/10] xsk: add support to allow unaligned chunk placement", "commit_ref": null, "pull_url": null, "state": "superseded", "archived": false, "hash": "8eb694da30a36b0944e5f465169c258df10ee434", "submitter": { "id": 76901, "url": "http://patchwork.ozlabs.org/api/people/76901/?format=api", "name": "Laatz, Kevin", "email": "kevin.laatz@intel.com" }, "delegate": { "id": 68, "url": "http://patchwork.ozlabs.org/api/users/68/?format=api", "username": "jtkirshe", "first_name": "Jeff", "last_name": "Kirsher", "email": "jeffrey.t.kirsher@intel.com" }, "mbox": "http://patchwork.ozlabs.org/project/intel-wired-lan/patch/20190716030637.5634-4-kevin.laatz@intel.com/mbox/", "series": [ { "id": 119750, "url": "http://patchwork.ozlabs.org/api/series/119750/?format=api", "web_url": "http://patchwork.ozlabs.org/project/intel-wired-lan/list/?series=119750", "date": "2019-07-16T03:06:28", "name": "[v2,01/10] i40e: simplify Rx buffer recycle", "version": 2, "mbox": "http://patchwork.ozlabs.org/series/119750/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/1132623/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/1132623/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<intel-wired-lan-bounces@osuosl.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "intel-wired-lan@lists.osuosl.org" ], "Delivered-To": [ "patchwork-incoming@bilbo.ozlabs.org", "intel-wired-lan@lists.osuosl.org" ], "Authentication-Results": [ "ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=osuosl.org\n\t(client-ip=140.211.166.137; helo=fraxinus.osuosl.org;\n\tenvelope-from=intel-wired-lan-bounces@osuosl.org;\n\treceiver=<UNKNOWN>)", "ozlabs.org;\n\tdmarc=fail (p=none dis=none) header.from=intel.com" ], "Received": [ "from fraxinus.osuosl.org (smtp4.osuosl.org [140.211.166.137])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256\n\tbits)) (No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 45nyfJ0JtCz9sNC\n\tfor <incoming@patchwork.ozlabs.org>;\n\tTue, 16 Jul 2019 21:22:00 +1000 (AEST)", "from localhost (localhost [127.0.0.1])\n\tby fraxinus.osuosl.org (Postfix) with ESMTP id 9442785CA8;\n\tTue, 16 Jul 2019 11:21:58 +0000 (UTC)", "from fraxinus.osuosl.org ([127.0.0.1])\n\tby localhost (.osuosl.org [127.0.0.1]) (amavisd-new, port 10024)\n\twith ESMTP id 2Hk0j-fXtILl; Tue, 16 Jul 2019 11:21:57 +0000 (UTC)", "from ash.osuosl.org (ash.osuosl.org [140.211.166.34])\n\tby fraxinus.osuosl.org (Postfix) with ESMTP id 45AD685FFC;\n\tTue, 16 Jul 2019 11:21:57 +0000 (UTC)", "from fraxinus.osuosl.org (smtp4.osuosl.org [140.211.166.137])\n\tby ash.osuosl.org (Postfix) with ESMTP id 617DF1BF82D\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tTue, 16 Jul 2019 11:21:56 +0000 (UTC)", "from localhost (localhost [127.0.0.1])\n\tby fraxinus.osuosl.org 
(Postfix) with ESMTP id 5E50585FFC\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tTue, 16 Jul 2019 11:21:56 +0000 (UTC)", "from fraxinus.osuosl.org ([127.0.0.1])\n\tby localhost (.osuosl.org [127.0.0.1]) (amavisd-new, port 10024)\n\twith ESMTP id Gbnviknfu2uZ for <intel-wired-lan@lists.osuosl.org>;\n\tTue, 16 Jul 2019 11:21:55 +0000 (UTC)", "from mga03.intel.com (mga03.intel.com [134.134.136.65])\n\tby fraxinus.osuosl.org (Postfix) with ESMTPS id 27E1085CA8\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tTue, 16 Jul 2019 11:21:55 +0000 (UTC)", "from fmsmga006.fm.intel.com ([10.253.24.20])\n\tby orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t16 Jul 2019 04:21:54 -0700", "from silpixa00399838.ir.intel.com (HELO\n\tsilpixa00399838.ger.corp.intel.com) ([10.237.223.10])\n\tby fmsmga006.fm.intel.com with ESMTP; 16 Jul 2019 04:21:52 -0700" ], "X-Virus-Scanned": [ "amavisd-new at osuosl.org", "amavisd-new at osuosl.org" ], "X-Greylist": "domain auto-whitelisted by SQLgrey-1.7.6", "X-Amp-Result": "SKIPPED(no attachment in message)", "X-Amp-File-Uploaded": "False", "X-ExtLoop1": "1", "X-IronPort-AV": "E=Sophos;i=\"5.63,498,1557212400\"; d=\"scan'208\";a=\"366631423\"", "From": "Kevin Laatz <kevin.laatz@intel.com>", "To": "netdev@vger.kernel.org, ast@kernel.org, daniel@iogearbox.net,\n\tbjorn.topel@intel.com, magnus.karlsson@intel.com,\n\tjakub.kicinski@netronome.com, jonathan.lemon@gmail.com", "Date": "Tue, 16 Jul 2019 03:06:30 +0000", "Message-Id": "<20190716030637.5634-4-kevin.laatz@intel.com>", "X-Mailer": "git-send-email 2.17.1", "In-Reply-To": "<20190716030637.5634-1-kevin.laatz@intel.com>", "References": "<20190620090958.2135-1-kevin.laatz@intel.com>\n\t<20190716030637.5634-1-kevin.laatz@intel.com>", "Subject": "[Intel-wired-lan] [PATCH v2 03/10] xsk: add support to allow\n\tunaligned chunk placement", "X-BeenThere": "intel-wired-lan@osuosl.org", "X-Mailman-Version": "2.1.29", "Precedence": "list", "List-Id": "Intel Wired Ethernet Linux Kernel Driver Development\n\t<intel-wired-lan.osuosl.org>", "List-Unsubscribe": "<https://lists.osuosl.org/mailman/options/intel-wired-lan>, \n\t<mailto:intel-wired-lan-request@osuosl.org?subject=unsubscribe>", "List-Archive": "<http://lists.osuosl.org/pipermail/intel-wired-lan/>", "List-Post": "<mailto:intel-wired-lan@osuosl.org>", "List-Help": "<mailto:intel-wired-lan-request@osuosl.org?subject=help>", "List-Subscribe": "<https://lists.osuosl.org/mailman/listinfo/intel-wired-lan>, \n\t<mailto:intel-wired-lan-request@osuosl.org?subject=subscribe>", "Cc": "bruce.richardson@intel.com, ciara.loftus@intel.com,\n\tintel-wired-lan@lists.osuosl.org, bpf@vger.kernel.org,\n\tKevin Laatz <kevin.laatz@intel.com>", "MIME-Version": "1.0", "Content-Type": "text/plain; charset=\"us-ascii\"", "Content-Transfer-Encoding": "7bit", "Errors-To": "intel-wired-lan-bounces@osuosl.org", "Sender": "\"Intel-wired-lan\" <intel-wired-lan-bounces@osuosl.org>" }, "content": "Currently, addresses are chunk size aligned. This means, we are very\nrestricted in terms of where we can place chunk within the umem. For\nexample, if we have a chunk size of 2k, then our chunks can only be placed\nat 0,2k,4k,6k,8k... and so on (ie. every 2k starting from 0).\n\nThis patch introduces the ability to use unaligned chunks. With these\nchanges, we are no longer bound to having to place chunks at a 2k (or\nwhatever your chunk size is) interval. Since we are no longer dealing with\naligned chunks, they can now cross page boundaries. 
Checks for page\ncontiguity have been added in order to keep track of which pages are\nfollowed by a physically contiguous page.\n\nSigned-off-by: Kevin Laatz <kevin.laatz@intel.com>\nSigned-off-by: Ciara Loftus <ciara.loftus@intel.com>\nSigned-off-by: Bruce Richardson <bruce.richardson@intel.com>\n\n---\nv2:\n - Add checks for the flags coming from userspace\n - Fix how we get chunk_size in xsk_diag.c\n - Add defines for masking the new descriptor format\n - Modified the rx functions to use new descriptor format\n - Modified the tx functions to use new descriptor format\n---\n include/net/xdp_sock.h | 2 +\n include/uapi/linux/if_xdp.h | 9 ++++\n net/xdp/xdp_umem.c | 17 ++++---\n net/xdp/xsk.c | 89 ++++++++++++++++++++++++++++++-------\n net/xdp/xsk_diag.c | 2 +-\n net/xdp/xsk_queue.h | 70 +++++++++++++++++++++++++----\n 6 files changed, 159 insertions(+), 30 deletions(-)", "diff": "diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h\nindex 69796d264f06..f7ab8ff33f06 100644\n--- a/include/net/xdp_sock.h\n+++ b/include/net/xdp_sock.h\n@@ -19,6 +19,7 @@ struct xsk_queue;\n struct xdp_umem_page {\n \tvoid *addr;\n \tdma_addr_t dma;\n+\tbool next_pg_contig;\n };\n \n struct xdp_umem_fq_reuse {\n@@ -48,6 +49,7 @@ struct xdp_umem {\n \tbool zc;\n \tspinlock_t xsk_list_lock;\n \tstruct list_head xsk_list;\n+\tu32 flags;\n };\n \n struct xdp_sock {\ndiff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h\nindex faaa5ca2a117..f8dc68fcdf78 100644\n--- a/include/uapi/linux/if_xdp.h\n+++ b/include/uapi/linux/if_xdp.h\n@@ -17,6 +17,9 @@\n #define XDP_COPY\t(1 << 1) /* Force copy-mode */\n #define XDP_ZEROCOPY\t(1 << 2) /* Force zero-copy mode */\n \n+/* Flags for xsk_umem_config flags */\n+#define XDP_UMEM_UNALIGNED_CHUNKS (1 << 0)\n+\n struct sockaddr_xdp {\n \t__u16 sxdp_family;\n \t__u16 sxdp_flags;\n@@ -53,6 +56,7 @@ struct xdp_umem_reg {\n \t__u64 len; /* Length of packet data area */\n \t__u32 chunk_size;\n \t__u32 headroom;\n+\t__u32 flags;\n };\n \n struct xdp_statistics {\n@@ -74,6 +78,11 @@ struct xdp_options {\n #define XDP_UMEM_PGOFF_FILL_RING\t0x100000000ULL\n #define XDP_UMEM_PGOFF_COMPLETION_RING\t0x180000000ULL\n \n+/* Masks for unaligned chunks mode */\n+#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48\n+#define XSK_UNALIGNED_BUF_ADDR_MASK \\\n+\t((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)\n+\n /* Rx/Tx descriptor */\n struct xdp_desc {\n \t__u64 addr;\ndiff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c\nindex 20c91f02d3d8..6130735bdd3d 100644\n--- a/net/xdp/xdp_umem.c\n+++ b/net/xdp/xdp_umem.c\n@@ -303,6 +303,7 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)\n \n static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)\n {\n+\tbool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNKS;\n \tu32 chunk_size = mr->chunk_size, headroom = mr->headroom;\n \tunsigned int chunks, chunks_per_page;\n \tu64 addr = mr->addr, size = mr->len;\n@@ -318,7 +319,10 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)\n \t\treturn -EINVAL;\n \t}\n \n-\tif (!is_power_of_2(chunk_size))\n+\tif (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNKS))\n+\t\treturn -EINVAL;\n+\n+\tif (!unaligned_chunks && !is_power_of_2(chunk_size))\n \t\treturn -EINVAL;\n \n \tif (!PAGE_ALIGNED(addr)) {\n@@ -335,9 +339,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)\n \tif (chunks == 0)\n \t\treturn -EINVAL;\n \n-\tchunks_per_page = PAGE_SIZE / chunk_size;\n-\tif (chunks < chunks_per_page || chunks % chunks_per_page)\n-\t\treturn 
-EINVAL;\n+\tif (!unaligned_chunks) {\n+\t\tchunks_per_page = PAGE_SIZE / chunk_size;\n+\t\tif (chunks < chunks_per_page || chunks % chunks_per_page)\n+\t\t\treturn -EINVAL;\n+\t}\n \n \theadroom = ALIGN(headroom, 64);\n \n@@ -346,13 +352,14 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)\n \t\treturn -EINVAL;\n \n \tumem->address = (unsigned long)addr;\n-\tumem->chunk_mask = ~((u64)chunk_size - 1);\n+\tumem->chunk_mask = unaligned_chunks ? U64_MAX : ~((u64)chunk_size - 1);\n \tumem->size = size;\n \tumem->headroom = headroom;\n \tumem->chunk_size_nohr = chunk_size - headroom;\n \tumem->npgs = size / PAGE_SIZE;\n \tumem->pgs = NULL;\n \tumem->user = NULL;\n+\tumem->flags = mr->flags;\n \tINIT_LIST_HEAD(&umem->xsk_list);\n \tspin_lock_init(&umem->xsk_list_lock);\n \ndiff --git a/net/xdp/xsk.c b/net/xdp/xsk.c\nindex d4d6f10aa936..78089825821a 100644\n--- a/net/xdp/xsk.c\n+++ b/net/xdp/xsk.c\n@@ -45,7 +45,7 @@ EXPORT_SYMBOL(xsk_umem_has_addrs);\n \n u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)\n {\n-\treturn xskq_peek_addr(umem->fq, addr);\n+\treturn xskq_peek_addr(umem->fq, addr, umem);\n }\n EXPORT_SYMBOL(xsk_umem_peek_addr);\n \n@@ -55,21 +55,42 @@ void xsk_umem_discard_addr(struct xdp_umem *umem)\n }\n EXPORT_SYMBOL(xsk_umem_discard_addr);\n \n+/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for\n+ * each page. This is only required in copy mode.\n+ */\n+static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,\n+\t\t\t u32 len, u32 metalen)\n+{\n+\tvoid *to_buf = xdp_umem_get_data(umem, addr);\n+\n+\tif (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) {\n+\t\tvoid *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;\n+\t\tu64 page_start = addr & (PAGE_SIZE - 1);\n+\t\tu64 first_len = PAGE_SIZE - (addr - page_start);\n+\n+\t\tmemcpy(to_buf, from_buf, first_len + metalen);\n+\t\tmemcpy(next_pg_addr, from_buf + first_len, len - first_len);\n+\n+\t\treturn;\n+\t}\n+\n+\tmemcpy(to_buf, from_buf, len + metalen);\n+}\n+\n static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)\n {\n-\tvoid *to_buf, *from_buf;\n+\tu64 offset = xs->umem->headroom;\n+\tvoid *from_buf;\n \tu32 metalen;\n \tu64 addr;\n \tint err;\n \n-\tif (!xskq_peek_addr(xs->umem->fq, &addr) ||\n+\tif (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||\n \t len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {\n \t\txs->rx_dropped++;\n \t\treturn -ENOSPC;\n \t}\n \n-\taddr += xs->umem->headroom;\n-\n \tif (unlikely(xdp_data_meta_unsupported(xdp))) {\n \t\tfrom_buf = xdp->data;\n \t\tmetalen = 0;\n@@ -78,9 +99,13 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)\n \t\tmetalen = xdp->data - xdp->data_meta;\n \t}\n \n-\tto_buf = xdp_umem_get_data(xs->umem, addr);\n-\tmemcpy(to_buf, from_buf, len + metalen);\n-\taddr += metalen;\n+\t__xsk_rcv_memcpy(xs->umem, addr + offset, from_buf, len, metalen);\n+\n+\toffset += metalen;\n+\tif (xs->umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)\n+\t\taddr |= offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;\n+\telse\n+\t\taddr += offset;\n \terr = xskq_produce_batch_desc(xs->rx, addr, len);\n \tif (!err) {\n \t\txskq_discard_addr(xs->umem->fq);\n@@ -127,6 +152,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)\n \tu32 len = xdp->data_end - xdp->data;\n \tvoid *buffer;\n \tu64 addr;\n+\tu64 offset = xs->umem->headroom;\n \tint err;\n \n \tspin_lock_bh(&xs->rx_lock);\n@@ -136,17 +162,20 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)\n 
\t\tgoto out_unlock;\n \t}\n \n-\tif (!xskq_peek_addr(xs->umem->fq, &addr) ||\n+\tif (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||\n \t len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {\n \t\terr = -ENOSPC;\n \t\tgoto out_drop;\n \t}\n \n-\taddr += xs->umem->headroom;\n-\n-\tbuffer = xdp_umem_get_data(xs->umem, addr);\n+\tbuffer = xdp_umem_get_data(xs->umem, addr + offset);\n \tmemcpy(buffer, xdp->data_meta, len + metalen);\n-\taddr += metalen;\n+\toffset += metalen;\n+\n+\tif (xs->umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)\n+\t\taddr |= offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;\n+\telse\n+\t\taddr += offset;\n \terr = xskq_produce_batch_desc(xs->rx, addr, len);\n \tif (err)\n \t\tgoto out_drop;\n@@ -190,7 +219,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)\n \n \trcu_read_lock();\n \tlist_for_each_entry_rcu(xs, &umem->xsk_list, list) {\n-\t\tif (!xskq_peek_desc(xs->tx, desc))\n+\t\tif (!xskq_peek_desc(xs->tx, desc, umem))\n \t\t\tcontinue;\n \n \t\tif (xskq_produce_addr_lazy(umem->cq, desc->addr))\n@@ -240,7 +269,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,\n \n \tmutex_lock(&xs->mutex);\n \n-\twhile (xskq_peek_desc(xs->tx, &desc)) {\n+\twhile (xskq_peek_desc(xs->tx, &desc, xs->umem)) {\n \t\tchar *buffer;\n \t\tu64 addr;\n \t\tu32 len;\n@@ -265,6 +294,10 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,\n \n \t\tskb_put(skb, len);\n \t\taddr = desc.addr;\n+\t\tif (xs->umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)\n+\t\t\taddr = (addr & XSK_UNALIGNED_BUF_ADDR_MASK) |\n+\t\t\t\t(addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT);\n+\n \t\tbuffer = xdp_umem_get_data(xs->umem, addr);\n \t\terr = skb_store_bits(skb, 0, buffer, len);\n \t\tif (unlikely(err)) {\n@@ -275,7 +308,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,\n \t\tskb->dev = xs->dev;\n \t\tskb->priority = sk->sk_priority;\n \t\tskb->mark = sk->sk_mark;\n-\t\tskb_shinfo(skb)->destructor_arg = (void *)(long)addr;\n+\t\tskb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;\n \t\tskb->destructor = xsk_destruct_skb;\n \n \t\terr = dev_direct_xmit(skb, xs->queue_id);\n@@ -415,6 +448,28 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)\n \treturn sock;\n }\n \n+/* Check if umem pages are contiguous.\n+ * If zero-copy mode, use the DMA address to do the page contiguity check\n+ * For all other modes we use addr (kernel virtual address)\n+ */\n+static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)\n+{\n+\tint i;\n+\n+\tif (flags & XDP_ZEROCOPY) {\n+\t\tfor (i = 0; i < umem->npgs - 1; i++)\n+\t\t\tumem->pages[i].next_pg_contig =\n+\t\t\t\t\t(umem->pages[i].dma + PAGE_SIZE ==\n+\t\t\t\t\t\tumem->pages[i + 1].dma);\n+\t\treturn;\n+\t}\n+\n+\tfor (i = 0; i < umem->npgs - 1; i++)\n+\t\tumem->pages[i].next_pg_contig =\n+\t\t\t\t(umem->pages[i].addr + PAGE_SIZE ==\n+\t\t\t\t\tumem->pages[i + 1].addr);\n+}\n+\n static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)\n {\n \tstruct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;\n@@ -502,6 +557,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)\n \t\terr = xdp_umem_assign_dev(xs->umem, dev, qid, flags);\n \t\tif (err)\n \t\t\tgoto out_unlock;\n+\n+\t\txsk_check_page_contiguity(xs->umem, flags);\n \t}\n \n \txs->dev = dev;\ndiff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c\nindex d5e06c8e0cbf..9986a759fe06 100644\n--- a/net/xdp/xsk_diag.c\n+++ b/net/xdp/xsk_diag.c\n@@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct 
xdp_sock *xs, struct sk_buff *nlskb)\n \tdu.id = umem->id;\n \tdu.size = umem->size;\n \tdu.num_pages = umem->npgs;\n-\tdu.chunk_size = (__u32)(~umem->chunk_mask + 1);\n+\tdu.chunk_size = umem->chunk_size_nohr + umem->headroom;\n \tdu.headroom = umem->headroom;\n \tdu.ifindex = umem->dev ? umem->dev->ifindex : 0;\n \tdu.queue_id = umem->queue_id;\ndiff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h\nindex 909c5168ed0f..04afc9de86d9 100644\n--- a/net/xdp/xsk_queue.h\n+++ b/net/xdp/xsk_queue.h\n@@ -133,6 +133,16 @@ static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)\n \n /* UMEM queue */\n \n+static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,\n+\t\t\t\t\t u64 length)\n+{\n+\tbool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;\n+\tbool next_pg_contig =\n+\t\tumem->pages[(addr >> PAGE_SHIFT) + 1].next_pg_contig;\n+\n+\treturn cross_pg && !next_pg_contig;\n+}\n+\n static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)\n {\n \tif (addr >= q->size) {\n@@ -143,23 +153,52 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)\n \treturn true;\n }\n \n-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)\n+static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,\n+\t\t\t\t\t\tu64 length,\n+\t\t\t\t\t\tstruct xdp_umem *umem)\n+{\n+\taddr += addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;\n+\taddr &= XSK_UNALIGNED_BUF_ADDR_MASK;\n+\tif (addr >= q->size ||\n+\t xskq_crosses_non_contig_pg(umem, addr, length)) {\n+\t\tq->invalid_descs++;\n+\t\treturn false;\n+\t}\n+\n+\treturn true;\n+}\n+\n+static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,\n+\t\t\t\t struct xdp_umem *umem)\n {\n \twhile (q->cons_tail != q->cons_head) {\n \t\tstruct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;\n \t\tunsigned int idx = q->cons_tail & q->ring_mask;\n \n \t\t*addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;\n+\t\tif (*addr & (~XSK_UNALIGNED_BUF_ADDR_MASK))\n+\t\t\tgoto out;\n+\n+\t\tif (umem->flags & XDP_UMEM_UNALIGNED_CHUNKS) {\n+\t\t\tif (xskq_is_valid_addr_unaligned(q, *addr,\n+\t\t\t\t\t\t\t umem->chunk_size_nohr,\n+\t\t\t\t\t\t\t umem))\n+\t\t\t\treturn addr;\n+\t\t\tgoto out;\n+\t\t}\n+\n \t\tif (xskq_is_valid_addr(q, *addr))\n \t\t\treturn addr;\n \n+out:\n \t\tq->cons_tail++;\n \t}\n \n \treturn NULL;\n }\n \n-static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)\n+static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,\n+\t\t\t\t struct xdp_umem *umem)\n {\n \tif (q->cons_tail == q->cons_head) {\n \t\tsmp_mb(); /* D, matches A */\n@@ -170,7 +209,7 @@ static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)\n \t\tsmp_rmb();\n \t}\n \n-\treturn xskq_validate_addr(q, addr);\n+\treturn xskq_validate_addr(q, addr, umem);\n }\n \n static inline void xskq_discard_addr(struct xsk_queue *q)\n@@ -229,8 +268,21 @@ static inline int xskq_reserve_addr(struct xsk_queue *q)\n \n /* Rx/Tx queue */\n \n-static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)\n+static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,\n+\t\t\t\t struct xdp_umem *umem)\n {\n+\tif (umem->flags & XDP_UMEM_UNALIGNED_CHUNKS) {\n+\t\tif (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))\n+\t\t\treturn false;\n+\n+\t\tif (d->len > umem->chunk_size_nohr || d->options) {\n+\t\t\tq->invalid_descs++;\n+\t\t\treturn false;\n+\t\t}\n+\n+\t\treturn true;\n+\t}\n+\n \tif (!xskq_is_valid_addr(q, d->addr))\n \t\treturn false;\n \n@@ -244,14 
+296,15 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)\n }\n \n static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,\n-\t\t\t\t\t\t struct xdp_desc *desc)\n+\t\t\t\t\t\t struct xdp_desc *desc,\n+\t\t\t\t\t\t struct xdp_umem *umem)\n {\n \twhile (q->cons_tail != q->cons_head) {\n \t\tstruct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;\n \t\tunsigned int idx = q->cons_tail & q->ring_mask;\n \n \t\t*desc = READ_ONCE(ring->desc[idx]);\n-\t\tif (xskq_is_valid_desc(q, desc))\n+\t\tif (xskq_is_valid_desc(q, desc, umem))\n \t\t\treturn desc;\n \n \t\tq->cons_tail++;\n@@ -261,7 +314,8 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,\n }\n \n static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,\n-\t\t\t\t\t struct xdp_desc *desc)\n+\t\t\t\t\t struct xdp_desc *desc,\n+\t\t\t\t\t struct xdp_umem *umem)\n {\n \tif (q->cons_tail == q->cons_head) {\n \t\tsmp_mb(); /* D, matches A */\n@@ -272,7 +326,7 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,\n \t\tsmp_rmb(); /* C, matches B */\n \t}\n \n-\treturn xskq_validate_desc(q, desc);\n+\treturn xskq_validate_desc(q, desc, umem);\n }\n \n static inline void xskq_discard_desc(struct xsk_queue *q)\n", "prefixes": [ "v2", "03/10" ] }
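The commit message embedded in the response above introduces a new descriptor encoding for unaligned chunk mode: the low 48 bits of a descriptor's addr field carry the chunk's base address and the upper 16 bits carry the offset into the chunk, per the XSK_UNALIGNED_BUF_OFFSET_SHIFT and XSK_UNALIGNED_BUF_ADDR_MASK defines in the quoted diff. A minimal illustrative sketch of that packing (Python used purely for illustration; the kernel code operates on u64 values):

XSK_UNALIGNED_BUF_OFFSET_SHIFT = 48
XSK_UNALIGNED_BUF_ADDR_MASK = (1 << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1

def encode(base, offset):
    # Pack the base address (low 48 bits) with the offset (high 16 bits),
    # as __xsk_rcv() does with "addr |= offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT".
    return (base & XSK_UNALIGNED_BUF_ADDR_MASK) | (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT)

def decode(addr):
    # Recover the absolute buffer address, matching the arithmetic in the
    # diff's xskq_is_valid_addr_unaligned(): add the shifted-down offset,
    # then mask off the offset bits.
    return ((addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT) + addr) & XSK_UNALIGNED_BUF_ADDR_MASK

desc_addr = encode(0x1ffe, 0x100)  # an arbitrary unaligned chunk base plus a headroom offset
assert decode(desc_addr) == 0x1ffe + 0x100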