Patch Detail
GET: Show a patch.
PATCH: Update a patch.
PUT: Update a patch.
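The operations listed above can also be driven from a script rather than the browsable UI. A minimal sketch, assuming Python with the requests library and that this Patchwork instance accepts API token authentication; the token and the state slug below are placeholders, not values taken from this page:

```python
import requests

BASE = "http://patchwork.ozlabs.org/api"
PATCH_ID = 925073
TOKEN = "0123456789abcdef"  # placeholder; generate a real token in your Patchwork profile

# GET: show a patch (read access needs no authentication on public projects)
resp = requests.get(f"{BASE}/patches/{PATCH_ID}/")
resp.raise_for_status()
patch = resp.json()
print(patch["name"], "->", patch["state"])

# PATCH: partial update of a patch; typically requires maintainer rights on the project.
# PUT behaves the same way but replaces the whole writable representation.
resp = requests.patch(
    f"{BASE}/patches/{PATCH_ID}/",
    headers={"Authorization": f"Token {TOKEN}"},
    json={"state": "accepted"},  # placeholder state slug; valid slugs depend on the instance
)
resp.raise_for_status()
```

The GET request for this particular patch, and the JSON body it returns, are reproduced below.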
GET /api/patches/925073/?format=api
{ "id": 925073, "url": "http://patchwork.ozlabs.org/api/patches/925073/?format=api", "web_url": "http://patchwork.ozlabs.org/project/intel-wired-lan/patch/20180604120601.18123-10-bjorn.topel@gmail.com/", "project": { "id": 46, "url": "http://patchwork.ozlabs.org/api/projects/46/?format=api", "name": "Intel Wired Ethernet development", "link_name": "intel-wired-lan", "list_id": "intel-wired-lan.osuosl.org", "list_email": "intel-wired-lan@osuosl.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20180604120601.18123-10-bjorn.topel@gmail.com>", "list_archive_url": null, "date": "2018-06-04T12:05:59", "name": "[bpf-next,09/11] i40e: implement AF_XDP zero-copy support for Rx", "commit_ref": null, "pull_url": null, "state": "changes-requested", "archived": false, "hash": "3e590774f294d1f67631a998a5f0de2b8fe94f63", "submitter": { "id": 70569, "url": "http://patchwork.ozlabs.org/api/people/70569/?format=api", "name": "Björn Töpel", "email": "bjorn.topel@gmail.com" }, "delegate": { "id": 68, "url": "http://patchwork.ozlabs.org/api/users/68/?format=api", "username": "jtkirshe", "first_name": "Jeff", "last_name": "Kirsher", "email": "jeffrey.t.kirsher@intel.com" }, "mbox": "http://patchwork.ozlabs.org/project/intel-wired-lan/patch/20180604120601.18123-10-bjorn.topel@gmail.com/mbox/", "series": [ { "id": 48416, "url": "http://patchwork.ozlabs.org/api/series/48416/?format=api", "web_url": "http://patchwork.ozlabs.org/project/intel-wired-lan/list/?series=48416", "date": "2018-06-04T12:05:50", "name": "AF_XDP: introducing zero-copy support", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/48416/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/925073/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/925073/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<intel-wired-lan-bounces@osuosl.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "intel-wired-lan@lists.osuosl.org" ], "Delivered-To": [ "patchwork-incoming@bilbo.ozlabs.org", "intel-wired-lan@lists.osuosl.org" ], "Authentication-Results": [ "ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=osuosl.org\n\t(client-ip=140.211.166.138; helo=whitealder.osuosl.org;\n\tenvelope-from=intel-wired-lan-bounces@osuosl.org;\n\treceiver=<UNKNOWN>)", "ozlabs.org;\n\tdmarc=fail (p=none dis=none) header.from=gmail.com" ], "Received": [ "from whitealder.osuosl.org (smtp1.osuosl.org [140.211.166.138])\n\t(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 40zys51sDxz9s08\n\tfor <incoming@patchwork.ozlabs.org>;\n\tTue, 5 Jun 2018 01:04:41 +1000 (AEST)", "from localhost (localhost [127.0.0.1])\n\tby whitealder.osuosl.org (Postfix) with ESMTP id D546F8792A;\n\tMon, 4 Jun 2018 15:04:39 +0000 (UTC)", "from whitealder.osuosl.org ([127.0.0.1])\n\tby localhost (.osuosl.org [127.0.0.1]) (amavisd-new, port 10024)\n\twith ESMTP id w+twR1BiM0ph; Mon, 4 Jun 2018 15:04:29 +0000 (UTC)", "from ash.osuosl.org (ash.osuosl.org [140.211.166.34])\n\tby whitealder.osuosl.org (Postfix) with ESMTP id 807AF87A17;\n\tMon, 4 Jun 2018 15:04:29 +0000 (UTC)", "from silver.osuosl.org (smtp3.osuosl.org [140.211.166.136])\n\tby ash.osuosl.org (Postfix) with ESMTP id 4F3F31BFFD0\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tMon, 4 Jun 2018 12:07:08 +0000 (UTC)", "from localhost (localhost [127.0.0.1])\n\tby silver.osuosl.org (Postfix) with 
ESMTP id 4ADC12CEDE\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tMon, 4 Jun 2018 12:07:08 +0000 (UTC)", "from silver.osuosl.org ([127.0.0.1])\n\tby localhost (.osuosl.org [127.0.0.1]) (amavisd-new, port 10024)\n\twith ESMTP id 8PL15JYzgwpe for <intel-wired-lan@lists.osuosl.org>;\n\tMon, 4 Jun 2018 12:07:03 +0000 (UTC)", "from mga07.intel.com (mga07.intel.com [134.134.136.100])\n\tby silver.osuosl.org (Postfix) with ESMTPS id 8F2B026E12\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tMon, 4 Jun 2018 12:07:03 +0000 (UTC)", "from fmsmga004.fm.intel.com ([10.253.24.48])\n\tby orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t04 Jun 2018 05:07:03 -0700", "from btopel-mobl1.isw.intel.com (HELO\n\tbtopel-mobl1.hil-pdxphhh.sea.wayport.net) ([10.103.211.148])\n\tby fmsmga004.fm.intel.com with ESMTP; 04 Jun 2018 05:06:56 -0700" ], "X-Virus-Scanned": [ "amavisd-new at osuosl.org", "amavisd-new at osuosl.org" ], "X-Greylist": "domain auto-whitelisted by SQLgrey-1.7.6", "X-Amp-Result": "SKIPPED(no attachment in message)", "X-Amp-File-Uploaded": "False", "X-ExtLoop1": "1", "X-IronPort-AV": "E=Sophos;i=\"5.49,476,1520924400\"; d=\"scan'208\";a=\"60197234\"", "From": "=?utf-8?b?QmrDtnJuIFTDtnBlbA==?= <bjorn.topel@gmail.com>", "To": "bjorn.topel@gmail.com, magnus.karlsson@intel.com,\n\tmagnus.karlsson@gmail.com, alexander.h.duyck@intel.com,\n\talexander.duyck@gmail.com, ast@fb.com, brouer@redhat.com,\n\tdaniel@iogearbox.net, netdev@vger.kernel.org, mykyta.iziumtsev@linaro.org", "Date": "Mon, 4 Jun 2018 14:05:59 +0200", "Message-Id": "<20180604120601.18123-10-bjorn.topel@gmail.com>", "X-Mailer": "git-send-email 2.14.1", "In-Reply-To": "<20180604120601.18123-1-bjorn.topel@gmail.com>", "References": "<20180604120601.18123-1-bjorn.topel@gmail.com>", "MIME-Version": "1.0", "X-Mailman-Approved-At": "Mon, 04 Jun 2018 15:04:25 +0000", "Subject": "[Intel-wired-lan] [PATCH bpf-next 09/11] i40e: implement AF_XDP\n\tzero-copy support for Rx", "X-BeenThere": "intel-wired-lan@osuosl.org", "X-Mailman-Version": "2.1.24", "Precedence": "list", "List-Id": "Intel Wired Ethernet Linux Kernel Driver Development\n\t<intel-wired-lan.osuosl.org>", "List-Unsubscribe": "<https://lists.osuosl.org/mailman/options/intel-wired-lan>, \n\t<mailto:intel-wired-lan-request@osuosl.org?subject=unsubscribe>", "List-Archive": "<http://lists.osuosl.org/pipermail/intel-wired-lan/>", "List-Post": "<mailto:intel-wired-lan@osuosl.org>", "List-Help": "<mailto:intel-wired-lan-request@osuosl.org?subject=help>", "List-Subscribe": "<https://lists.osuosl.org/mailman/listinfo/intel-wired-lan>, \n\t<mailto:intel-wired-lan-request@osuosl.org?subject=subscribe>", "Cc": "francois.ozog@linaro.org, willemdebruijn.kernel@gmail.com, mst@redhat.com,\n\tilias.apalodimas@linaro.org, michael.lundkvist@ericsson.com,\n\tbrian.brooks@linaro.org, intel-wired-lan@lists.osuosl.org, \n\tqi.z.zhang@intel.com, michael.chan@broadcom.com, =?utf-8?b?QmrDtnJu?=\n\t=?utf-8?q?_T=C3=B6pel?= <bjorn.topel@intel.com>, andy@greyhouse.net", "Content-Type": "text/plain; charset=\"utf-8\"", "Content-Transfer-Encoding": "base64", "Errors-To": "intel-wired-lan-bounces@osuosl.org", "Sender": "\"Intel-wired-lan\" <intel-wired-lan-bounces@osuosl.org>" }, "content": "From: Björn Töpel <bjorn.topel@intel.com>\n\nThis commit adds initial AF_XDP zero-copy support for i40e-based\nNICs. First we add support for the new XDP_QUERY_XSK_UMEM and\nXDP_SETUP_XSK_UMEM commands in ndo_bpf. This allows the AF_XDP socket\nto pass a UMEM to the driver. 
The driver will then DMA map all the\nframes in the UMEM for the driver. Next, the Rx code will allocate\nframes from the UMEM fill queue, instead of the regular page\nallocator.\n\nExternally, for the rest of the XDP code, the driver internal UMEM\nallocator will appear as a MEM_TYPE_ZERO_COPY.\n\nThe commit also introduces a completely new clean_rx_irq/allocator\nfunctions for zero-copy, and means (functions pointers) to set\nallocators and clean_rx functions.\n\nThis first version does not support:\n* passing frames to the stack via XDP_PASS (clone/copy to skb).\n* doing XDP redirect to other than AF_XDP sockets\n (convert_to_xdp_frame does not clone the frame yet).\n\nSigned-off-by: Björn Töpel <bjorn.topel@intel.com>\n---\n drivers/net/ethernet/intel/i40e/Makefile | 3 +-\n drivers/net/ethernet/intel/i40e/i40e.h | 23 ++\n drivers/net/ethernet/intel/i40e/i40e_main.c | 35 +-\n drivers/net/ethernet/intel/i40e/i40e_txrx.c | 163 ++-------\n drivers/net/ethernet/intel/i40e/i40e_txrx.h | 128 ++++++-\n drivers/net/ethernet/intel/i40e/i40e_xsk.c | 537 ++++++++++++++++++++++++++++\n drivers/net/ethernet/intel/i40e/i40e_xsk.h | 17 +\n include/net/xdp_sock.h | 19 +\n net/xdp/xdp_umem.h | 10 -\n 9 files changed, 789 insertions(+), 146 deletions(-)\n create mode 100644 drivers/net/ethernet/intel/i40e/i40e_xsk.c\n create mode 100644 drivers/net/ethernet/intel/i40e/i40e_xsk.h", "diff": "diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile\nindex 14397e7e9925..50590e8d1fd1 100644\n--- a/drivers/net/ethernet/intel/i40e/Makefile\n+++ b/drivers/net/ethernet/intel/i40e/Makefile\n@@ -22,6 +22,7 @@ i40e-objs := i40e_main.o \\\n \ti40e_txrx.o\t\\\n \ti40e_ptp.o\t\\\n \ti40e_client.o \\\n-\ti40e_virtchnl_pf.o\n+\ti40e_virtchnl_pf.o \\\n+\ti40e_xsk.o\n \n i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o\ndiff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h\nindex 7a80652e2500..20955e5dce02 100644\n--- a/drivers/net/ethernet/intel/i40e/i40e.h\n+++ b/drivers/net/ethernet/intel/i40e/i40e.h\n@@ -786,6 +786,12 @@ struct i40e_vsi {\n \n \t/* VSI specific handlers */\n \tirqreturn_t (*irq_handler)(int irq, void *data);\n+\n+\t/* AF_XDP zero-copy */\n+\tstruct xdp_umem **xsk_umems;\n+\tu16 num_xsk_umems_used;\n+\tu16 num_xsk_umems;\n+\n } ____cacheline_internodealigned_in_smp;\n \n struct i40e_netdev_priv {\n@@ -1090,6 +1096,20 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)\n \treturn !!vsi->xdp_prog;\n }\n \n+static inline struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring)\n+{\n+\tbool xdp_on = i40e_enabled_xdp_vsi(ring->vsi);\n+\tint qid = ring->queue_index;\n+\n+\tif (ring_is_xdp(ring))\n+\t\tqid -= ring->vsi->alloc_queue_pairs;\n+\n+\tif (!ring->vsi->xsk_umems || !ring->vsi->xsk_umems[qid] || !xdp_on)\n+\t\treturn NULL;\n+\n+\treturn ring->vsi->xsk_umems[qid];\n+}\n+\n int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);\n int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);\n int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,\n@@ -1098,4 +1118,7 @@ int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,\n int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,\n \t\t\t\t struct i40e_cloud_filter *filter,\n \t\t\t\t bool add);\n+int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);\n+int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);\n+\n #endif /* _I40E_H_ */\ndiff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c\nindex 369a116edaa1..8c602424d339 100644\n--- a/drivers/net/ethernet/intel/i40e/i40e_main.c\n+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c\n@@ -5,6 +5,7 @@\n #include <linux/of_net.h>\n #include <linux/pci.h>\n #include <linux/bpf.h>\n+#include <net/xdp_sock.h>\n \n /* Local includes */\n #include \"i40e.h\"\n@@ -16,6 +17,7 @@\n */\n #define CREATE_TRACE_POINTS\n #include \"i40e_trace.h\"\n+#include \"i40e_xsk.h\"\n \n const char i40e_driver_name[] = \"i40e\";\n static const char i40e_driver_string[] =\n@@ -3071,6 +3073,9 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)\n \ti40e_status err = 0;\n \tu32 qtx_ctl = 0;\n \n+\tif (ring_is_xdp(ring))\n+\t\tring->xsk_umem = i40e_xsk_umem(ring);\n+\n \t/* some ATR related tx ring init */\n \tif (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) {\n \t\tring->atr_sample_rate = vsi->back->atr_sample_rate;\n@@ -3180,13 +3185,30 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)\n \tstruct i40e_hw *hw = &vsi->back->hw;\n \tstruct i40e_hmc_obj_rxq rx_ctx;\n \ti40e_status err = 0;\n+\tint ret;\n \n \tbitmap_zero(ring->state, __I40E_RING_STATE_NBITS);\n \n \t/* clear the context structure first */\n \tmemset(&rx_ctx, 0, sizeof(rx_ctx));\n \n-\tring->rx_buf_len = vsi->rx_buf_len;\n+\tring->xsk_umem = i40e_xsk_umem(ring);\n+\tif (ring->xsk_umem) {\n+\t\tring->clean_rx_irq = i40e_clean_rx_irq_zc;\n+\t\tring->alloc_rx_buffers = i40e_alloc_rx_buffers_zc;\n+\t\tring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -\n+\t\t\t\t XDP_PACKET_HEADROOM;\n+\t\tring->zca.free = i40e_zca_free;\n+\t\tret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,\n+\t\t\t\t\t\t MEM_TYPE_ZERO_COPY,\n+\t\t\t\t\t\t &ring->zca);\n+\t\tif (ret)\n+\t\t\treturn ret;\n+\t} else {\n+\t\tring->clean_rx_irq = i40e_clean_rx_irq;\n+\t\tring->alloc_rx_buffers = i40e_alloc_rx_buffers;\n+\t\tring->rx_buf_len = vsi->rx_buf_len;\n+\t}\n \n \trx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,\n \t\t\t\t BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));\n@@ -3242,7 +3264,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)\n \tring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);\n \twritel(0, ring->tail);\n \n-\ti40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));\n+\tring->alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));\n \n \treturn 0;\n }\n@@ -12022,7 +12044,7 @@ static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair)\n *\n * Returns 0 on success, <0 on failure.\n **/\n-static int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)\n+int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)\n {\n \tint err;\n \n@@ -12047,7 +12069,7 @@ static int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)\n *\n * Returns 0 on success, <0 on failure.\n **/\n-static int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair)\n+int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair)\n {\n \tint err;\n \n@@ -12095,6 +12117,11 @@ static int i40e_xdp(struct net_device *dev,\n \t\txdp->prog_attached = i40e_enabled_xdp_vsi(vsi);\n \t\txdp->prog_id = vsi->xdp_prog ? 
vsi->xdp_prog->aux->id : 0;\n \t\treturn 0;\n+\tcase XDP_QUERY_XSK_UMEM:\n+\t\treturn 0;\n+\tcase XDP_SETUP_XSK_UMEM:\n+\t\treturn i40e_xsk_umem_setup(vsi, xdp->xsk.umem,\n+\t\t\t\t\t xdp->xsk.queue_id);\n \tdefault:\n \t\treturn -EINVAL;\n \t}\ndiff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c\nindex 5f01e4ce9c92..6b1142fbc697 100644\n--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c\n+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c\n@@ -5,6 +5,7 @@\n #include <net/busy_poll.h>\n #include <linux/bpf_trace.h>\n #include <net/xdp.h>\n+#include <net/xdp_sock.h>\n #include \"i40e.h\"\n #include \"i40e_trace.h\"\n #include \"i40e_prototype.h\"\n@@ -536,8 +537,8 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,\n * This is used to verify if the FD programming or invalidation\n * requested by SW to the HW is successful or not and take actions accordingly.\n **/\n-static void i40e_fd_handle_status(struct i40e_ring *rx_ring,\n-\t\t\t\t union i40e_rx_desc *rx_desc, u8 prog_id)\n+void i40e_fd_handle_status(struct i40e_ring *rx_ring,\n+\t\t\t union i40e_rx_desc *rx_desc, u8 prog_id)\n {\n \tstruct i40e_pf *pf = rx_ring->vsi->back;\n \tstruct pci_dev *pdev = pf->pdev;\n@@ -1246,25 +1247,6 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,\n \tnew_buff->pagecnt_bias\t= old_buff->pagecnt_bias;\n }\n \n-/**\n- * i40e_rx_is_programming_status - check for programming status descriptor\n- * @qw: qword representing status_error_len in CPU ordering\n- *\n- * The value of in the descriptor length field indicate if this\n- * is a programming status descriptor for flow director or FCoE\n- * by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise\n- * it is a packet descriptor.\n- **/\n-static inline bool i40e_rx_is_programming_status(u64 qw)\n-{\n-\t/* The Rx filter programming status and SPH bit occupy the same\n-\t * spot in the descriptor. 
Since we don't support packet split we\n-\t * can just reuse the bit as an indication that this is a\n-\t * programming status descriptor.\n-\t */\n-\treturn qw & I40E_RXD_QW1_LENGTH_SPH_MASK;\n-}\n-\n /**\n * i40e_clean_programming_status - clean the programming status descriptor\n * @rx_ring: the rx ring that has this descriptor\n@@ -1373,31 +1355,35 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)\n \t}\n \n \t/* Free all the Rx ring sk_buffs */\n-\tfor (i = 0; i < rx_ring->count; i++) {\n-\t\tstruct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];\n-\n-\t\tif (!rx_bi->page)\n-\t\t\tcontinue;\n+\tif (!rx_ring->xsk_umem) {\n+\t\tfor (i = 0; i < rx_ring->count; i++) {\n+\t\t\tstruct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];\n \n-\t\t/* Invalidate cache lines that may have been written to by\n-\t\t * device so that we avoid corrupting memory.\n-\t\t */\n-\t\tdma_sync_single_range_for_cpu(rx_ring->dev,\n-\t\t\t\t\t rx_bi->dma,\n-\t\t\t\t\t rx_bi->page_offset,\n-\t\t\t\t\t rx_ring->rx_buf_len,\n-\t\t\t\t\t DMA_FROM_DEVICE);\n-\n-\t\t/* free resources associated with mapping */\n-\t\tdma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,\n-\t\t\t\t i40e_rx_pg_size(rx_ring),\n-\t\t\t\t DMA_FROM_DEVICE,\n-\t\t\t\t I40E_RX_DMA_ATTR);\n-\n-\t\t__page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);\n+\t\t\tif (!rx_bi->page)\n+\t\t\t\tcontinue;\n \n-\t\trx_bi->page = NULL;\n-\t\trx_bi->page_offset = 0;\n+\t\t\t/* Invalidate cache lines that may have been\n+\t\t\t * written to by device so that we avoid\n+\t\t\t * corrupting memory.\n+\t\t\t */\n+\t\t\tdma_sync_single_range_for_cpu(rx_ring->dev,\n+\t\t\t\t\t\t rx_bi->dma,\n+\t\t\t\t\t\t rx_bi->page_offset,\n+\t\t\t\t\t\t rx_ring->rx_buf_len,\n+\t\t\t\t\t\t DMA_FROM_DEVICE);\n+\n+\t\t\t/* free resources associated with mapping */\n+\t\t\tdma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,\n+\t\t\t\t\t i40e_rx_pg_size(rx_ring),\n+\t\t\t\t\t DMA_FROM_DEVICE,\n+\t\t\t\t\t I40E_RX_DMA_ATTR);\n+\n+\t\t\t__page_frag_cache_drain(rx_bi->page,\n+\t\t\t\t\t\trx_bi->pagecnt_bias);\n+\n+\t\t\trx_bi->page = NULL;\n+\t\t\trx_bi->page_offset = 0;\n+\t\t}\n \t}\n \n \tbi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;\n@@ -1487,27 +1473,6 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)\n \treturn err;\n }\n \n-/**\n- * i40e_release_rx_desc - Store the new tail and head values\n- * @rx_ring: ring to bump\n- * @val: new head index\n- **/\n-static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)\n-{\n-\trx_ring->next_to_use = val;\n-\n-\t/* update next to alloc since we have filled the ring */\n-\trx_ring->next_to_alloc = val;\n-\n-\t/* Force memory writes to complete before letting h/w\n-\t * know there are new descriptors to fetch. 
(Only\n-\t * applicable for weak-ordered memory model archs,\n-\t * such as IA-64).\n-\t */\n-\twmb();\n-\twritel(val, rx_ring->tail);\n-}\n-\n /**\n * i40e_rx_offset - Return expected offset into page to access data\n * @rx_ring: Ring we are requesting offset of\n@@ -1576,8 +1541,8 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,\n * @skb: packet to send up\n * @vlan_tag: vlan tag for packet\n **/\n-static void i40e_receive_skb(struct i40e_ring *rx_ring,\n-\t\t\t struct sk_buff *skb, u16 vlan_tag)\n+void i40e_receive_skb(struct i40e_ring *rx_ring,\n+\t\t struct sk_buff *skb, u16 vlan_tag)\n {\n \tstruct i40e_q_vector *q_vector = rx_ring->q_vector;\n \n@@ -1804,7 +1769,6 @@ static inline void i40e_rx_hash(struct i40e_ring *ring,\n * order to populate the hash, checksum, VLAN, protocol, and\n * other fields within the skb.\n **/\n-static inline\n void i40e_process_skb_fields(struct i40e_ring *rx_ring,\n \t\t\t union i40e_rx_desc *rx_desc, struct sk_buff *skb,\n \t\t\t u8 rx_ptype)\n@@ -1829,46 +1793,6 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,\n \tskb->protocol = eth_type_trans(skb, rx_ring->netdev);\n }\n \n-/**\n- * i40e_cleanup_headers - Correct empty headers\n- * @rx_ring: rx descriptor ring packet is being transacted on\n- * @skb: pointer to current skb being fixed\n- * @rx_desc: pointer to the EOP Rx descriptor\n- *\n- * Also address the case where we are pulling data in on pages only\n- * and as such no data is present in the skb header.\n- *\n- * In addition if skb is not at least 60 bytes we need to pad it so that\n- * it is large enough to qualify as a valid Ethernet frame.\n- *\n- * Returns true if an error was encountered and skb was freed.\n- **/\n-static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,\n-\t\t\t\t union i40e_rx_desc *rx_desc)\n-\n-{\n-\t/* XDP packets use error pointer so abort at this point */\n-\tif (IS_ERR(skb))\n-\t\treturn true;\n-\n-\t/* ERR_MASK will only have valid bits if EOP set, and\n-\t * what we are doing here is actually checking\n-\t * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in\n-\t * the error field\n-\t */\n-\tif (unlikely(i40e_test_staterr(rx_desc,\n-\t\t\t\t BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {\n-\t\tdev_kfree_skb_any(skb);\n-\t\treturn true;\n-\t}\n-\n-\t/* if eth_skb_pad returns an error the skb was freed */\n-\tif (eth_skb_pad(skb))\n-\t\treturn true;\n-\n-\treturn false;\n-}\n-\n /**\n * i40e_page_is_reusable - check if any reuse is possible\n * @page: page struct to check\n@@ -2177,15 +2101,11 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,\n \treturn true;\n }\n \n-#define I40E_XDP_PASS 0\n-#define I40E_XDP_CONSUMED 1\n-#define I40E_XDP_TX 2\n-\n static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,\n \t\t\t struct i40e_ring *xdp_ring);\n \n-static int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp,\n-\t\t\t\t struct i40e_ring *xdp_ring)\n+int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp,\n+\t\t\t struct i40e_ring *xdp_ring)\n {\n \tstruct xdp_frame *xdpf = convert_to_xdp_frame(xdp);\n \n@@ -2214,8 +2134,6 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,\n \tif (!xdp_prog)\n \t\tgoto xdp_out;\n \n-\tprefetchw(xdp->data_hard_start); /* xdp_frame write */\n-\n \tact = bpf_prog_run_xdp(xdp_prog, xdp);\n \tswitch (act) {\n \tcase XDP_PASS:\n@@ -2263,15 +2181,6 @@ static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,\n #endif\n }\n \n-static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)\n-{\n-\t/* Force memory writes to 
complete before letting h/w\n-\t * know there are new descriptors to fetch.\n-\t */\n-\twmb();\n-\twritel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);\n-}\n-\n /**\n * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf\n * @rx_ring: rx descriptor ring to transact packets on\n@@ -2284,7 +2193,7 @@ static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)\n *\n * Returns amount of work completed\n **/\n-static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)\n+int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)\n {\n \tunsigned int total_rx_bytes = 0, total_rx_packets = 0;\n \tstruct sk_buff *skb = rx_ring->skb;\n@@ -2576,7 +2485,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)\n \tbudget_per_ring = max(budget/q_vector->num_ringpairs, 1);\n \n \ti40e_for_each_ring(ring, q_vector->rx) {\n-\t\tint cleaned = i40e_clean_rx_irq(ring, budget_per_ring);\n+\t\tint cleaned = ring->clean_rx_irq(ring, budget_per_ring);\n \n \t\twork_done += cleaned;\n \t\t/* if we clean as many as budgeted, we must not be done */\ndiff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h\nindex 820f76db251b..cddb185cd2f8 100644\n--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h\n+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h\n@@ -296,13 +296,22 @@ struct i40e_tx_buffer {\n \n struct i40e_rx_buffer {\n \tdma_addr_t dma;\n-\tstruct page *page;\n+\tunion {\n+\t\tstruct {\n+\t\t\tstruct page *page;\n #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)\n-\t__u32 page_offset;\n+\t\t\t__u32 page_offset;\n #else\n-\t__u16 page_offset;\n+\t\t\t__u16 page_offset;\n #endif\n-\t__u16 pagecnt_bias;\n+\t\t\t__u16 pagecnt_bias;\n+\t\t};\n+\t\tstruct {\n+\t\t\t/* for umem */\n+\t\t\tvoid *addr;\n+\t\t\tu64 handle;\n+\t\t};\n+\t};\n };\n \n struct i40e_queue_stats {\n@@ -414,6 +423,12 @@ struct i40e_ring {\n \n \tstruct i40e_channel *ch;\n \tstruct xdp_rxq_info xdp_rxq;\n+\n+\tint (*clean_rx_irq)(struct i40e_ring *ring, int budget);\n+\tbool (*alloc_rx_buffers)(struct i40e_ring *ring, u16 n);\n+\tstruct xdp_umem *xsk_umem;\n+\n+\tstruct zero_copy_allocator zca; /* ZC allocator anchor */\n } ____cacheline_internodealigned_in_smp;\n \n static inline bool ring_uses_build_skb(struct i40e_ring *ring)\n@@ -490,6 +505,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb);\n int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,\n \t\t u32 flags);\n void i40e_xdp_flush(struct net_device *dev);\n+int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget);\n \n /**\n * i40e_get_head - Retrieve head from head writeback\n@@ -576,4 +592,108 @@ static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring)\n {\n \treturn netdev_get_tx_queue(ring->netdev, ring->queue_index);\n }\n+\n+#define I40E_XDP_PASS 0\n+#define I40E_XDP_CONSUMED 1\n+#define I40E_XDP_TX 2\n+\n+/**\n+ * i40e_release_rx_desc - Store the new tail and head values\n+ * @rx_ring: ring to bump\n+ * @val: new head index\n+ **/\n+static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)\n+{\n+\trx_ring->next_to_use = val;\n+\n+\t/* update next to alloc since we have filled the ring */\n+\trx_ring->next_to_alloc = val;\n+\n+\t/* Force memory writes to complete before letting h/w\n+\t * know there are new descriptors to fetch. 
(Only\n+\t * applicable for weak-ordered memory model archs,\n+\t * such as IA-64).\n+\t */\n+\twmb();\n+\twritel(val, rx_ring->tail);\n+}\n+\n+/**\n+ * i40e_rx_is_programming_status - check for programming status descriptor\n+ * @qw: qword representing status_error_len in CPU ordering\n+ *\n+ * The value of in the descriptor length field indicate if this\n+ * is a programming status descriptor for flow director or FCoE\n+ * by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise\n+ * it is a packet descriptor.\n+ **/\n+static inline bool i40e_rx_is_programming_status(u64 qw)\n+{\n+\t/* The Rx filter programming status and SPH bit occupy the same\n+\t * spot in the descriptor. Since we don't support packet split we\n+\t * can just reuse the bit as an indication that this is a\n+\t * programming status descriptor.\n+\t */\n+\treturn qw & I40E_RXD_QW1_LENGTH_SPH_MASK;\n+}\n+\n+/**\n+ * i40e_cleanup_headers - Correct empty headers\n+ * @rx_ring: rx descriptor ring packet is being transacted on\n+ * @skb: pointer to current skb being fixed\n+ * @rx_desc: pointer to the EOP Rx descriptor\n+ *\n+ * Also address the case where we are pulling data in on pages only\n+ * and as such no data is present in the skb header.\n+ *\n+ * In addition if skb is not at least 60 bytes we need to pad it so that\n+ * it is large enough to qualify as a valid Ethernet frame.\n+ *\n+ * Returns true if an error was encountered and skb was freed.\n+ **/\n+static inline bool i40e_cleanup_headers(struct i40e_ring *rx_ring,\n+\t\t\t\t\tstruct sk_buff *skb,\n+\t\t\t\t\tunion i40e_rx_desc *rx_desc)\n+\n+{\n+\t/* XDP packets use error pointer so abort at this point */\n+\tif (IS_ERR(skb))\n+\t\treturn true;\n+\n+\t/* ERR_MASK will only have valid bits if EOP set, and\n+\t * what we are doing here is actually checking\n+\t * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in\n+\t * the error field\n+\t */\n+\tif (unlikely(i40e_test_staterr(rx_desc,\n+\t\t\t\t BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {\n+\t\tdev_kfree_skb_any(skb);\n+\t\treturn true;\n+\t}\n+\n+\t/* if eth_skb_pad returns an error the skb was freed */\n+\tif (eth_skb_pad(skb))\n+\t\treturn true;\n+\n+\treturn false;\n+}\n+\n+static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)\n+{\n+\t/* Force memory writes to complete before letting h/w\n+\t * know there are new descriptors to fetch.\n+\t */\n+\twmb();\n+\twritel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);\n+}\n+\n+void i40e_fd_handle_status(struct i40e_ring *rx_ring,\n+\t\t\t union i40e_rx_desc *rx_desc, u8 prog_id);\n+int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp,\n+\t\t\t struct i40e_ring *xdp_ring);\n+void i40e_process_skb_fields(struct i40e_ring *rx_ring,\n+\t\t\t union i40e_rx_desc *rx_desc, struct sk_buff *skb,\n+\t\t\t u8 rx_ptype);\n+void i40e_receive_skb(struct i40e_ring *rx_ring,\n+\t\t struct sk_buff *skb, u16 vlan_tag);\n #endif /* _I40E_TXRX_H_ */\ndiff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c\nnew file mode 100644\nindex 000000000000..9d16924415b9\n--- /dev/null\n+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c\n@@ -0,0 +1,537 @@\n+// SPDX-License-Identifier: GPL-2.0\n+/* Copyright(c) 2018 Intel Corporation. 
*/\n+\n+#include <linux/bpf_trace.h>\n+#include <net/xdp_sock.h>\n+#include <net/xdp.h>\n+\n+#include \"i40e.h\"\n+#include \"i40e_txrx.h\"\n+\n+static int i40e_alloc_xsk_umems(struct i40e_vsi *vsi)\n+{\n+\tif (vsi->xsk_umems)\n+\t\treturn 0;\n+\n+\tvsi->num_xsk_umems_used = 0;\n+\tvsi->num_xsk_umems = vsi->alloc_queue_pairs;\n+\tvsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems),\n+\t\t\t\t GFP_KERNEL);\n+\tif (!vsi->xsk_umems) {\n+\t\tvsi->num_xsk_umems = 0;\n+\t\treturn -ENOMEM;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+static int i40e_add_xsk_umem(struct i40e_vsi *vsi, struct xdp_umem *umem,\n+\t\t\t u16 qid)\n+{\n+\tint err;\n+\n+\terr = i40e_alloc_xsk_umems(vsi);\n+\tif (err)\n+\t\treturn err;\n+\n+\tvsi->xsk_umems[qid] = umem;\n+\tvsi->num_xsk_umems_used++;\n+\n+\treturn 0;\n+}\n+\n+static void i40e_remove_xsk_umem(struct i40e_vsi *vsi, u16 qid)\n+{\n+\tvsi->xsk_umems[qid] = NULL;\n+\tvsi->num_xsk_umems_used--;\n+\n+\tif (vsi->num_xsk_umems == 0) {\n+\t\tkfree(vsi->xsk_umems);\n+\t\tvsi->xsk_umems = NULL;\n+\t\tvsi->num_xsk_umems = 0;\n+\t}\n+}\n+\n+static int i40e_xsk_umem_dma_map(struct i40e_vsi *vsi, struct xdp_umem *umem)\n+{\n+\tstruct i40e_pf *pf = vsi->back;\n+\tstruct device *dev;\n+\tunsigned int i, j;\n+\tdma_addr_t dma;\n+\n+\tdev = &pf->pdev->dev;\n+\tfor (i = 0; i < umem->npgs; i++) {\n+\t\tdma = dma_map_page_attrs(dev, umem->pgs[i], 0, PAGE_SIZE,\n+\t\t\t\t\t DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR);\n+\t\tif (dma_mapping_error(dev, dma))\n+\t\t\tgoto out_unmap;\n+\n+\t\tumem->pages[i].dma = dma;\n+\t}\n+\n+\treturn 0;\n+\n+out_unmap:\n+\tfor (j = 0; j < i; j++) {\n+\t\tdma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,\n+\t\t\t\t DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR);\n+\t\tumem->pages[i].dma = 0;\n+\t}\n+\n+\treturn -1;\n+}\n+\n+static void i40e_xsk_umem_dma_unmap(struct i40e_vsi *vsi, struct xdp_umem *umem)\n+{\n+\tstruct i40e_pf *pf = vsi->back;\n+\tstruct device *dev;\n+\tunsigned int i;\n+\n+\tdev = &pf->pdev->dev;\n+\n+\tfor (i = 0; i < umem->npgs; i++) {\n+\t\tdma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,\n+\t\t\t\t DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR);\n+\n+\t\tumem->pages[i].dma = 0;\n+\t}\n+}\n+\n+static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,\n+\t\t\t\tu16 qid)\n+{\n+\tbool if_running;\n+\tint err;\n+\n+\tif (vsi->type != I40E_VSI_MAIN)\n+\t\treturn -EINVAL;\n+\n+\tif (qid >= vsi->num_queue_pairs)\n+\t\treturn -EINVAL;\n+\n+\tif (vsi->xsk_umems && vsi->xsk_umems[qid])\n+\t\treturn -EBUSY;\n+\n+\terr = i40e_xsk_umem_dma_map(vsi, umem);\n+\tif (err)\n+\t\treturn err;\n+\n+\tif_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi);\n+\n+\tif (if_running) {\n+\t\terr = i40e_queue_pair_disable(vsi, qid);\n+\t\tif (err)\n+\t\t\treturn err;\n+\t}\n+\n+\terr = i40e_add_xsk_umem(vsi, umem, qid);\n+\tif (err)\n+\t\treturn err;\n+\n+\tif (if_running) {\n+\t\terr = i40e_queue_pair_enable(vsi, qid);\n+\t\tif (err)\n+\t\t\treturn err;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)\n+{\n+\tbool if_running;\n+\tint err;\n+\n+\tif (!vsi->xsk_umems || qid >= vsi->num_xsk_umems ||\n+\t !vsi->xsk_umems[qid])\n+\t\treturn -EINVAL;\n+\n+\tif_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi);\n+\n+\tif (if_running) {\n+\t\terr = i40e_queue_pair_disable(vsi, qid);\n+\t\tif (err)\n+\t\t\treturn err;\n+\t}\n+\n+\ti40e_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]);\n+\ti40e_remove_xsk_umem(vsi, qid);\n+\n+\tif (if_running) {\n+\t\terr = 
i40e_queue_pair_enable(vsi, qid);\n+\t\tif (err)\n+\t\t\treturn err;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,\n+\t\t\tu16 qid)\n+{\n+\tif (umem)\n+\t\treturn i40e_xsk_umem_enable(vsi, umem, qid);\n+\n+\treturn i40e_xsk_umem_disable(vsi, qid);\n+}\n+\n+static struct sk_buff *i40e_run_xdp_zc(struct i40e_ring *rx_ring,\n+\t\t\t\t struct xdp_buff *xdp)\n+{\n+\tint err, result = I40E_XDP_PASS;\n+\tstruct i40e_ring *xdp_ring;\n+\tstruct bpf_prog *xdp_prog;\n+\tu32 act;\n+\tu16 off;\n+\n+\trcu_read_lock();\n+\txdp_prog = READ_ONCE(rx_ring->xdp_prog);\n+\tact = bpf_prog_run_xdp(xdp_prog, xdp);\n+\toff = xdp->data - xdp->data_hard_start;\n+\txdp->handle += off;\n+\tswitch (act) {\n+\tcase XDP_PASS:\n+\t\tbreak;\n+\tcase XDP_TX:\n+\t\txdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];\n+\t\tresult = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);\n+\t\tbreak;\n+\tcase XDP_REDIRECT:\n+\t\terr = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);\n+\t\tresult = !err ? I40E_XDP_TX : I40E_XDP_CONSUMED;\n+\t\tbreak;\n+\tdefault:\n+\t\tbpf_warn_invalid_xdp_action(act);\n+\tcase XDP_ABORTED:\n+\t\ttrace_xdp_exception(rx_ring->netdev, xdp_prog, act);\n+\t\t/* fallthrough -- handle aborts by dropping packet */\n+\tcase XDP_DROP:\n+\t\tresult = I40E_XDP_CONSUMED;\n+\t\tbreak;\n+\t}\n+\n+\trcu_read_unlock();\n+\treturn ERR_PTR(-result);\n+}\n+\n+static bool i40e_alloc_frame_zc(struct i40e_ring *rx_ring,\n+\t\t\t\tstruct i40e_rx_buffer *bi)\n+{\n+\tstruct xdp_umem *umem = rx_ring->xsk_umem;\n+\tvoid *addr = bi->addr;\n+\tu64 handle;\n+\n+\tif (addr) {\n+\t\trx_ring->rx_stats.page_reuse_count++;\n+\t\treturn true;\n+\t}\n+\n+\tif (!xsk_umem_peek_addr(umem, &handle)) {\n+\t\trx_ring->rx_stats.alloc_page_failed++;\n+\t\treturn false;\n+\t}\n+\n+\tbi->dma = xdp_umem_get_dma(umem, handle);\n+\tbi->addr = xdp_umem_get_data(umem, handle);\n+\n+\tbi->dma += umem->headroom + XDP_PACKET_HEADROOM;\n+\tbi->addr += umem->headroom + XDP_PACKET_HEADROOM;\n+\tbi->handle = handle + umem->headroom;\n+\n+\txsk_umem_discard_addr(umem);\n+\treturn true;\n+}\n+\n+bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count)\n+{\n+\tu16 ntu = rx_ring->next_to_use;\n+\tunion i40e_rx_desc *rx_desc;\n+\tstruct i40e_rx_buffer *bi;\n+\n+\trx_desc = I40E_RX_DESC(rx_ring, ntu);\n+\tbi = &rx_ring->rx_bi[ntu];\n+\n+\tdo {\n+\t\tif (!i40e_alloc_frame_zc(rx_ring, bi))\n+\t\t\tgoto no_buffers;\n+\n+\t\t/* sync the buffer for use by the device */\n+\t\tdma_sync_single_range_for_device(rx_ring->dev, bi->dma, 0,\n+\t\t\t\t\t\t rx_ring->rx_buf_len,\n+\t\t\t\t\t\t DMA_BIDIRECTIONAL);\n+\n+\t\t/* Refresh the desc even if buffer_addrs didn't change\n+\t\t * because each write-back erases this info.\n+\t\t */\n+\t\trx_desc->read.pkt_addr = cpu_to_le64(bi->dma);\n+\n+\t\trx_desc++;\n+\t\tbi++;\n+\t\tntu++;\n+\t\tif (unlikely(ntu == rx_ring->count)) {\n+\t\t\trx_desc = I40E_RX_DESC(rx_ring, 0);\n+\t\t\tbi = rx_ring->rx_bi;\n+\t\t\tntu = 0;\n+\t\t}\n+\n+\t\t/* clear the status bits for the next_to_use descriptor */\n+\t\trx_desc->wb.qword1.status_error_len = 0;\n+\n+\t\tcleaned_count--;\n+\t} while (cleaned_count);\n+\n+\tif (rx_ring->next_to_use != ntu)\n+\t\ti40e_release_rx_desc(rx_ring, ntu);\n+\n+\treturn false;\n+\n+no_buffers:\n+\tif (rx_ring->next_to_use != ntu)\n+\t\ti40e_release_rx_desc(rx_ring, ntu);\n+\n+\t/* make sure to come back via polling to try again after\n+\t * allocation failure\n+\t */\n+\treturn true;\n+}\n+\n+static struct i40e_rx_buffer *i40e_get_rx_buffer_zc(struct 
i40e_ring *rx_ring,\n+\t\t\t\t\t\t const unsigned int size)\n+{\n+\tstruct i40e_rx_buffer *rx_buffer;\n+\n+\trx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];\n+\n+\t/* we are reusing so sync this buffer for CPU use */\n+\tdma_sync_single_range_for_cpu(rx_ring->dev,\n+\t\t\t\t rx_buffer->dma, 0,\n+\t\t\t\t size,\n+\t\t\t\t DMA_BIDIRECTIONAL);\n+\n+\treturn rx_buffer;\n+}\n+\n+static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,\n+\t\t\t\t struct i40e_rx_buffer *old_buff)\n+{\n+\tu64 mask = rx_ring->xsk_umem->props.chunk_mask;\n+\tu64 hr = rx_ring->xsk_umem->headroom;\n+\tu16 nta = rx_ring->next_to_alloc;\n+\tstruct i40e_rx_buffer *new_buff;\n+\n+\tnew_buff = &rx_ring->rx_bi[nta];\n+\n+\t/* update, and store next to alloc */\n+\tnta++;\n+\trx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;\n+\n+\t/* transfer page from old buffer to new buffer */\n+\tnew_buff->dma\t\t= old_buff->dma & mask;\n+\tnew_buff->addr\t\t= (void *)((u64)old_buff->addr & mask);\n+\tnew_buff->handle\t= old_buff->handle & mask;\n+\n+\tnew_buff->dma += hr + XDP_PACKET_HEADROOM;\n+\tnew_buff->addr += hr + XDP_PACKET_HEADROOM;\n+\tnew_buff->handle += hr;\n+}\n+\n+/* Called from the XDP return API in NAPI context. */\n+void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)\n+{\n+\tstruct i40e_rx_buffer *new_buff;\n+\tstruct i40e_ring *rx_ring;\n+\tu64 mask;\n+\tu16 nta;\n+\n+\trx_ring = container_of(alloc, struct i40e_ring, zca);\n+\tmask = rx_ring->xsk_umem->props.chunk_mask;\n+\n+\tnta = rx_ring->next_to_alloc;\n+\n+\tnew_buff = &rx_ring->rx_bi[nta];\n+\n+\t/* update, and store next to alloc */\n+\tnta++;\n+\trx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;\n+\n+\thandle &= mask;\n+\n+\tnew_buff->dma\t\t= xdp_umem_get_dma(rx_ring->xsk_umem, handle);\n+\tnew_buff->addr\t\t= xdp_umem_get_data(rx_ring->xsk_umem, handle);\n+\tnew_buff->handle\t= (u64)handle;\n+\n+\tnew_buff->dma += rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;\n+\tnew_buff->addr += rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;\n+\tnew_buff->handle += rx_ring->xsk_umem->headroom;\n+}\n+\n+static struct sk_buff *i40e_zc_frame_to_skb(struct i40e_ring *rx_ring,\n+\t\t\t\t\t struct i40e_rx_buffer *rx_buffer,\n+\t\t\t\t\t struct xdp_buff *xdp)\n+{\n+\t/* XXX implement alloc skb and copy */\n+\ti40e_reuse_rx_buffer_zc(rx_ring, rx_buffer);\n+\treturn NULL;\n+}\n+\n+static void i40e_clean_programming_status_zc(struct i40e_ring *rx_ring,\n+\t\t\t\t\t union i40e_rx_desc *rx_desc,\n+\t\t\t\t\t u64 qw)\n+{\n+\tstruct i40e_rx_buffer *rx_buffer;\n+\tu32 ntc = rx_ring->next_to_clean;\n+\tu8 id;\n+\n+\t/* fetch, update, and store next to clean */\n+\trx_buffer = &rx_ring->rx_bi[ntc++];\n+\tntc = (ntc < rx_ring->count) ? 
ntc : 0;\n+\trx_ring->next_to_clean = ntc;\n+\n+\tprefetch(I40E_RX_DESC(rx_ring, ntc));\n+\n+\t/* place unused page back on the ring */\n+\ti40e_reuse_rx_buffer_zc(rx_ring, rx_buffer);\n+\trx_ring->rx_stats.page_reuse_count++;\n+\n+\t/* clear contents of buffer_info */\n+\trx_buffer->addr = NULL;\n+\n+\tid = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>\n+\t\t I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;\n+\n+\tif (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)\n+\t\ti40e_fd_handle_status(rx_ring, rx_desc, id);\n+}\n+\n+int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)\n+{\n+\tunsigned int total_rx_bytes = 0, total_rx_packets = 0;\n+\tu16 cleaned_count = I40E_DESC_UNUSED(rx_ring);\n+\tbool failure = false, xdp_xmit = false;\n+\tstruct sk_buff *skb;\n+\tstruct xdp_buff xdp;\n+\n+\txdp.rxq = &rx_ring->xdp_rxq;\n+\n+\twhile (likely(total_rx_packets < (unsigned int)budget)) {\n+\t\tstruct i40e_rx_buffer *rx_buffer;\n+\t\tunion i40e_rx_desc *rx_desc;\n+\t\tunsigned int size;\n+\t\tu16 vlan_tag;\n+\t\tu8 rx_ptype;\n+\t\tu64 qword;\n+\t\tu32 ntc;\n+\n+\t\t/* return some buffers to hardware, one at a time is too slow */\n+\t\tif (cleaned_count >= I40E_RX_BUFFER_WRITE) {\n+\t\t\tfailure = failure ||\n+\t\t\t\t i40e_alloc_rx_buffers_zc(rx_ring,\n+\t\t\t\t\t\t\t cleaned_count);\n+\t\t\tcleaned_count = 0;\n+\t\t}\n+\n+\t\trx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);\n+\n+\t\t/* status_error_len will always be zero for unused descriptors\n+\t\t * because it's cleared in cleanup, and overlaps with hdr_addr\n+\t\t * which is always zero because packet split isn't used, if the\n+\t\t * hardware wrote DD then the length will be non-zero\n+\t\t */\n+\t\tqword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);\n+\n+\t\t/* This memory barrier is needed to keep us from reading\n+\t\t * any other fields out of the rx_desc until we have\n+\t\t * verified the descriptor has been written back.\n+\t\t */\n+\t\tdma_rmb();\n+\n+\t\tif (unlikely(i40e_rx_is_programming_status(qword))) {\n+\t\t\ti40e_clean_programming_status_zc(rx_ring, rx_desc,\n+\t\t\t\t\t\t\t qword);\n+\t\t\tcleaned_count++;\n+\t\t\tcontinue;\n+\t\t}\n+\t\tsize = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>\n+\t\t I40E_RXD_QW1_LENGTH_PBUF_SHIFT;\n+\t\tif (!size)\n+\t\t\tbreak;\n+\n+\t\trx_buffer = i40e_get_rx_buffer_zc(rx_ring, size);\n+\n+\t\t/* retrieve a buffer from the ring */\n+\t\txdp.data = rx_buffer->addr;\n+\t\txdp_set_data_meta_invalid(&xdp);\n+\t\txdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;\n+\t\txdp.data_end = xdp.data + size;\n+\t\txdp.handle = rx_buffer->handle;\n+\n+\t\tskb = i40e_run_xdp_zc(rx_ring, &xdp);\n+\n+\t\tif (IS_ERR(skb)) {\n+\t\t\tif (PTR_ERR(skb) == -I40E_XDP_TX)\n+\t\t\t\txdp_xmit = true;\n+\t\t\telse\n+\t\t\t\ti40e_reuse_rx_buffer_zc(rx_ring, rx_buffer);\n+\t\t\ttotal_rx_bytes += size;\n+\t\t\ttotal_rx_packets++;\n+\t\t} else {\n+\t\t\tskb = i40e_zc_frame_to_skb(rx_ring, rx_buffer, &xdp);\n+\t\t\tif (!skb) {\n+\t\t\t\trx_ring->rx_stats.alloc_buff_failed++;\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t}\n+\n+\t\trx_buffer->addr = NULL;\n+\t\tcleaned_count++;\n+\n+\t\t/* don't care about non-EOP frames in XDP mode */\n+\t\tntc = rx_ring->next_to_clean + 1;\n+\t\tntc = (ntc < rx_ring->count) ? 
ntc : 0;\n+\t\trx_ring->next_to_clean = ntc;\n+\t\tprefetch(I40E_RX_DESC(rx_ring, ntc));\n+\n+\t\tif (i40e_cleanup_headers(rx_ring, skb, rx_desc)) {\n+\t\t\tskb = NULL;\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\t/* probably a little skewed due to removing CRC */\n+\t\ttotal_rx_bytes += skb->len;\n+\n+\t\tqword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);\n+\t\trx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>\n+\t\t\t I40E_RXD_QW1_PTYPE_SHIFT;\n+\n+\t\t/* populate checksum, VLAN, and protocol */\n+\t\ti40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);\n+\n+\t\tvlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?\n+\t\t\t le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;\n+\n+\t\ti40e_receive_skb(rx_ring, skb, vlan_tag);\n+\t\tskb = NULL;\n+\n+\t\t/* update budget accounting */\n+\t\ttotal_rx_packets++;\n+\t}\n+\n+\tif (xdp_xmit) {\n+\t\tstruct i40e_ring *xdp_ring =\n+\t\t\trx_ring->vsi->xdp_rings[rx_ring->queue_index];\n+\n+\t\ti40e_xdp_ring_update_tail(xdp_ring);\n+\t\txdp_do_flush_map();\n+\t}\n+\n+\tu64_stats_update_begin(&rx_ring->syncp);\n+\trx_ring->stats.packets += total_rx_packets;\n+\trx_ring->stats.bytes += total_rx_bytes;\n+\tu64_stats_update_end(&rx_ring->syncp);\n+\trx_ring->q_vector->rx.total_packets += total_rx_packets;\n+\trx_ring->q_vector->rx.total_bytes += total_rx_bytes;\n+\n+\t/* guarantee a trip back through this routine if there was a failure */\n+\treturn failure ? budget : (int)total_rx_packets;\n+}\n+\ndiff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h\nnew file mode 100644\nindex 000000000000..757ac5ca8511\n--- /dev/null\n+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h\n@@ -0,0 +1,17 @@\n+/* SPDX-License-Identifier: GPL-2.0 */\n+/* Copyright(c) 2018 Intel Corporation. 
*/\n+\n+#ifndef _I40E_XSK_H_\n+#define _I40E_XSK_H_\n+\n+struct i40e_vsi;\n+struct xdp_umem;\n+struct zero_copy_allocator;\n+\n+int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,\n+\t\t\tu16 qid);\n+void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);\n+bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);\n+int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);\n+\n+#endif /* _I40E_XSK_H_ */\ndiff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h\nindex 9fe472f2ac95..ec8fd3314097 100644\n--- a/include/net/xdp_sock.h\n+++ b/include/net/xdp_sock.h\n@@ -94,6 +94,25 @@ static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)\n {\n \treturn false;\n }\n+\n+static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)\n+{\n+\treturn NULL;\n+}\n+\n+static inline void xsk_umem_discard_addr(struct xdp_umem *umem)\n+{\n+}\n #endif /* CONFIG_XDP_SOCKETS */\n \n+static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)\n+{\n+\treturn umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));\n+}\n+\n+static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)\n+{\n+\treturn umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));\n+}\n+\n #endif /* _LINUX_XDP_SOCK_H */\ndiff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h\nindex f11560334f88..c8be1ad3eb88 100644\n--- a/net/xdp/xdp_umem.h\n+++ b/net/xdp/xdp_umem.h\n@@ -8,16 +8,6 @@\n \n #include <net/xdp_sock.h>\n \n-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)\n-{\n-\treturn umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));\n-}\n-\n-static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)\n-{\n-\treturn umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));\n-}\n-\n int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,\n \t\t\tu32 queue_id, u16 flags);\n bool xdp_umem_validate_queues(struct xdp_umem *umem);\n", "prefixes": [ "bpf-next", "09/11" ] }
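The response above carries everything needed to work with the patch outside the API browser: the "mbox" field points at the raw mailbox for this message, and each "series" entry exposes an mbox URL for the whole series. A small usage sketch, assuming the requests library (the local filename is arbitrary):

```python
import requests

resp = requests.get("http://patchwork.ozlabs.org/api/patches/925073/")
resp.raise_for_status()
patch = resp.json()

print(patch["state"], "/", patch["check"])  # "changes-requested" / "pending"
print(patch["series"][0]["name"])           # "AF_XDP: introducing zero-copy support"

# Download the per-patch mbox; the saved file can then be applied with `git am`.
mbox = requests.get(patch["mbox"])
mbox.raise_for_status()
with open("i40e-af_xdp-zc-rx.mbox", "wb") as f:
    f.write(mbox.content)
```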