Patch Detail

Supported methods:
    GET   - Show a patch.
    PATCH - Update a patch.
    PUT   - Update a patch.

GET /api/patches/887919/?format=api
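The JSON object below is the response to the GET request above. As a brief, hedged illustration (not part of the original page), the same patch could be fetched programmatically roughly as follows; the use of the third-party requests package is an assumption of the sketch, while the patch ID and the field names printed at the end come from the response shown below.

# Minimal sketch (assumptions noted above): fetch this patch via the Patchwork REST API.
import requests

BASE_URL = "http://patchwork.ozlabs.org/api"
PATCH_ID = 887919  # the patch shown on this page

resp = requests.get(f"{BASE_URL}/patches/{PATCH_ID}/", timeout=30)
resp.raise_for_status()
patch = resp.json()

# Fields taken from the response below.
print(patch["name"])   # "[v3,10/15] ice: Implement transmit and NAPI support"
print(patch["state"])  # "superseded"
print(patch["mbox"])   # mbox URL, suitable for feeding to `git am`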
{ "id": 887919, "url": "http://patchwork.ozlabs.org/api/patches/887919/?format=api", "web_url": "http://patchwork.ozlabs.org/project/intel-wired-lan/patch/20180319215644.31978-10-jeffrey.t.kirsher@intel.com/", "project": { "id": 46, "url": "http://patchwork.ozlabs.org/api/projects/46/?format=api", "name": "Intel Wired Ethernet development", "link_name": "intel-wired-lan", "list_id": "intel-wired-lan.osuosl.org", "list_email": "intel-wired-lan@osuosl.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20180319215644.31978-10-jeffrey.t.kirsher@intel.com>", "list_archive_url": null, "date": "2018-03-19T21:56:39", "name": "[v3,10/15] ice: Implement transmit and NAPI support", "commit_ref": null, "pull_url": null, "state": "superseded", "archived": false, "hash": "2877b790220d56b2e8dfa2cf677d84e247194499", "submitter": { "id": 473, "url": "http://patchwork.ozlabs.org/api/people/473/?format=api", "name": "Kirsher, Jeffrey T", "email": "jeffrey.t.kirsher@intel.com" }, "delegate": { "id": 68, "url": "http://patchwork.ozlabs.org/api/users/68/?format=api", "username": "jtkirshe", "first_name": "Jeff", "last_name": "Kirsher", "email": "jeffrey.t.kirsher@intel.com" }, "mbox": "http://patchwork.ozlabs.org/project/intel-wired-lan/patch/20180319215644.31978-10-jeffrey.t.kirsher@intel.com/mbox/", "series": [ { "id": 34702, "url": "http://patchwork.ozlabs.org/api/series/34702/?format=api", "web_url": "http://patchwork.ozlabs.org/project/intel-wired-lan/list/?series=34702", "date": "2018-03-19T21:56:30", "name": "[v3,01/15] ice: Add basic driver framework for Intel(R) E800 Series", "version": 3, "mbox": "http://patchwork.ozlabs.org/series/34702/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/887919/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/887919/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<intel-wired-lan-bounces@osuosl.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "intel-wired-lan@lists.osuosl.org" ], "Delivered-To": [ "patchwork-incoming@bilbo.ozlabs.org", "intel-wired-lan@lists.osuosl.org" ], "Authentication-Results": [ "ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=osuosl.org\n\t(client-ip=140.211.166.136; helo=silver.osuosl.org;\n\tenvelope-from=intel-wired-lan-bounces@osuosl.org;\n\treceiver=<UNKNOWN>)", "ozlabs.org;\n\tdmarc=none (p=none dis=none) header.from=intel.com" ], "Received": [ "from silver.osuosl.org (smtp3.osuosl.org [140.211.166.136])\n\t(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 404qf913jGz9sLw\n\tfor <incoming@patchwork.ozlabs.org>;\n\tTue, 20 Mar 2018 08:56:48 +1100 (AEDT)", "from localhost (localhost [127.0.0.1])\n\tby silver.osuosl.org (Postfix) with ESMTP id 7F22426021;\n\tMon, 19 Mar 2018 21:56:47 +0000 (UTC)", "from silver.osuosl.org ([127.0.0.1])\n\tby localhost (.osuosl.org [127.0.0.1]) (amavisd-new, port 10024)\n\twith ESMTP id 1rRucwlOZKpR; Mon, 19 Mar 2018 21:56:37 +0000 (UTC)", "from ash.osuosl.org (ash.osuosl.org [140.211.166.34])\n\tby silver.osuosl.org (Postfix) with ESMTP id 9F4EC265BC;\n\tMon, 19 Mar 2018 21:56:20 +0000 (UTC)", "from hemlock.osuosl.org (smtp2.osuosl.org [140.211.166.133])\n\tby ash.osuosl.org (Postfix) with ESMTP id 842C11C2272\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tMon, 19 Mar 2018 21:56:18 +0000 (UTC)", "from localhost (localhost [127.0.0.1])\n\tby 
hemlock.osuosl.org (Postfix) with ESMTP id 7719688ECF\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tMon, 19 Mar 2018 21:56:18 +0000 (UTC)", "from hemlock.osuosl.org ([127.0.0.1])\n\tby localhost (.osuosl.org [127.0.0.1]) (amavisd-new, port 10024)\n\twith ESMTP id KpQaS+c0cLxy for <intel-wired-lan@lists.osuosl.org>;\n\tMon, 19 Mar 2018 21:56:13 +0000 (UTC)", "from mga02.intel.com (mga02.intel.com [134.134.136.20])\n\tby hemlock.osuosl.org (Postfix) with ESMTPS id 564AA8906F\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tMon, 19 Mar 2018 21:56:09 +0000 (UTC)", "from orsmga008.jf.intel.com ([10.7.209.65])\n\tby orsmga101.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t19 Mar 2018 14:56:07 -0700", "from jtkirshe-nuc.jf.intel.com ([134.134.177.59])\n\tby orsmga008.jf.intel.com with ESMTP; 19 Mar 2018 14:56:07 -0700" ], "X-Virus-Scanned": [ "amavisd-new at osuosl.org", "amavisd-new at osuosl.org" ], "X-Greylist": "domain auto-whitelisted by SQLgrey-1.7.6", "X-Amp-Result": "SKIPPED(no attachment in message)", "X-Amp-File-Uploaded": "False", "X-ExtLoop1": "1", "X-IronPort-AV": "E=Sophos;i=\"5.48,332,1517904000\"; d=\"scan'208\";a=\"26667087\"", "From": "Jeff Kirsher <jeffrey.t.kirsher@intel.com>", "To": "intel-wired-lan@lists.osuosl.org", "Date": "Mon, 19 Mar 2018 14:56:39 -0700", "Message-Id": "<20180319215644.31978-10-jeffrey.t.kirsher@intel.com>", "X-Mailer": "git-send-email 2.14.3", "In-Reply-To": "<20180319215644.31978-1-jeffrey.t.kirsher@intel.com>", "References": "<20180319215644.31978-1-jeffrey.t.kirsher@intel.com>", "Subject": "[Intel-wired-lan] [PATCH v3 10/15] ice: Implement transmit and NAPI\n\tsupport", "X-BeenThere": "intel-wired-lan@osuosl.org", "X-Mailman-Version": "2.1.24", "Precedence": "list", "List-Id": "Intel Wired Ethernet Linux Kernel Driver Development\n\t<intel-wired-lan.osuosl.org>", "List-Unsubscribe": "<https://lists.osuosl.org/mailman/options/intel-wired-lan>, \n\t<mailto:intel-wired-lan-request@osuosl.org?subject=unsubscribe>", "List-Archive": "<http://lists.osuosl.org/pipermail/intel-wired-lan/>", "List-Post": "<mailto:intel-wired-lan@osuosl.org>", "List-Help": "<mailto:intel-wired-lan-request@osuosl.org?subject=help>", "List-Subscribe": "<https://lists.osuosl.org/mailman/listinfo/intel-wired-lan>, \n\t<mailto:intel-wired-lan-request@osuosl.org?subject=subscribe>", "MIME-Version": "1.0", "Content-Type": "text/plain; charset=\"us-ascii\"", "Content-Transfer-Encoding": "7bit", "Errors-To": "intel-wired-lan-bounces@osuosl.org", "Sender": "\"Intel-wired-lan\" <intel-wired-lan-bounces@osuosl.org>" }, "content": "From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>\n\nThis patch implements ice_start_xmit (the handler for ndo_start_xmit) and\nrelated functions. ice_start_xmit ultimately calls ice_tx_map, where the\nTx descriptor is built and posted to the hardware by bumping the ring tail.\n\nThis patch also implements ice_napi_poll, which is invoked when there's an\ninterrupt on the VSI's queues. The interrupt can be due to either a\ncompleted Tx or an Rx event. In case of a completed Tx/Rx event, resources\nare reclaimed. 
Additionally, in case of an Rx event, the skb is fetched\nand passed up to the network stack.\n\nSigned-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>\n---\n drivers/net/ethernet/intel/ice/ice.h | 1 +\n drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h | 46 ++\n drivers/net/ethernet/intel/ice/ice_main.c | 55 ++\n drivers/net/ethernet/intel/ice/ice_txrx.c | 1026 +++++++++++++++++++++++-\n drivers/net/ethernet/intel/ice/ice_txrx.h | 45 ++\n 5 files changed, 1171 insertions(+), 2 deletions(-)", "diff": "diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h\nindex e3ec19099e37..7998e57994bf 100644\n--- a/drivers/net/ethernet/intel/ice/ice.h\n+++ b/drivers/net/ethernet/intel/ice/ice.h\n@@ -74,6 +74,7 @@\n \t\t(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \\\n \t\t ICE_AQ_VSI_UP_TABLE_UP##i##_M)\n \n+#define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i]))\n #define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i]))\n \n #define ice_for_each_txq(vsi, i) \\\ndiff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h\nindex 0cdf1ae480cf..c930f3e06ecc 100644\n--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h\n+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h\n@@ -145,6 +145,33 @@ enum ice_rx_flg64_bits {\n \tICE_RXFLG_RSVD\t\t= 63\n };\n \n+/* for ice_32byte_rx_flex_desc.ptype_flexi_flags0 member */\n+#define ICE_RX_FLEX_DESC_PTYPE_M\t(0x3FF) /* 10-bits */\n+\n+/* for ice_32byte_rx_flex_desc.pkt_length member */\n+#define ICE_RX_FLX_DESC_PKT_LEN_M\t(0x3FFF) /* 14-bits */\n+\n+enum ice_rx_flex_desc_status_error_0_bits {\n+\t/* Note: These are predefined bit offsets */\n+\tICE_RX_FLEX_DESC_STATUS0_DD_S = 0,\n+\tICE_RX_FLEX_DESC_STATUS0_EOF_S,\n+\tICE_RX_FLEX_DESC_STATUS0_HBO_S,\n+\tICE_RX_FLEX_DESC_STATUS0_L3L4P_S,\n+\tICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S,\n+\tICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S,\n+\tICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S,\n+\tICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S,\n+\tICE_RX_FLEX_DESC_STATUS0_LPBK_S,\n+\tICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S,\n+\tICE_RX_FLEX_DESC_STATUS0_RXE_S,\n+\tICE_RX_FLEX_DESC_STATUS0_CRCP_S,\n+\tICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S,\n+\tICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S,\n+\tICE_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S,\n+\tICE_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S,\n+\tICE_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! 
*/\n+};\n+\n #define ICE_RXQ_CTX_SIZE_DWORDS\t\t8\n #define ICE_RXQ_CTX_SZ\t\t\t(ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))\n \n@@ -215,6 +242,25 @@ struct ice_tx_desc {\n \t__le64 cmd_type_offset_bsz;\n };\n \n+enum ice_tx_desc_dtype_value {\n+\tICE_TX_DESC_DTYPE_DATA\t\t= 0x0,\n+\tICE_TX_DESC_DTYPE_CTX\t\t= 0x1,\n+\t/* DESC_DONE - HW has completed write-back of descriptor */\n+\tICE_TX_DESC_DTYPE_DESC_DONE\t= 0xF,\n+};\n+\n+#define ICE_TXD_QW1_CMD_S\t4\n+#define ICE_TXD_QW1_CMD_M\t(0xFFFUL << ICE_TXD_QW1_CMD_S)\n+\n+enum ice_tx_desc_cmd_bits {\n+\tICE_TX_DESC_CMD_EOP\t\t\t= 0x0001,\n+\tICE_TX_DESC_CMD_RS\t\t\t= 0x0002,\n+};\n+\n+#define ICE_TXD_QW1_OFFSET_S\t16\n+#define ICE_TXD_QW1_TX_BUF_SZ_S\t34\n+#define ICE_TXD_QW1_L2TAG1_S\t48\n+\n #define ICE_LAN_TXQ_MAX_QGRPS\t127\n #define ICE_LAN_TXQ_MAX_QDIS\t1023\n \ndiff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c\nindex afb400a1f1d2..b802cac8376c 100644\n--- a/drivers/net/ethernet/intel/ice/ice_main.c\n+++ b/drivers/net/ethernet/intel/ice/ice_main.c\n@@ -1272,6 +1272,23 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi, bool alloc_qvectors)\n \treturn -ENOMEM;\n }\n \n+/**\n+ * ice_msix_clean_rings - MSIX mode Interrupt Handler\n+ * @irq: interrupt number\n+ * @data: pointer to a q_vector\n+ */\n+static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)\n+{\n+\tstruct ice_q_vector *q_vector = (struct ice_q_vector *)data;\n+\n+\tif (!q_vector->tx.ring && !q_vector->rx.ring)\n+\t\treturn IRQ_HANDLED;\n+\n+\tnapi_schedule(&q_vector->napi);\n+\n+\treturn IRQ_HANDLED;\n+}\n+\n /**\n * ice_vsi_alloc - Allocates the next available struct vsi in the PF\n * @pf: board private structure\n@@ -1312,6 +1329,8 @@ static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type)\n \t\tif (ice_vsi_alloc_arrays(vsi, true))\n \t\t\tgoto err_rings;\n \n+\t\t/* Setup default MSIX irq handler for VSI */\n+\t\tvsi->irq_handler = ice_msix_clean_rings;\n \t\tbreak;\n \tdefault:\n \t\tdev_warn(&pf->pdev->dev, \"Unknown VSI type %d\\n\", vsi->type);\n@@ -1755,6 +1774,9 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)\n \tif (cpu_online(v_idx))\n \t\tcpumask_set_cpu(v_idx, &q_vector->affinity_mask);\n \n+\tif (vsi->netdev)\n+\t\tnetif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,\n+\t\t\t NAPI_POLL_WEIGHT);\n \t/* tie q_vector and vsi together */\n \tvsi->q_vectors[v_idx] = q_vector;\n \n@@ -2928,6 +2950,21 @@ static int ice_vsi_stop_tx_rx_rings(struct ice_vsi *vsi)\n \treturn 0;\n }\n \n+/**\n+ * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI\n+ * @vsi: the VSI being configured\n+ */\n+static void ice_napi_enable_all(struct ice_vsi *vsi)\n+{\n+\tint q_idx;\n+\n+\tif (!vsi->netdev)\n+\t\treturn;\n+\n+\tfor (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)\n+\t\tnapi_enable(&vsi->q_vectors[q_idx]->napi);\n+}\n+\n /**\n * ice_up_complete - Finish the last steps of bringing up a connection\n * @vsi: The VSI being configured\n@@ -2953,6 +2990,7 @@ static int ice_up_complete(struct ice_vsi *vsi)\n \t\treturn err;\n \n \tclear_bit(__ICE_DOWN, vsi->state);\n+\tice_napi_enable_all(vsi);\n \tice_vsi_ena_irq(vsi);\n \n \tif (vsi->port_info &&\n@@ -2968,6 +3006,21 @@ static int ice_up_complete(struct ice_vsi *vsi)\n \treturn err;\n }\n \n+/**\n+ * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI\n+ * @vsi: VSI having NAPI disabled\n+ */\n+static void ice_napi_disable_all(struct ice_vsi *vsi)\n+{\n+\tint q_idx;\n+\n+\tif 
(!vsi->netdev)\n+\t\treturn;\n+\n+\tfor (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)\n+\t\tnapi_disable(&vsi->q_vectors[q_idx]->napi);\n+}\n+\n /**\n * ice_down - Shutdown the connection\n * @vsi: The VSI being stopped\n@@ -2986,6 +3039,7 @@ static int ice_down(struct ice_vsi *vsi)\n \n \tice_vsi_dis_irq(vsi);\n \terr = ice_vsi_stop_tx_rx_rings(vsi);\n+\tice_napi_disable_all(vsi);\n \n \tice_for_each_txq(vsi, i)\n \t\tice_clean_tx_ring(vsi->tx_rings[i]);\n@@ -3265,4 +3319,5 @@ static int ice_stop(struct net_device *netdev)\n static const struct net_device_ops ice_netdev_ops = {\n \t.ndo_open = ice_open,\n \t.ndo_stop = ice_stop,\n+\t.ndo_start_xmit = ice_start_xmit,\n };\ndiff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c\nindex 002c26a4bca6..5775cdde57e3 100644\n--- a/drivers/net/ethernet/intel/ice/ice_txrx.c\n+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c\n@@ -21,6 +21,8 @@\n #include <linux/mm.h>\n #include \"ice.h\"\n \n+#define ICE_RX_HDR_SIZE\t\t256\n+\n /**\n * ice_unmap_and_free_tx_buf - Release a Tx buffer\n * @ring: the ring that owns the buffer\n@@ -106,6 +108,129 @@ void ice_free_tx_ring(struct ice_ring *tx_ring)\n \t}\n }\n \n+/**\n+ * ice_clean_tx_irq - Reclaim resources after transmit completes\n+ * @vsi: the VSI we care about\n+ * @tx_ring: Tx ring to clean\n+ * @napi_budget: Used to determine if we are in netpoll\n+ *\n+ * Returns true if there's any budget left (e.g. the clean is finished)\n+ */\n+static bool ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring,\n+\t\t\t int napi_budget)\n+{\n+\tunsigned int total_bytes = 0, total_pkts = 0;\n+\tunsigned int budget = vsi->work_lmt;\n+\ts16 i = tx_ring->next_to_clean;\n+\tstruct ice_tx_desc *tx_desc;\n+\tstruct ice_tx_buf *tx_buf;\n+\n+\ttx_buf = &tx_ring->tx_buf[i];\n+\ttx_desc = ICE_TX_DESC(tx_ring, i);\n+\ti -= tx_ring->count;\n+\n+\tdo {\n+\t\tstruct ice_tx_desc *eop_desc = tx_buf->next_to_watch;\n+\n+\t\t/* if next_to_watch is not set then there is no work pending */\n+\t\tif (!eop_desc)\n+\t\t\tbreak;\n+\n+\t\tsmp_rmb();\t/* prevent any other reads prior to eop_desc */\n+\n+\t\t/* if the descriptor isn't done, no work yet to do */\n+\t\tif (!(eop_desc->cmd_type_offset_bsz &\n+\t\t cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))\n+\t\t\tbreak;\n+\n+\t\t/* clear next_to_watch to prevent false hangs */\n+\t\ttx_buf->next_to_watch = NULL;\n+\n+\t\t/* update the statistics for this packet */\n+\t\ttotal_bytes += tx_buf->bytecount;\n+\t\ttotal_pkts += tx_buf->gso_segs;\n+\n+\t\t/* free the skb */\n+\t\tnapi_consume_skb(tx_buf->skb, napi_budget);\n+\n+\t\t/* unmap skb header data */\n+\t\tdma_unmap_single(tx_ring->dev,\n+\t\t\t\t dma_unmap_addr(tx_buf, dma),\n+\t\t\t\t dma_unmap_len(tx_buf, len),\n+\t\t\t\t DMA_TO_DEVICE);\n+\n+\t\t/* clear tx_buf data */\n+\t\ttx_buf->skb = NULL;\n+\t\tdma_unmap_len_set(tx_buf, len, 0);\n+\n+\t\t/* unmap remaining buffers */\n+\t\twhile (tx_desc != eop_desc) {\n+\t\t\ttx_buf++;\n+\t\t\ttx_desc++;\n+\t\t\ti++;\n+\t\t\tif (unlikely(!i)) {\n+\t\t\t\ti -= tx_ring->count;\n+\t\t\t\ttx_buf = tx_ring->tx_buf;\n+\t\t\t\ttx_desc = ICE_TX_DESC(tx_ring, 0);\n+\t\t\t}\n+\n+\t\t\t/* unmap any remaining paged data */\n+\t\t\tif (dma_unmap_len(tx_buf, len)) {\n+\t\t\t\tdma_unmap_page(tx_ring->dev,\n+\t\t\t\t\t dma_unmap_addr(tx_buf, dma),\n+\t\t\t\t\t dma_unmap_len(tx_buf, len),\n+\t\t\t\t\t DMA_TO_DEVICE);\n+\t\t\t\tdma_unmap_len_set(tx_buf, len, 0);\n+\t\t\t}\n+\t\t}\n+\n+\t\t/* move us one more past the eop_desc for start of next pkt 
*/\n+\t\ttx_buf++;\n+\t\ttx_desc++;\n+\t\ti++;\n+\t\tif (unlikely(!i)) {\n+\t\t\ti -= tx_ring->count;\n+\t\t\ttx_buf = tx_ring->tx_buf;\n+\t\t\ttx_desc = ICE_TX_DESC(tx_ring, 0);\n+\t\t}\n+\n+\t\tprefetch(tx_desc);\n+\n+\t\t/* update budget accounting */\n+\t\tbudget--;\n+\t} while (likely(budget));\n+\n+\ti += tx_ring->count;\n+\ttx_ring->next_to_clean = i;\n+\tu64_stats_update_begin(&tx_ring->syncp);\n+\ttx_ring->stats.bytes += total_bytes;\n+\ttx_ring->stats.pkts += total_pkts;\n+\tu64_stats_update_end(&tx_ring->syncp);\n+\ttx_ring->q_vector->tx.total_bytes += total_bytes;\n+\ttx_ring->q_vector->tx.total_pkts += total_pkts;\n+\n+\tnetdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,\n+\t\t\t\t total_bytes);\n+\n+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))\n+\tif (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) &&\n+\t\t (ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {\n+\t\t/* Make sure that anybody stopping the queue after this\n+\t\t * sees the new next_to_clean.\n+\t\t */\n+\t\tsmp_mb();\n+\t\tif (__netif_subqueue_stopped(tx_ring->netdev,\n+\t\t\t\t\t tx_ring->q_index) &&\n+\t\t !test_bit(__ICE_DOWN, vsi->state)) {\n+\t\t\tnetif_wake_subqueue(tx_ring->netdev,\n+\t\t\t\t\t tx_ring->q_index);\n+\t\t\t++tx_ring->tx_stats.restart_q;\n+\t\t}\n+\t}\n+\n+\treturn !!budget;\n+}\n+\n /**\n * ice_setup_tx_ring - Allocate the Tx descriptors\n * @tx_ring: the tx ring to set up\n@@ -288,13 +413,17 @@ static bool ice_alloc_mapped_page(struct ice_ring *rx_ring,\n \tdma_addr_t dma;\n \n \t/* since we are recycling buffers we should seldom need to alloc */\n-\tif (likely(page))\n+\tif (likely(page)) {\n+\t\trx_ring->rx_stats.page_reuse_count++;\n \t\treturn true;\n+\t}\n \n \t/* alloc new page for storage */\n \tpage = alloc_page(GFP_ATOMIC | __GFP_NOWARN);\n-\tif (unlikely(!page))\n+\tif (unlikely(!page)) {\n+\t\trx_ring->rx_stats.alloc_page_failed++;\n \t\treturn false;\n+\t}\n \n \t/* map page for use */\n \tdma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);\n@@ -304,6 +433,7 @@ static bool ice_alloc_mapped_page(struct ice_ring *rx_ring,\n \t */\n \tif (dma_mapping_error(rx_ring->dev, dma)) {\n \t\t__free_pages(page, 0);\n+\t\trx_ring->rx_stats.alloc_page_failed++;\n \t\treturn false;\n \t}\n \n@@ -373,3 +503,895 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)\n \t */\n \treturn true;\n }\n+\n+/**\n+ * ice_page_is_reserved - check if reuse is possible\n+ * @page: page struct to check\n+ */\n+static bool ice_page_is_reserved(struct page *page)\n+{\n+\treturn (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);\n+}\n+\n+/**\n+ * ice_add_rx_frag - Add contents of Rx buffer to sk_buff\n+ * @rx_buf: buffer containing page to add\n+ * @rx_desc: descriptor containing length of buffer written by hardware\n+ * @skb: sk_buf to place the data into\n+ *\n+ * This function will add the data contained in rx_buf->page to the skb.\n+ * This is done either through a direct copy if the data in the buffer is\n+ * less than the skb header size, otherwise it will just attach the page as\n+ * a frag to the skb.\n+ *\n+ * The function will then update the page offset if necessary and return\n+ * true if the buffer can be reused by the adapter.\n+ */\n+static bool ice_add_rx_frag(struct ice_rx_buf *rx_buf,\n+\t\t\t union ice_32b_rx_flex_desc *rx_desc,\n+\t\t\t struct sk_buff *skb)\n+{\n+#if (PAGE_SIZE < 8192)\n+\tunsigned int truesize = ICE_RXBUF_2048;\n+#else\n+\tunsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;\n+\tunsigned int 
truesize;\n+#endif /* PAGE_SIZE < 8192) */\n+\n+\tstruct page *page;\n+\tunsigned int size;\n+\n+\tsize = le16_to_cpu(rx_desc->wb.pkt_len) &\n+\t\tICE_RX_FLX_DESC_PKT_LEN_M;\n+\n+\tpage = rx_buf->page;\n+\n+#if (PAGE_SIZE >= 8192)\n+\ttruesize = ALIGN(size, L1_CACHE_BYTES);\n+#endif /* PAGE_SIZE >= 8192) */\n+\n+\t/* will the data fit in the skb we allocated? if so, just\n+\t * copy it as it is pretty small anyway\n+\t */\n+\tif (size <= ICE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) {\n+\t\tunsigned char *va = page_address(page) + rx_buf->page_offset;\n+\n+\t\tmemcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));\n+\n+\t\t/* page is not reserved, we can reuse buffer as-is */\n+\t\tif (likely(!ice_page_is_reserved(page)))\n+\t\t\treturn true;\n+\n+\t\t/* this page cannot be reused so discard it */\n+\t\t__free_pages(page, 0);\n+\t\treturn false;\n+\t}\n+\n+\tskb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,\n+\t\t\trx_buf->page_offset, size, truesize);\n+\n+\t/* avoid re-using remote pages */\n+\tif (unlikely(ice_page_is_reserved(page)))\n+\t\treturn false;\n+\n+#if (PAGE_SIZE < 8192)\n+\t/* if we are only owner of page we can reuse it */\n+\tif (unlikely(page_count(page) != 1))\n+\t\treturn false;\n+\n+\t/* flip page offset to other buffer */\n+\trx_buf->page_offset ^= truesize;\n+#else\n+\t/* move offset up to the next cache line */\n+\trx_buf->page_offset += truesize;\n+\n+\tif (rx_buf->page_offset > last_offset)\n+\t\treturn false;\n+#endif /* PAGE_SIZE < 8192) */\n+\n+\t/* Even if we own the page, we are not allowed to use atomic_set()\n+\t * This would break get_page_unless_zero() users.\n+\t */\n+\tget_page(rx_buf->page);\n+\n+\treturn true;\n+}\n+\n+/**\n+ * ice_reuse_rx_page - page flip buffer and store it back on the ring\n+ * @rx_ring: rx descriptor ring to store buffers on\n+ * @old_buf: donor buffer to have page reused\n+ *\n+ * Synchronizes page for reuse by the adapter\n+ */\n+static void ice_reuse_rx_page(struct ice_ring *rx_ring,\n+\t\t\t struct ice_rx_buf *old_buf)\n+{\n+\tu16 nta = rx_ring->next_to_alloc;\n+\tstruct ice_rx_buf *new_buf;\n+\n+\tnew_buf = &rx_ring->rx_buf[nta];\n+\n+\t/* update, and store next to alloc */\n+\tnta++;\n+\trx_ring->next_to_alloc = (nta < rx_ring->count) ? 
nta : 0;\n+\n+\t/* transfer page from old buffer to new buffer */\n+\t*new_buf = *old_buf;\n+}\n+\n+/**\n+ * ice_fetch_rx_buf - Allocate skb and populate it\n+ * @rx_ring: rx descriptor ring to transact packets on\n+ * @rx_desc: descriptor containing info written by hardware\n+ *\n+ * This function allocates an skb on the fly, and populates it with the page\n+ * data from the current receive descriptor, taking care to set up the skb\n+ * correctly, as well as handling calling the page recycle function if\n+ * necessary.\n+ */\n+static struct sk_buff *ice_fetch_rx_buf(struct ice_ring *rx_ring,\n+\t\t\t\t\tunion ice_32b_rx_flex_desc *rx_desc)\n+{\n+\tstruct ice_rx_buf *rx_buf;\n+\tstruct sk_buff *skb;\n+\tstruct page *page;\n+\n+\trx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];\n+\tpage = rx_buf->page;\n+\tprefetchw(page);\n+\n+\tskb = rx_buf->skb;\n+\n+\tif (likely(!skb)) {\n+\t\tu8 *page_addr = page_address(page) + rx_buf->page_offset;\n+\n+\t\t/* prefetch first cache line of first page */\n+\t\tprefetch(page_addr);\n+#if L1_CACHE_BYTES < 128\n+\t\tprefetch((void *)(page_addr + L1_CACHE_BYTES));\n+#endif /* L1_CACHE_BYTES */\n+\n+\t\t/* allocate a skb to store the frags */\n+\t\tskb = __napi_alloc_skb(&rx_ring->q_vector->napi,\n+\t\t\t\t ICE_RX_HDR_SIZE,\n+\t\t\t\t GFP_ATOMIC | __GFP_NOWARN);\n+\t\tif (unlikely(!skb)) {\n+\t\t\trx_ring->rx_stats.alloc_buf_failed++;\n+\t\t\treturn NULL;\n+\t\t}\n+\n+\t\t/* we will be copying header into skb->data in\n+\t\t * pskb_may_pull so it is in our interest to prefetch\n+\t\t * it now to avoid a possible cache miss\n+\t\t */\n+\t\tprefetchw(skb->data);\n+\n+\t\tskb_record_rx_queue(skb, rx_ring->q_index);\n+\t} else {\n+\t\t/* we are reusing so sync this buffer for CPU use */\n+\t\tdma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,\n+\t\t\t\t\t rx_buf->page_offset,\n+\t\t\t\t\t ICE_RXBUF_2048,\n+\t\t\t\t\t DMA_FROM_DEVICE);\n+\n+\t\trx_buf->skb = NULL;\n+\t}\n+\n+\t/* pull page into skb */\n+\tif (ice_add_rx_frag(rx_buf, rx_desc, skb)) {\n+\t\t/* hand second half of page back to the ring */\n+\t\tice_reuse_rx_page(rx_ring, rx_buf);\n+\t\trx_ring->rx_stats.page_reuse_count++;\n+\t} else {\n+\t\t/* we are not reusing the buffer so unmap it */\n+\t\tdma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE,\n+\t\t\t DMA_FROM_DEVICE);\n+\t}\n+\n+\t/* clear contents of buffer_info */\n+\trx_buf->page = NULL;\n+\n+\treturn skb;\n+}\n+\n+/**\n+ * ice_pull_tail - ice specific version of skb_pull_tail\n+ * @skb: pointer to current skb being adjusted\n+ *\n+ * This function is an ice specific version of __pskb_pull_tail. 
The\n+ * main difference between this version and the original function is that\n+ * this function can make several assumptions about the state of things\n+ * that allow for significant optimizations versus the standard function.\n+ * As a result we can do things like drop a frag and maintain an accurate\n+ * truesize for the skb.\n+ */\n+static void ice_pull_tail(struct sk_buff *skb)\n+{\n+\tstruct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];\n+\tunsigned int pull_len;\n+\tunsigned char *va;\n+\n+\t/* it is valid to use page_address instead of kmap since we are\n+\t * working with pages allocated out of the lomem pool per\n+\t * alloc_page(GFP_ATOMIC)\n+\t */\n+\tva = skb_frag_address(frag);\n+\n+\t/* we need the header to contain the greater of either ETH_HLEN or\n+\t * 60 bytes if the skb->len is less than 60 for skb_pad.\n+\t */\n+\tpull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE);\n+\n+\t/* align pull length to size of long to optimize memcpy performance */\n+\tskb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));\n+\n+\t/* update all of the pointers */\n+\tskb_frag_size_sub(frag, pull_len);\n+\tfrag->page_offset += pull_len;\n+\tskb->data_len -= pull_len;\n+\tskb->tail += pull_len;\n+}\n+\n+/**\n+ * ice_cleanup_headers - Correct empty headers\n+ * @skb: pointer to current skb being fixed\n+ *\n+ * Also address the case where we are pulling data in on pages only\n+ * and as such no data is present in the skb header.\n+ *\n+ * In addition if skb is not at least 60 bytes we need to pad it so that\n+ * it is large enough to qualify as a valid Ethernet frame.\n+ *\n+ * Returns true if an error was encountered and skb was freed.\n+ */\n+static bool ice_cleanup_headers(struct sk_buff *skb)\n+{\n+\t/* place header in linear portion of buffer */\n+\tif (skb_is_nonlinear(skb))\n+\t\tice_pull_tail(skb);\n+\n+\t/* if eth_skb_pad returns an error the skb was freed */\n+\tif (eth_skb_pad(skb))\n+\t\treturn true;\n+\n+\treturn false;\n+}\n+\n+/**\n+ * ice_test_staterr - tests bits in Rx descriptor status and error fields\n+ * @rx_desc: pointer to receive descriptor (in le64 format)\n+ * @stat_err_bits: value to mask\n+ *\n+ * This function does some fast chicanery in order to return the\n+ * value of the mask which is really only used for boolean tests.\n+ * The status_error_len doesn't need to be shifted because it begins\n+ * at offset zero.\n+ */\n+static bool ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc,\n+\t\t\t const u16 stat_err_bits)\n+{\n+\treturn !!(rx_desc->wb.status_error0 &\n+\t\t cpu_to_le16(stat_err_bits));\n+}\n+\n+/**\n+ * ice_is_non_eop - process handling of non-EOP buffers\n+ * @rx_ring: Rx ring being processed\n+ * @rx_desc: Rx descriptor for current buffer\n+ * @skb: Current socket buffer containing buffer in progress\n+ *\n+ * This function updates next to clean. If the buffer is an EOP buffer\n+ * this function exits returning false, otherwise it will place the\n+ * sk_buff in the next buffer to be chained and return true indicating\n+ * that this is in fact a non-EOP buffer.\n+ */\n+static bool ice_is_non_eop(struct ice_ring *rx_ring,\n+\t\t\t union ice_32b_rx_flex_desc *rx_desc,\n+\t\t\t struct sk_buff *skb)\n+{\n+\tu32 ntc = rx_ring->next_to_clean + 1;\n+\n+\t/* fetch, update, and store next to clean */\n+\tntc = (ntc < rx_ring->count) ? 
ntc : 0;\n+\trx_ring->next_to_clean = ntc;\n+\n+\tprefetch(ICE_RX_DESC(rx_ring, ntc));\n+\n+\t/* if we are the last buffer then there is nothing else to do */\n+#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)\n+\tif (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))\n+\t\treturn false;\n+\n+\t/* place skb in next buffer to be received */\n+\trx_ring->rx_buf[ntc].skb = skb;\n+\trx_ring->rx_stats.non_eop_descs++;\n+\n+\treturn true;\n+}\n+\n+/**\n+ * ice_receive_skb - Send a completed packet up the stack\n+ * @rx_ring: rx ring in play\n+ * @skb: packet to send up\n+ * @vlan_tag: vlan tag for packet\n+ *\n+ * This function sends the completed packet (via. skb) up the stack using\n+ * gro receive functions (with/without vlan tag)\n+ */\n+static void ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb,\n+\t\t\t u16 vlan_tag)\n+{\n+\tif ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&\n+\t (vlan_tag & VLAN_VID_MASK)) {\n+\t\t__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);\n+\t}\n+\tnapi_gro_receive(&rx_ring->q_vector->napi, skb);\n+}\n+\n+/**\n+ * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf\n+ * @rx_ring: rx descriptor ring to transact packets on\n+ * @budget: Total limit on number of packets to process\n+ *\n+ * This function provides a \"bounce buffer\" approach to Rx interrupt\n+ * processing. The advantage to this is that on systems that have\n+ * expensive overhead for IOMMU access this provides a means of avoiding\n+ * it by maintaining the mapping of the page to the system.\n+ *\n+ * Returns amount of work completed\n+ */\n+static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)\n+{\n+\tunsigned int total_rx_bytes = 0, total_rx_pkts = 0;\n+\tu16 cleaned_count = ICE_DESC_UNUSED(rx_ring);\n+\tbool failure = false;\n+\n+\t/* start the loop to process RX packets bounded by 'budget' */\n+\twhile (likely(total_rx_pkts < (unsigned int)budget)) {\n+\t\tunion ice_32b_rx_flex_desc *rx_desc;\n+\t\tstruct sk_buff *skb;\n+\t\tu16 stat_err_bits;\n+\t\tu16 vlan_tag = 0;\n+\n+\t\t/* return some buffers to hardware, one at a time is too slow */\n+\t\tif (cleaned_count >= ICE_RX_BUF_WRITE) {\n+\t\t\tfailure = failure ||\n+\t\t\t\t ice_alloc_rx_bufs(rx_ring, cleaned_count);\n+\t\t\tcleaned_count = 0;\n+\t\t}\n+\n+\t\t/* get the RX desc from RX ring based on 'next_to_clean' */\n+\t\trx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);\n+\n+\t\t/* status_error_len will always be zero for unused descriptors\n+\t\t * because it's cleared in cleanup, and overlaps with hdr_addr\n+\t\t * which is always zero because packet split isn't used, if the\n+\t\t * hardware wrote DD then it will be non-zero\n+\t\t */\n+\t\tstat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);\n+\t\tif (!ice_test_staterr(rx_desc, stat_err_bits))\n+\t\t\tbreak;\n+\n+\t\t/* This memory barrier is needed to keep us from reading\n+\t\t * any other fields out of the rx_desc until we know the\n+\t\t * DD bit is set.\n+\t\t */\n+\t\tdma_rmb();\n+\n+\t\t/* allocate (if needed) and populate skb */\n+\t\tskb = ice_fetch_rx_buf(rx_ring, rx_desc);\n+\t\tif (!skb)\n+\t\t\tbreak;\n+\n+\t\tcleaned_count++;\n+\n+\t\t/* skip if it is NOP desc */\n+\t\tif (ice_is_non_eop(rx_ring, rx_desc, skb))\n+\t\t\tcontinue;\n+\n+\t\tstat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);\n+\t\tif (unlikely(ice_test_staterr(rx_desc, stat_err_bits))) {\n+\t\t\tdev_kfree_skb_any(skb);\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\tstat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);\n+\t\tif 
(ice_test_staterr(rx_desc, stat_err_bits))\n+\t\t\tvlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);\n+\n+\t\t/* correct empty headers and pad skb if needed (to make valid\n+\t\t * ethernet frame\n+\t\t */\n+\t\tif (ice_cleanup_headers(skb)) {\n+\t\t\tskb = NULL;\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\t/* probably a little skewed due to removing CRC */\n+\t\ttotal_rx_bytes += skb->len;\n+\n+\t\t/* send completed skb up the stack */\n+\t\tice_receive_skb(rx_ring, skb, vlan_tag);\n+\n+\t\t/* update budget accounting */\n+\t\ttotal_rx_pkts++;\n+\t}\n+\n+\t/* update queue and vector specific stats */\n+\tu64_stats_update_begin(&rx_ring->syncp);\n+\trx_ring->stats.pkts += total_rx_pkts;\n+\trx_ring->stats.bytes += total_rx_bytes;\n+\tu64_stats_update_end(&rx_ring->syncp);\n+\trx_ring->q_vector->rx.total_pkts += total_rx_pkts;\n+\trx_ring->q_vector->rx.total_bytes += total_rx_bytes;\n+\n+\t/* guarantee a trip back through this routine if there was a failure */\n+\treturn failure ? budget : (int)total_rx_pkts;\n+}\n+\n+/**\n+ * ice_napi_poll - NAPI polling Rx/Tx cleanup routine\n+ * @napi: napi struct with our devices info in it\n+ * @budget: amount of work driver is allowed to do this pass, in packets\n+ *\n+ * This function will clean all queues associated with a q_vector.\n+ *\n+ * Returns the amount of work done\n+ */\n+int ice_napi_poll(struct napi_struct *napi, int budget)\n+{\n+\tstruct ice_q_vector *q_vector =\n+\t\t\t\tcontainer_of(napi, struct ice_q_vector, napi);\n+\tstruct ice_vsi *vsi = q_vector->vsi;\n+\tstruct ice_pf *pf = vsi->back;\n+\tbool clean_complete = true;\n+\tint budget_per_ring = 0;\n+\tstruct ice_ring *ring;\n+\tint work_done = 0;\n+\n+\t/* Since the actual Tx work is minimal, we can give the Tx a larger\n+\t * budget and be more aggressive about cleaning up the Tx descriptors.\n+\t */\n+\tice_for_each_ring(ring, q_vector->tx)\n+\t\tif (!ice_clean_tx_irq(vsi, ring, budget))\n+\t\t\tclean_complete = false;\n+\n+\t/* Handle case where we are called by netpoll with a budget of 0 */\n+\tif (budget <= 0)\n+\t\treturn budget;\n+\n+\t/* We attempt to distribute budget to each Rx queue fairly, but don't\n+\t * allow the budget to go below 1 because that would exit polling early.\n+\t */\n+\tif (q_vector->num_ring_rx)\n+\t\tbudget_per_ring = max(budget / q_vector->num_ring_rx, 1);\n+\n+\tice_for_each_ring(ring, q_vector->rx) {\n+\t\tint cleaned;\n+\n+\t\tcleaned = ice_clean_rx_irq(ring, budget_per_ring);\n+\t\twork_done += cleaned;\n+\t\t/* if we clean as many as budgeted, we must not be done */\n+\t\tif (cleaned >= budget_per_ring)\n+\t\t\tclean_complete = false;\n+\t}\n+\n+\t/* If work not completed, return budget and polling will return */\n+\tif (!clean_complete)\n+\t\treturn budget;\n+\n+\t/* Work is done so exit the polling mode and re-enable the interrupt */\n+\tnapi_complete_done(napi, work_done);\n+\tif (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))\n+\t\tice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector);\n+\treturn 0;\n+}\n+\n+/* helper function for building cmd/type/offset */\n+static __le64\n+build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)\n+{\n+\treturn cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |\n+\t\t\t (td_cmd << ICE_TXD_QW1_CMD_S) |\n+\t\t\t (td_offset << ICE_TXD_QW1_OFFSET_S) |\n+\t\t\t ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |\n+\t\t\t (td_tag << ICE_TXD_QW1_L2TAG1_S));\n+}\n+\n+/**\n+ * __ice_maybe_stop_tx - 2nd level check for tx stop conditions\n+ * @tx_ring: the ring to be checked\n+ * @size: the size buffer we want to assure is available\n+ *\n+ * Returns 
-EBUSY if a stop is needed, else 0\n+ */\n+static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)\n+{\n+\tnetif_stop_subqueue(tx_ring->netdev, tx_ring->q_index);\n+\t/* Memory barrier before checking head and tail */\n+\tsmp_mb();\n+\n+\t/* Check again in a case another CPU has just made room available. */\n+\tif (likely(ICE_DESC_UNUSED(tx_ring) < size))\n+\t\treturn -EBUSY;\n+\n+\t/* A reprieve! - use start_subqueue because it doesn't call schedule */\n+\tnetif_start_subqueue(tx_ring->netdev, tx_ring->q_index);\n+\t++tx_ring->tx_stats.restart_q;\n+\treturn 0;\n+}\n+\n+/**\n+ * ice_maybe_stop_tx - 1st level check for tx stop conditions\n+ * @tx_ring: the ring to be checked\n+ * @size: the size buffer we want to assure is available\n+ *\n+ * Returns 0 if stop is not needed\n+ */\n+static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)\n+{\n+\tif (likely(ICE_DESC_UNUSED(tx_ring) >= size))\n+\t\treturn 0;\n+\treturn __ice_maybe_stop_tx(tx_ring, size);\n+}\n+\n+/**\n+ * ice_tx_map - Build the Tx descriptor\n+ * @tx_ring: ring to send buffer on\n+ * @first: first buffer info buffer to use\n+ *\n+ * This function loops over the skb data pointed to by *first\n+ * and gets a physical address for each memory location and programs\n+ * it and the length into the transmit descriptor.\n+ */\n+static void ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first)\n+{\n+\tu64 td_offset = 0, td_tag = 0, td_cmd = 0;\n+\tu16 i = tx_ring->next_to_use;\n+\tstruct skb_frag_struct *frag;\n+\tunsigned int data_len, size;\n+\tstruct ice_tx_desc *tx_desc;\n+\tstruct ice_tx_buf *tx_buf;\n+\tstruct sk_buff *skb;\n+\tdma_addr_t dma;\n+\n+\tskb = first->skb;\n+\n+\tdata_len = skb->data_len;\n+\tsize = skb_headlen(skb);\n+\n+\ttx_desc = ICE_TX_DESC(tx_ring, i);\n+\n+\tdma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);\n+\n+\ttx_buf = first;\n+\n+\tfor (frag = &skb_shinfo(skb)->frags[0];; frag++) {\n+\t\tunsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;\n+\n+\t\tif (dma_mapping_error(tx_ring->dev, dma))\n+\t\t\tgoto dma_error;\n+\n+\t\t/* record length, and DMA address */\n+\t\tdma_unmap_len_set(tx_buf, len, size);\n+\t\tdma_unmap_addr_set(tx_buf, dma, dma);\n+\n+\t\t/* align size to end of page */\n+\t\tmax_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1);\n+\t\ttx_desc->buf_addr = cpu_to_le64(dma);\n+\n+\t\t/* account for data chunks larger than the hardware\n+\t\t * can handle\n+\t\t */\n+\t\twhile (unlikely(size > ICE_MAX_DATA_PER_TXD)) {\n+\t\t\ttx_desc->cmd_type_offset_bsz =\n+\t\t\t\tbuild_ctob(td_cmd, td_offset, max_data, td_tag);\n+\n+\t\t\ttx_desc++;\n+\t\t\ti++;\n+\n+\t\t\tif (i == tx_ring->count) {\n+\t\t\t\ttx_desc = ICE_TX_DESC(tx_ring, 0);\n+\t\t\t\ti = 0;\n+\t\t\t}\n+\n+\t\t\tdma += max_data;\n+\t\t\tsize -= max_data;\n+\n+\t\t\tmax_data = ICE_MAX_DATA_PER_TXD_ALIGNED;\n+\t\t\ttx_desc->buf_addr = cpu_to_le64(dma);\n+\t\t}\n+\n+\t\tif (likely(!data_len))\n+\t\t\tbreak;\n+\n+\t\ttx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,\n+\t\t\t\t\t\t\t size, td_tag);\n+\n+\t\ttx_desc++;\n+\t\ti++;\n+\n+\t\tif (i == tx_ring->count) {\n+\t\t\ttx_desc = ICE_TX_DESC(tx_ring, 0);\n+\t\t\ti = 0;\n+\t\t}\n+\n+\t\tsize = skb_frag_size(frag);\n+\t\tdata_len -= size;\n+\n+\t\tdma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,\n+\t\t\t\t DMA_TO_DEVICE);\n+\n+\t\ttx_buf = &tx_ring->tx_buf[i];\n+\t}\n+\n+\t/* record bytecount for BQL */\n+\tnetdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);\n+\n+\t/* record SW timestamp if HW timestamp is not 
available */\n+\tskb_tx_timestamp(first->skb);\n+\n+\ti++;\n+\tif (i == tx_ring->count)\n+\t\ti = 0;\n+\n+\t/* write last descriptor with RS and EOP bits */\n+\ttd_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);\n+\ttx_desc->cmd_type_offset_bsz =\n+\t\t\tbuild_ctob(td_cmd, td_offset, size, td_tag);\n+\n+\t/* Force memory writes to complete before letting h/w know there\n+\t * are new descriptors to fetch.\n+\t *\n+\t * We also use this memory barrier to make certain all of the\n+\t * status bits have been updated before next_to_watch is written.\n+\t */\n+\twmb();\n+\n+\t/* set next_to_watch value indicating a packet is present */\n+\tfirst->next_to_watch = tx_desc;\n+\n+\ttx_ring->next_to_use = i;\n+\n+\tice_maybe_stop_tx(tx_ring, DESC_NEEDED);\n+\n+\t/* notify HW of packet */\n+\tif (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {\n+\t\twritel(i, tx_ring->tail);\n+\n+\t\t/* we need this if more than one processor can write to our tail\n+\t\t * at a time, it synchronizes IO on IA64/Altix systems\n+\t\t */\n+\t\tmmiowb();\n+\t}\n+\n+\treturn;\n+\n+dma_error:\n+\t/* clear dma mappings for failed tx_buf map */\n+\tfor (;;) {\n+\t\ttx_buf = &tx_ring->tx_buf[i];\n+\t\tice_unmap_and_free_tx_buf(tx_ring, tx_buf);\n+\t\tif (tx_buf == first)\n+\t\t\tbreak;\n+\t\tif (i == 0)\n+\t\t\ti = tx_ring->count;\n+\t\ti--;\n+\t}\n+\n+\ttx_ring->next_to_use = i;\n+}\n+\n+/**\n+ * ice_txd_use_count - estimate the number of descriptors needed for Tx\n+ * @size: transmit request size in bytes\n+ *\n+ * Due to hardware alignment restrictions (4K alignment), we need to\n+ * assume that we can have no more than 12K of data per descriptor, even\n+ * though each descriptor can take up to 16K - 1 bytes of aligned memory.\n+ * Thus, we need to divide by 12K. But division is slow! Instead,\n+ * we decompose the operation into shifts and one relatively cheap\n+ * multiply operation.\n+ *\n+ * To divide by 12K, we first divide by 4K, then divide by 3:\n+ * To divide by 4K, shift right by 12 bits\n+ * To divide by 3, multiply by 85, then divide by 256\n+ * (Divide by 256 is done by shifting right by 8 bits)\n+ * Finally, we add one to round up. Because 256 isn't an exact multiple of\n+ * 3, we'll underestimate near each multiple of 12K. This is actually more\n+ * accurate as we have 4K - 1 of wiggle room that we can fit into the last\n+ * segment. 
For our purposes this is accurate out to 1M which is orders of\n+ * magnitude greater than our largest possible GSO size.\n+ *\n+ * This would then be implemented as:\n+ * return (((size >> 12) * 85) >> 8) + 1;\n+ *\n+ * Since multiplication and division are commutative, we can reorder\n+ * operations into:\n+ * return ((size * 85) >> 20) + 1;\n+ */\n+static unsigned int ice_txd_use_count(unsigned int size)\n+{\n+\treturn ((size * 85) >> 20) + 1;\n+}\n+\n+/**\n+ * ice_xmit_desc_count - calculate number of tx descriptors needed\n+ * @skb: send buffer\n+ *\n+ * Returns number of data descriptors needed for this skb.\n+ */\n+static unsigned int ice_xmit_desc_count(struct sk_buff *skb)\n+{\n+\tconst struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];\n+\tunsigned int nr_frags = skb_shinfo(skb)->nr_frags;\n+\tunsigned int count = 0, size = skb_headlen(skb);\n+\n+\tfor (;;) {\n+\t\tcount += ice_txd_use_count(size);\n+\n+\t\tif (!nr_frags--)\n+\t\t\tbreak;\n+\n+\t\tsize = skb_frag_size(frag++);\n+\t}\n+\n+\treturn count;\n+}\n+\n+/**\n+ * __ice_chk_linearize - Check if there are more than 8 buffers per packet\n+ * @skb: send buffer\n+ *\n+ * Note: This HW can't DMA more than 8 buffers to build a packet on the wire\n+ * and so we need to figure out the cases where we need to linearize the skb.\n+ *\n+ * For TSO we need to count the TSO header and segment payload separately.\n+ * As such we need to check cases where we have 7 fragments or more as we\n+ * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for\n+ * the segment payload in the first descriptor, and another 7 for the\n+ * fragments.\n+ */\n+static bool __ice_chk_linearize(struct sk_buff *skb)\n+{\n+\tconst struct skb_frag_struct *frag, *stale;\n+\tint nr_frags, sum;\n+\n+\t/* no need to check if number of frags is less than 7 */\n+\tnr_frags = skb_shinfo(skb)->nr_frags;\n+\tif (nr_frags < (ICE_MAX_BUF_TXD - 1))\n+\t\treturn false;\n+\n+\t/* We need to walk through the list and validate that each group\n+\t * of 6 fragments totals at least gso_size.\n+\t */\n+\tnr_frags -= ICE_MAX_BUF_TXD - 2;\n+\tfrag = &skb_shinfo(skb)->frags[0];\n+\n+\t/* Initialize size to the negative value of gso_size minus 1. 
We\n+\t * use this as the worst case scenerio in which the frag ahead\n+\t * of us only provides one byte which is why we are limited to 6\n+\t * descriptors for a single transmit as the header and previous\n+\t * fragment are already consuming 2 descriptors.\n+\t */\n+\tsum = 1 - skb_shinfo(skb)->gso_size;\n+\n+\t/* Add size of frags 0 through 4 to create our initial sum */\n+\tsum += skb_frag_size(frag++);\n+\tsum += skb_frag_size(frag++);\n+\tsum += skb_frag_size(frag++);\n+\tsum += skb_frag_size(frag++);\n+\tsum += skb_frag_size(frag++);\n+\n+\t/* Walk through fragments adding latest fragment, testing it, and\n+\t * then removing stale fragments from the sum.\n+\t */\n+\tstale = &skb_shinfo(skb)->frags[0];\n+\tfor (;;) {\n+\t\tsum += skb_frag_size(frag++);\n+\n+\t\t/* if sum is negative we failed to make sufficient progress */\n+\t\tif (sum < 0)\n+\t\t\treturn true;\n+\n+\t\tif (!nr_frags--)\n+\t\t\tbreak;\n+\n+\t\tsum -= skb_frag_size(stale++);\n+\t}\n+\n+\treturn false;\n+}\n+\n+/**\n+ * ice_chk_linearize - Check if there are more than 8 fragments per packet\n+ * @skb: send buffer\n+ * @count: number of buffers used\n+ *\n+ * Note: Our HW can't scatter-gather more than 8 fragments to build\n+ * a packet on the wire and so we need to figure out the cases where we\n+ * need to linearize the skb.\n+ */\n+static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)\n+{\n+\t/* Both TSO and single send will work if count is less than 8 */\n+\tif (likely(count < ICE_MAX_BUF_TXD))\n+\t\treturn false;\n+\n+\tif (skb_is_gso(skb))\n+\t\treturn __ice_chk_linearize(skb);\n+\n+\t/* we can support up to 8 data buffers for a single send */\n+\treturn count != ICE_MAX_BUF_TXD;\n+}\n+\n+/**\n+ * ice_xmit_frame_ring - Sends buffer on Tx ring\n+ * @skb: send buffer\n+ * @tx_ring: ring to send buffer on\n+ *\n+ * Returns NETDEV_TX_OK if sent, else an error code\n+ */\n+static netdev_tx_t\n+ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)\n+{\n+\tstruct ice_tx_buf *first;\n+\tunsigned int count;\n+\n+\tcount = ice_xmit_desc_count(skb);\n+\tif (ice_chk_linearize(skb, count)) {\n+\t\tif (__skb_linearize(skb))\n+\t\t\tgoto out_drop;\n+\t\tcount = ice_txd_use_count(skb->len);\n+\t\ttx_ring->tx_stats.tx_linearize++;\n+\t}\n+\n+\t/* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD,\n+\t * + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD,\n+\t * + 4 desc gap to avoid the cache line where head is,\n+\t * + 1 desc for context descriptor,\n+\t * otherwise try next time\n+\t */\n+\tif (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) {\n+\t\ttx_ring->tx_stats.tx_busy++;\n+\t\treturn NETDEV_TX_BUSY;\n+\t}\n+\n+\t/* record the location of the first descriptor for this packet */\n+\tfirst = &tx_ring->tx_buf[tx_ring->next_to_use];\n+\tfirst->skb = skb;\n+\tfirst->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);\n+\tfirst->gso_segs = 1;\n+\n+\tice_tx_map(tx_ring, first);\n+\treturn NETDEV_TX_OK;\n+\n+out_drop:\n+\tdev_kfree_skb_any(skb);\n+\treturn NETDEV_TX_OK;\n+}\n+\n+/**\n+ * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer\n+ * @skb: send buffer\n+ * @netdev: network interface device structure\n+ *\n+ * Returns NETDEV_TX_OK if sent, else an error code\n+ */\n+netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev)\n+{\n+\tstruct ice_netdev_priv *np = netdev_priv(netdev);\n+\tstruct ice_vsi *vsi = np->vsi;\n+\tstruct ice_ring *tx_ring;\n+\n+\ttx_ring = vsi->tx_rings[skb->queue_mapping];\n+\n+\t/* hardware can't handle really short frames, 
hardware padding works\n+\t * beyond this point\n+\t */\n+\tif (skb_put_padto(skb, ICE_MIN_TX_LEN))\n+\t\treturn NETDEV_TX_OK;\n+\n+\treturn ice_xmit_frame_ring(skb, tx_ring);\n+}\ndiff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h\nindex 367bfc6fa485..4bcdb79ef181 100644\n--- a/drivers/net/ethernet/intel/ice/ice_txrx.h\n+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h\n@@ -21,8 +21,23 @@\n #define ICE_DFLT_IRQ_WORK\t256\n #define ICE_RXBUF_2048\t\t2048\n #define ICE_MAX_CHAINED_RX_BUFS\t5\n+#define ICE_MAX_BUF_TXD\t\t8\n+#define ICE_MIN_TX_LEN\t\t17\n+\n+/* The size limit for a transmit buffer in a descriptor is (16K - 1).\n+ * In order to align with the read requests we will align the value to\n+ * the nearest 4K which represents our maximum read request size.\n+ */\n+#define ICE_MAX_READ_REQ_SIZE\t4096\n+#define ICE_MAX_DATA_PER_TXD\t(16 * 1024 - 1)\n+#define ICE_MAX_DATA_PER_TXD_ALIGNED \\\n+\t(~(ICE_MAX_READ_REQ_SIZE - 1) & ICE_MAX_DATA_PER_TXD)\n+\n+#define ICE_RX_BUF_WRITE\t16\t/* Must be power of 2 */\n #define ICE_MAX_TXQ_PER_TXQG\t128\n \n+/* Tx Descriptors needed, worst case */\n+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)\n #define ICE_DESC_UNUSED(R)\t\\\n \t((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \\\n \t(R)->next_to_clean - (R)->next_to_use - 1)\n@@ -44,6 +59,24 @@ struct ice_rx_buf {\n \tunsigned int page_offset;\n };\n \n+struct ice_q_stats {\n+\tu64 pkts;\n+\tu64 bytes;\n+};\n+\n+struct ice_txq_stats {\n+\tu64 restart_q;\n+\tu64 tx_busy;\n+\tu64 tx_linearize;\n+};\n+\n+struct ice_rxq_stats {\n+\tu64 non_eop_descs;\n+\tu64 alloc_page_failed;\n+\tu64 alloc_buf_failed;\n+\tu64 page_reuse_count;\n+};\n+\n /* this enum matches hardware bits and is meant to be used by DYN_CTLN\n * registers and QINT registers or more generally anywhere in the manual\n * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any\n@@ -108,6 +141,15 @@ struct ice_ring {\n \tu16 next_to_clean;\n \n \tbool ring_active;\t\t/* is ring online or not */\n+\n+\t/* stats structs */\n+\tstruct ice_q_stats\tstats;\n+\tstruct u64_stats_sync syncp;\n+\tunion {\n+\t\tstruct ice_txq_stats tx_stats;\n+\t\tstruct ice_rxq_stats rx_stats;\n+\t};\n+\n \tunsigned int size;\t\t/* length of descriptor ring in bytes */\n \tdma_addr_t dma;\t\t\t/* physical address of ring */\n \tstruct rcu_head rcu;\t\t/* to avoid race on free */\n@@ -135,10 +177,13 @@ struct ice_ring_container {\n \tfor (pos = (head).ring; pos; pos = pos->next)\n \n bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count);\n+netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);\n void ice_clean_tx_ring(struct ice_ring *tx_ring);\n void ice_clean_rx_ring(struct ice_ring *rx_ring);\n int ice_setup_tx_ring(struct ice_ring *tx_ring);\n int ice_setup_rx_ring(struct ice_ring *rx_ring);\n void ice_free_tx_ring(struct ice_ring *tx_ring);\n void ice_free_rx_ring(struct ice_ring *rx_ring);\n+int ice_napi_poll(struct napi_struct *napi, int budget);\n+\n #endif /* _ICE_TXRX_H_ */\n", "prefixes": [ "v3", "10/15" ] }
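The method listing at the top also advertises PATCH and PUT for updating a patch. As a hedged sketch only (updating requires maintainer permissions on the project; the API token placeholder, the token-auth header format, and the example state value are assumptions, not taken from this page), a state update might look like:

# Minimal sketch (assumptions noted above): update a patch's state via the REST API.
import requests

BASE_URL = "http://patchwork.ozlabs.org/api"
PATCH_ID = 887919
API_TOKEN = "<your-patchwork-api-token>"  # placeholder; generated from your Patchwork profile

resp = requests.patch(
    f"{BASE_URL}/patches/{PATCH_ID}/",
    headers={"Authorization": f"Token {API_TOKEN}"},
    json={"state": "accepted"},  # example value; valid states depend on the Patchwork instance
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["state"])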