Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/854014/?format=api
{ "id": 854014, "url": "http://patchwork.ozlabs.org/api/patches/854014/?format=api", "web_url": "http://patchwork.ozlabs.org/project/intel-wired-lan/patch/20171229135219.14787-1-alice.michael@intel.com/", "project": { "id": 46, "url": "http://patchwork.ozlabs.org/api/projects/46/?format=api", "name": "Intel Wired Ethernet development", "link_name": "intel-wired-lan", "list_id": "intel-wired-lan.osuosl.org", "list_email": "intel-wired-lan@osuosl.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20171229135219.14787-1-alice.michael@intel.com>", "list_archive_url": null, "date": "2017-12-29T13:52:19", "name": "[next,S85-V1,14/14] i40e/i40evf: Add support for new mechanism of updating adaptive ITR", "commit_ref": null, "pull_url": null, "state": "accepted", "archived": false, "hash": "c49b7509ff510f9e79b94deede4eb2c7f046b820", "submitter": { "id": 71123, "url": "http://patchwork.ozlabs.org/api/people/71123/?format=api", "name": "Michael, Alice", "email": "alice.michael@intel.com" }, "delegate": { "id": 68, "url": "http://patchwork.ozlabs.org/api/users/68/?format=api", "username": "jtkirshe", "first_name": "Jeff", "last_name": "Kirsher", "email": "jeffrey.t.kirsher@intel.com" }, "mbox": "http://patchwork.ozlabs.org/project/intel-wired-lan/patch/20171229135219.14787-1-alice.michael@intel.com/mbox/", "series": [ { "id": 20665, "url": "http://patchwork.ozlabs.org/api/series/20665/?format=api", "web_url": "http://patchwork.ozlabs.org/project/intel-wired-lan/list/?series=20665", "date": "2017-12-29T13:48:33", "name": "[next,S85-V1,01/14] i40e: fix typo in function description", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/20665/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/854014/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/854014/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<intel-wired-lan-bounces@osuosl.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "intel-wired-lan@lists.osuosl.org" ], "Delivered-To": [ "patchwork-incoming@bilbo.ozlabs.org", "intel-wired-lan@lists.osuosl.org" ], "Authentication-Results": "ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=osuosl.org\n\t(client-ip=140.211.166.136; helo=silver.osuosl.org;\n\tenvelope-from=intel-wired-lan-bounces@osuosl.org;\n\treceiver=<UNKNOWN>)", "Received": [ "from silver.osuosl.org (smtp3.osuosl.org [140.211.166.136])\n\t(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3z7gV940q5z9s7G\n\tfor <incoming@patchwork.ozlabs.org>;\n\tSat, 30 Dec 2017 08:59:29 +1100 (AEDT)", "from localhost (localhost [127.0.0.1])\n\tby silver.osuosl.org (Postfix) with ESMTP id 5B37F292AC;\n\tFri, 29 Dec 2017 21:59:27 +0000 (UTC)", "from silver.osuosl.org ([127.0.0.1])\n\tby localhost (.osuosl.org [127.0.0.1]) (amavisd-new, port 10024)\n\twith ESMTP id mHxK41ZZdUxg; Fri, 29 Dec 2017 21:59:21 +0000 (UTC)", "from ash.osuosl.org (ash.osuosl.org [140.211.166.34])\n\tby silver.osuosl.org (Postfix) with ESMTP id C5CB0292AA;\n\tFri, 29 Dec 2017 21:59:21 +0000 (UTC)", "from whitealder.osuosl.org (smtp1.osuosl.org [140.211.166.138])\n\tby ash.osuosl.org (Postfix) with ESMTP id C52131C0180\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tFri, 29 Dec 2017 21:59:18 +0000 (UTC)", "from localhost (localhost [127.0.0.1])\n\tby whitealder.osuosl.org (Postfix) with ESMTP id BF5C5878E6\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tFri, 29 Dec 2017 21:59:18 +0000 (UTC)", "from whitealder.osuosl.org ([127.0.0.1])\n\tby localhost (.osuosl.org [127.0.0.1]) (amavisd-new, port 10024)\n\twith ESMTP id 9vbxxed8zvwR for <intel-wired-lan@lists.osuosl.org>;\n\tFri, 29 Dec 2017 21:59:16 +0000 (UTC)", "from mga04.intel.com (mga04.intel.com [192.55.52.120])\n\tby whitealder.osuosl.org (Postfix) with ESMTPS id 74F4E87698\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tFri, 29 Dec 2017 21:59:16 +0000 (UTC)", "from orsmga006.jf.intel.com ([10.7.209.51])\n\tby fmsmga104.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t29 Dec 2017 13:59:15 -0800", "from alicemic-2.jf.intel.com ([10.166.16.121])\n\tby orsmga006.jf.intel.com with ESMTP; 29 Dec 2017 13:59:15 -0800" ], "X-Virus-Scanned": [ "amavisd-new at osuosl.org", "amavisd-new at osuosl.org" ], "X-Greylist": "domain auto-whitelisted by SQLgrey-1.7.6", "X-Amp-Result": "SKIPPED(no attachment in message)", "X-Amp-File-Uploaded": "False", "X-ExtLoop1": "1", "X-IronPort-AV": "E=Sophos;i=\"5.45,478,1508828400\"; d=\"scan'208\";a=\"6756383\"", "From": "Alice Michael <alice.michael@intel.com>", "To": "alice.michael@intel.com,\n\tintel-wired-lan@lists.osuosl.org", "Date": "Fri, 29 Dec 2017 08:52:19 -0500", "Message-Id": "<20171229135219.14787-1-alice.michael@intel.com>", "X-Mailer": "git-send-email 2.9.5", "Subject": "[Intel-wired-lan] [next PATCH S85-V1 14/14] i40e/i40evf: Add\n\tsupport for new mechanism of updating adaptive ITR", "X-BeenThere": "intel-wired-lan@osuosl.org", "X-Mailman-Version": "2.1.24", "Precedence": "list", "List-Id": "Intel Wired Ethernet Linux Kernel Driver Development\n\t<intel-wired-lan.osuosl.org>", "List-Unsubscribe": "<https://lists.osuosl.org/mailman/options/intel-wired-lan>, \n\t<mailto:intel-wired-lan-request@osuosl.org?subject=unsubscribe>", "List-Archive": "<http://lists.osuosl.org/pipermail/intel-wired-lan/>", "List-Post": "<mailto:intel-wired-lan@osuosl.org>", "List-Help": "<mailto:intel-wired-lan-request@osuosl.org?subject=help>", "List-Subscribe": "<https://lists.osuosl.org/mailman/listinfo/intel-wired-lan>, \n\t<mailto:intel-wired-lan-request@osuosl.org?subject=subscribe>", "MIME-Version": "1.0", "Content-Type": "text/plain; charset=\"us-ascii\"", "Content-Transfer-Encoding": "7bit", "Errors-To": "intel-wired-lan-bounces@osuosl.org", "Sender": "\"Intel-wired-lan\" <intel-wired-lan-bounces@osuosl.org>" }, "content": "From: Alexander Duyck <alexander.h.duyck@intel.com>\n\nThis patch replaces the existing mechanism for determining the correct\nvalue to program for adaptive ITR with yet another new and more\ncomplicated approach.\n\nThe basic idea from a 30K foot view is that this new approach will push the\nRx interrupt moderation up so that by default it starts in low latency and\nis gradually pushed up into a higher latency setup as long as doing so\nincreases the number of packets processed, if the number of packets drops\nto 4 to 1 per packet we will reset and just base our ITR on the size of the\npackets being received. For Tx we leave it floating at a high interrupt\ndelay and do not pull it down unless we start processing more than 112\npackets per interrupt. If we start exceeding that we will cut our interrupt\nrates in half until we are back below 112.\n\nThe side effect of these patches are that we will be processing more\npackets per interrupt. This is both a good and a bad thing as it means we\nwill not be blocking processing in the case of things like pktgen and XDP,\nbut we will also be consuming a bit more CPU in the cases of things such as\nnetwork throughput tests using netperf.\n\nOne delta from this versus the ixgbe version of the changes is that I have\nmade the interrupt moderation a bit more aggressive when we are in bulk\nmode by moving our \"goldilocks zone\" up from 48 to 96 to 56 to 112. The\nmain motivation behind moving this is to address the fact that we need to\nupdate less frequently, and have more fine grained control due to the\nseparate Tx and Rx ITR times.\n\nSigned-off-by: Alexander Duyck <alexander.h.duyck@intel.com>\n---\n drivers/net/ethernet/intel/i40e/i40e.h | 3 +-\n drivers/net/ethernet/intel/i40e/i40e_main.c | 15 +-\n drivers/net/ethernet/intel/i40e/i40e_txrx.c | 362 ++++++++++++++++--------\n drivers/net/ethernet/intel/i40e/i40e_txrx.h | 17 +-\n drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 362 ++++++++++++++++--------\n drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 17 +-\n drivers/net/ethernet/intel/i40evf/i40evf.h | 3 +-\n drivers/net/ethernet/intel/i40evf/i40evf_main.c | 6 +-\n 8 files changed, 528 insertions(+), 257 deletions(-)", "diff": "diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h\nindex 46e9f4e..ebe795a 100644\n--- a/drivers/net/ethernet/intel/i40e/i40e.h\n+++ b/drivers/net/ethernet/intel/i40e/i40e.h\n@@ -824,6 +824,7 @@ struct i40e_q_vector {\n \tstruct i40e_ring_container rx;\n \tstruct i40e_ring_container tx;\n \n+\tu8 itr_countdown;\t/* when 0 should adjust adaptive ITR */\n \tu8 num_ringpairs;\t/* total number of ring pairs in vector */\n \n \tcpumask_t affinity_mask;\n@@ -832,8 +833,6 @@ struct i40e_q_vector {\n \tstruct rcu_head rcu;\t/* to avoid race with update stats on free */\n \tchar name[I40E_INT_NAME_STR_LEN];\n \tbool arm_wb_state;\n-#define ITR_COUNTDOWN_START 100\n-\tu8 itr_countdown;\t/* when 0 should adjust ITR */\n } ____cacheline_internodealigned_in_smp;\n \n /* lan device */\ndiff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c\nindex 6380d03..92ad8ba 100644\n--- a/drivers/net/ethernet/intel/i40e/i40e_main.c\n+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c\n@@ -3433,19 +3433,20 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)\n \tfor (i = 0; i < vsi->num_q_vectors; i++, vector++) {\n \t\tstruct i40e_q_vector *q_vector = vsi->q_vectors[i];\n \n-\t\tq_vector->itr_countdown = ITR_COUNTDOWN_START;\n+\t\tq_vector->rx.next_update = jiffies + 1;\n \t\tq_vector->rx.target_itr =\n \t\t\tITR_TO_REG(vsi->rx_rings[i]->itr_setting);\n-\t\tq_vector->rx.latency_range = I40E_LOW_LATENCY;\n \t\twr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),\n \t\t q_vector->rx.target_itr);\n \t\tq_vector->rx.current_itr = q_vector->rx.target_itr;\n+\n+\t\tq_vector->tx.next_update = jiffies + 1;\n \t\tq_vector->tx.target_itr =\n \t\t\tITR_TO_REG(vsi->tx_rings[i]->itr_setting);\n-\t\tq_vector->tx.latency_range = I40E_LOW_LATENCY;\n \t\twr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),\n \t\t q_vector->tx.target_itr);\n \t\tq_vector->tx.current_itr = q_vector->tx.target_itr;\n+\n \t\twr32(hw, I40E_PFINT_RATEN(vector - 1),\n \t\t i40e_intrl_usec_to_reg(vsi->int_rate_limit));\n \n@@ -3546,13 +3547,12 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)\n \tu32 val;\n \n \t/* set the ITR configuration */\n-\tq_vector->itr_countdown = ITR_COUNTDOWN_START;\n+\tq_vector->rx.next_update = jiffies + 1;\n \tq_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);\n-\tq_vector->rx.latency_range = I40E_LOW_LATENCY;\n \twr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr);\n \tq_vector->rx.current_itr = q_vector->rx.target_itr;\n+\tq_vector->tx.next_update = jiffies + 1;\n \tq_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);\n-\tq_vector->tx.latency_range = I40E_LOW_LATENCY;\n \twr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr);\n \tq_vector->tx.current_itr = q_vector->tx.target_itr;\n \n@@ -10389,9 +10389,6 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)\n \t\tnetif_napi_add(vsi->netdev, &q_vector->napi,\n \t\t\t i40e_napi_poll, NAPI_POLL_WEIGHT);\n \n-\tq_vector->rx.latency_range = I40E_LOW_LATENCY;\n-\tq_vector->tx.latency_range = I40E_LOW_LATENCY;\n-\n \t/* tie q_vector and vsi together */\n \tvsi->q_vectors[v_idx] = q_vector;\n \ndiff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c\nindex d810e0e..6d14ee6 100644\n--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c\n+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c\n@@ -994,97 +994,241 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)\n \t}\n }\n \n+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,\n+\t\t\t\t\tstruct i40e_ring_container *rc)\n+{\n+\treturn &q_vector->rx == rc;\n+}\n+\n+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)\n+{\n+\tunsigned int divisor;\n+\n+\tswitch (q_vector->vsi->back->hw.phy.link_info.link_speed) {\n+\tcase I40E_LINK_SPEED_40GB:\n+\t\tdivisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;\n+\t\tbreak;\n+\tcase I40E_LINK_SPEED_25GB:\n+\tcase I40E_LINK_SPEED_20GB:\n+\t\tdivisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;\n+\t\tbreak;\n+\tdefault:\n+\tcase I40E_LINK_SPEED_10GB:\n+\t\tdivisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;\n+\t\tbreak;\n+\tcase I40E_LINK_SPEED_1GB:\n+\tcase I40E_LINK_SPEED_100MB:\n+\t\tdivisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;\n+\t\tbreak;\n+\t}\n+\n+\treturn divisor;\n+}\n+\n /**\n- * i40e_set_new_dynamic_itr - Find new ITR level\n+ * i40e_update_itr - update the dynamic ITR value based on statistics\n+ * @q_vector: structure containing interrupt and ring information\n * @rc: structure containing ring performance data\n *\n- * Returns true if ITR changed, false if not\n- *\n- * Stores a new ITR value based on packets and byte counts during\n- * the last interrupt. The advantage of per interrupt computation\n- * is faster updates and more accurate ITR for the current traffic\n- * pattern. Constants in this function were computed based on\n- * theoretical maximum wire speed and thresholds were set based on\n- * testing data as well as attempting to minimize response time\n+ * Stores a new ITR value based on packets and byte\n+ * counts during the last interrupt. The advantage of per interrupt\n+ * computation is faster updates and more accurate ITR for the current\n+ * traffic pattern. Constants in this function were computed\n+ * based on theoretical maximum wire speed and thresholds were set based\n+ * on testing data as well as attempting to minimize response time\n * while increasing bulk throughput.\n **/\n-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)\n+static void i40e_update_itr(struct i40e_q_vector *q_vector,\n+\t\t\t struct i40e_ring_container *rc)\n {\n-\tenum i40e_latency_range new_latency_range = rc->latency_range;\n-\tint bytes_per_usec;\n-\tunsigned int usecs, estimated_usecs;\n+\tunsigned int avg_wire_size, packets, bytes, itr;\n+\tunsigned long next_update = jiffies;\n \n+\t/* If we don't have any rings just leave ourselves set for maximum\n+\t * possible latency so we take ourselves out of the equation.\n+\t */\n \tif (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))\n-\t\treturn false;\n+\t\treturn;\n \n-\tif (!rc->total_packets || !rc->current_itr)\n-\t\treturn false;\n+\t/* For Rx we want to push the delay up and default to low latency.\n+\t * for Tx we want to pull the delay down and default to high latency.\n+\t */\n+\titr = i40e_container_is_rx(q_vector, rc) ?\n+\t I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :\n+\t I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;\n+\n+\t/* If we didn't update within up to 1 - 2 jiffies we can assume\n+\t * that either packets are coming in so slow there hasn't been\n+\t * any work, or that there is so much work that NAPI is dealing\n+\t * with interrupt moderation and we don't need to do anything.\n+\t */\n+\tif (time_after(next_update, rc->next_update))\n+\t\tgoto clear_counts;\n+\n+\t/* If itr_countdown is set it means we programmed an ITR within\n+\t * the last 4 interrupt cycles. This has a side effect of us\n+\t * potentially firing an early interrupt. In order to work around\n+\t * this we need to throw out any data received for a few\n+\t * interrupts following the update.\n+\t */\n+\tif (q_vector->itr_countdown) {\n+\t\titr = rc->target_itr;\n+\t\tgoto clear_counts;\n+\t}\n \n-\tusecs = (rc->current_itr << 1) * ITR_COUNTDOWN_START;\n-\tbytes_per_usec = rc->total_bytes / usecs;\n+\tpackets = rc->total_packets;\n+\tbytes = rc->total_bytes;\n \n-\t/* The calculations in this algorithm depend on interrupts actually\n-\t * firing at the ITR rate. This may not happen if the packet rate is\n-\t * really low, or if we've been napi polling. Check to make sure\n-\t * that's not the case before we continue.\n+\tif (i40e_container_is_rx(q_vector, rc)) {\n+\t\t/* If Rx there are 1 to 4 packets and bytes are less than\n+\t\t * 9000 assume insufficient data to use bulk rate limiting\n+\t\t * approach unless Tx is already in bulk rate limiting. We\n+\t\t * are likely latency driven.\n+\t\t */\n+\t\tif (packets && packets < 4 && bytes < 9000 &&\n+\t\t (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {\n+\t\t\titr = I40E_ITR_ADAPTIVE_LATENCY;\n+\t\t\tgoto adjust_by_size;\n+\t\t}\n+\t} else if (packets < 4) {\n+\t\t/* If we have Tx and Rx ITR maxed and Tx ITR is running in\n+\t\t * bulk mode and we are receiving 4 or fewer packets just\n+\t\t * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so\n+\t\t * that the Rx can relax.\n+\t\t */\n+\t\tif (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&\n+\t\t (q_vector->rx.target_itr & I40E_ITR_MASK) ==\n+\t\t I40E_ITR_ADAPTIVE_MAX_USECS)\n+\t\t\tgoto clear_counts;\n+\t} else if (packets > 32) {\n+\t\t/* If we have processed over 32 packets in a single interrupt\n+\t\t * for Tx assume we need to switch over to \"bulk\" mode.\n+\t\t */\n+\t\trc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;\n+\t}\n+\n+\t/* We have no packets to actually measure against. This means\n+\t * either one of the other queues on this vector is active or\n+\t * we are a Tx queue doing TSO with too high of an interrupt rate.\n+\t *\n+\t * Between 4 and 56 we can assume that our current interrupt delay\n+\t * is only slightly too low. As such we should increase it by a small\n+\t * fixed amount.\n \t */\n-\testimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);\n-\tif (estimated_usecs > usecs) {\n-\t\tnew_latency_range = I40E_LOW_LATENCY;\n-\t\tgoto reset_latency;\n+\tif (packets < 56) {\n+\t\titr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;\n+\t\tif ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {\n+\t\t\titr &= I40E_ITR_ADAPTIVE_LATENCY;\n+\t\t\titr += I40E_ITR_ADAPTIVE_MAX_USECS;\n+\t\t}\n+\t\tgoto clear_counts;\n \t}\n \n-\t/* simple throttlerate management\n-\t * 0-10MB/s lowest (50000 ints/s)\n-\t * 10-20MB/s low (20000 ints/s)\n-\t * 20-1249MB/s bulk (18000 ints/s)\n+\tif (packets <= 256) {\n+\t\titr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);\n+\t\titr &= I40E_ITR_MASK;\n+\n+\t\t/* Between 56 and 112 is our \"goldilocks\" zone where we are\n+\t\t * working out \"just right\". Just report that our current\n+\t\t * ITR is good for us.\n+\t\t */\n+\t\tif (packets <= 112)\n+\t\t\tgoto clear_counts;\n+\n+\t\t/* If packet count is 128 or greater we are likely looking\n+\t\t * at a slight overrun of the delay we want. Try halving\n+\t\t * our delay to see if that will cut the number of packets\n+\t\t * in half per interrupt.\n+\t\t */\n+\t\titr /= 2;\n+\t\titr &= I40E_ITR_MASK;\n+\t\tif (itr < I40E_ITR_ADAPTIVE_MIN_USECS)\n+\t\t\titr = I40E_ITR_ADAPTIVE_MIN_USECS;\n+\n+\t\tgoto clear_counts;\n+\t}\n+\n+\t/* The paths below assume we are dealing with a bulk ITR since\n+\t * number of packets is greater than 256. We are just going to have\n+\t * to compute a value and try to bring the count under control,\n+\t * though for smaller packet sizes there isn't much we can do as\n+\t * NAPI polling will likely be kicking in sooner rather than later.\n+\t */\n+\titr = I40E_ITR_ADAPTIVE_BULK;\n+\n+adjust_by_size:\n+\t/* If packet counts are 256 or greater we can assume we have a gross\n+\t * overestimation of what the rate should be. Instead of trying to fine\n+\t * tune it just use the formula below to try and dial in an exact value\n+\t * give the current packet size of the frame.\n+\t */\n+\tavg_wire_size = bytes / packets;\n+\n+\t/* The following is a crude approximation of:\n+\t * wmem_default / (size + overhead) = desired_pkts_per_int\n+\t * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate\n+\t * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value\n \t *\n-\t * The math works out because the divisor is in 10^(-6) which\n-\t * turns the bytes/us input value into MB/s values, but\n-\t * make sure to use usecs, as the register values written\n-\t * are in 2 usec increments in the ITR registers, and make sure\n-\t * to use the smoothed values that the countdown timer gives us.\n+\t * Assuming wmem_default is 212992 and overhead is 640 bytes per\n+\t * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the\n+\t * formula down to\n+\t *\n+\t * (170 * (size + 24)) / (size + 640) = ITR\n+\t *\n+\t * We first do some math on the packet size and then finally bitshift\n+\t * by 8 after rounding up. We also have to account for PCIe link speed\n+\t * difference as ITR scales based on this.\n \t */\n-\tswitch (new_latency_range) {\n-\tcase I40E_LOWEST_LATENCY:\n-\t\tif (bytes_per_usec > 10)\n-\t\t\tnew_latency_range = I40E_LOW_LATENCY;\n-\t\tbreak;\n-\tcase I40E_LOW_LATENCY:\n-\t\tif (bytes_per_usec > 20)\n-\t\t\tnew_latency_range = I40E_BULK_LATENCY;\n-\t\telse if (bytes_per_usec <= 10)\n-\t\t\tnew_latency_range = I40E_LOWEST_LATENCY;\n-\t\tbreak;\n-\tcase I40E_BULK_LATENCY:\n-\tdefault:\n-\t\tif (bytes_per_usec <= 20)\n-\t\t\tnew_latency_range = I40E_LOW_LATENCY;\n-\t\tbreak;\n+\tif (avg_wire_size <= 60) {\n+\t\t/* Start at 250k ints/sec */\n+\t\tavg_wire_size = 4096;\n+\t} else if (avg_wire_size <= 380) {\n+\t\t/* 250K ints/sec to 60K ints/sec */\n+\t\tavg_wire_size *= 40;\n+\t\tavg_wire_size += 1696;\n+\t} else if (avg_wire_size <= 1084) {\n+\t\t/* 60K ints/sec to 36K ints/sec */\n+\t\tavg_wire_size *= 15;\n+\t\tavg_wire_size += 11452;\n+\t} else if (avg_wire_size <= 1980) {\n+\t\t/* 36K ints/sec to 30K ints/sec */\n+\t\tavg_wire_size *= 5;\n+\t\tavg_wire_size += 22420;\n+\t} else {\n+\t\t/* plateau at a limit of 30K ints/sec */\n+\t\tavg_wire_size = 32256;\n \t}\n \n-reset_latency:\n-\trc->latency_range = new_latency_range;\n+\t/* If we are in low latency mode halve our delay which doubles the\n+\t * rate to somewhere between 100K to 16K ints/sec\n+\t */\n+\tif (itr & I40E_ITR_ADAPTIVE_LATENCY)\n+\t\tavg_wire_size /= 2;\n \n-\tswitch (new_latency_range) {\n-\tcase I40E_LOWEST_LATENCY:\n-\t\trc->target_itr = I40E_ITR_50K;\n-\t\tbreak;\n-\tcase I40E_LOW_LATENCY:\n-\t\trc->target_itr = I40E_ITR_20K;\n-\t\tbreak;\n-\tcase I40E_BULK_LATENCY:\n-\t\trc->target_itr = I40E_ITR_18K;\n-\t\tbreak;\n-\tdefault:\n-\t\tbreak;\n+\t/* Resultant value is 256 times larger than it needs to be. This\n+\t * gives us room to adjust the value as needed to either increase\n+\t * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.\n+\t *\n+\t * Use addition as we have already recorded the new latency flag\n+\t * for the ITR value.\n+\t */\n+\titr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *\n+\t I40E_ITR_ADAPTIVE_MIN_INC;\n+\n+\tif ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {\n+\t\titr &= I40E_ITR_ADAPTIVE_LATENCY;\n+\t\titr += I40E_ITR_ADAPTIVE_MAX_USECS;\n \t}\n \n+clear_counts:\n+\t/* write back value */\n+\trc->target_itr = itr;\n+\n+\t/* next update should occur within next jiffy */\n+\trc->next_update = next_update + 1;\n+\n \trc->total_bytes = 0;\n \trc->total_packets = 0;\n-\trc->last_itr_update = jiffies;\n-\n-\treturn rc->target_itr != rc->current_itr;\n }\n \n /**\n@@ -2289,6 +2433,15 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)\n /* a small macro to shorten up some long lines */\n #define INTREG I40E_PFINT_DYN_CTLN\n \n+/* The act of updating the ITR will cause it to immediately trigger. In order\n+ * to prevent this from throwing off adaptive update statistics we defer the\n+ * update so that it can only happen so often. So after either Tx or Rx are\n+ * updated we make the adaptive scheme wait until either the ITR completely\n+ * expires via the next_update expiration or we have been through at least\n+ * 3 interrupts.\n+ */\n+#define ITR_COUNTDOWN_START 3\n+\n /**\n * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt\n * @vsi: the VSI we care about\n@@ -2299,7 +2452,6 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,\n \t\t\t\t\t struct i40e_q_vector *q_vector)\n {\n \tstruct i40e_hw *hw = &vsi->back->hw;\n-\tbool rx = false, tx = false;\n \tu32 intval;\n \n \t/* If we don't have MSIX, then we only need to re-enable icr0 */\n@@ -2308,61 +2460,49 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,\n \t\treturn;\n \t}\n \n-\t/* avoid dynamic calculation if in countdown mode */\n-\tif (q_vector->itr_countdown > 0)\n-\t\tgoto enable_int;\n+\t/* These will do nothing if dynamic updates are not enabled */\n+\ti40e_update_itr(q_vector, &q_vector->tx);\n+\ti40e_update_itr(q_vector, &q_vector->rx);\n \n-\t/* these will return false if dynamic mode is disabled */\n-\trx = i40e_set_new_dynamic_itr(&q_vector->rx);\n-\ttx = i40e_set_new_dynamic_itr(&q_vector->tx);\n-\n-\tif (rx || tx) {\n-\t\t/* get the higher of the two ITR adjustments and\n-\t\t * use the same value for both ITR registers\n-\t\t * when in adaptive mode (Rx and/or Tx)\n-\t\t */\n-\t\tu16 itr = max(q_vector->tx.target_itr,\n-\t\t\t q_vector->rx.target_itr);\n-\n-\t\tq_vector->tx.target_itr = itr;\n-\t\tq_vector->rx.target_itr = itr;\n-\t}\n-\n-enable_int:\n-\tif (q_vector->rx.target_itr != q_vector->rx.current_itr) {\n+\t/* This block of logic allows us to get away with only updating\n+\t * one ITR value with each interrupt. The idea is to perform a\n+\t * pseudo-lazy update with the following criteria.\n+\t *\n+\t * 1. Rx is given higher priority than Tx if both are in same state\n+\t * 2. If we must reduce an ITR that is given highest priority.\n+\t * 3. We then give priority to increasing ITR based on amount.\n+\t */\n+\tif (q_vector->rx.target_itr < q_vector->rx.current_itr) {\n+\t\t/* Rx ITR needs to be reduced, this is highest priority */\n \t\tintval = i40e_buildreg_itr(I40E_RX_ITR,\n \t\t\t\t\t q_vector->rx.target_itr);\n \t\tq_vector->rx.current_itr = q_vector->rx.target_itr;\n-\n-\t\tif (q_vector->tx.target_itr != q_vector->tx.current_itr) {\n-\t\t\t/* set the INTENA_MSK_MASK so that this first write\n-\t\t\t * won't actually enable the interrupt, instead just\n-\t\t\t * updating the ITR (it's bit 31 PF and VF)\n-\t\t\t *\n-\t\t\t * don't check _DOWN because interrupt isn't being\n-\t\t\t * enabled\n-\t\t\t */\n-\t\t\twr32(hw, INTREG(q_vector->reg_idx),\n-\t\t\t intval | BIT(31));\n-\t\t\t/* now that Rx is done process Tx update */\n-\t\t\tgoto update_tx;\n-\t\t}\n-\t} else if (q_vector->tx.target_itr != q_vector->tx.current_itr) {\n-update_tx:\n+\t\tq_vector->itr_countdown = ITR_COUNTDOWN_START;\n+\t} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||\n+\t\t ((q_vector->rx.target_itr - q_vector->rx.current_itr) <\n+\t\t (q_vector->tx.target_itr - q_vector->tx.current_itr))) {\n+\t\t/* Tx ITR needs to be reduced, this is second priority\n+\t\t * Tx ITR needs to be increased more than Rx, fourth priority\n+\t\t */\n \t\tintval = i40e_buildreg_itr(I40E_TX_ITR,\n \t\t\t\t\t q_vector->tx.target_itr);\n \t\tq_vector->tx.current_itr = q_vector->tx.target_itr;\n+\t\tq_vector->itr_countdown = ITR_COUNTDOWN_START;\n+\t} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {\n+\t\t/* Rx ITR needs to be increased, third priority */\n+\t\tintval = i40e_buildreg_itr(I40E_RX_ITR,\n+\t\t\t\t\t q_vector->rx.target_itr);\n+\t\tq_vector->rx.current_itr = q_vector->rx.target_itr;\n+\t\tq_vector->itr_countdown = ITR_COUNTDOWN_START;\n \t} else {\n+\t\t/* No ITR update, lowest priority */\n \t\tintval = i40e_buildreg_itr(I40E_ITR_NONE, 0);\n+\t\tif (q_vector->itr_countdown)\n+\t\t\tq_vector->itr_countdown--;\n \t}\n \n \tif (!test_bit(__I40E_VSI_DOWN, vsi->state))\n \t\twr32(hw, INTREG(q_vector->reg_idx), intval);\n-\n-\tif (q_vector->itr_countdown)\n-\t\tq_vector->itr_countdown--;\n-\telse\n-\t\tq_vector->itr_countdown = ITR_COUNTDOWN_START;\n }\n \n /**\ndiff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h\nindex 59acc25..9f1788e 100644\n--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h\n+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h\n@@ -460,20 +460,19 @@ static inline void set_ring_xdp(struct i40e_ring *ring)\n \tring->flags |= I40E_TXR_FLAGS_XDP;\n }\n \n-enum i40e_latency_range {\n-\tI40E_LOWEST_LATENCY = 0,\n-\tI40E_LOW_LATENCY = 1,\n-\tI40E_BULK_LATENCY = 2,\n-};\n+#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002\n+#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002\n+#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e\n+#define I40E_ITR_ADAPTIVE_LATENCY 0x8000\n+#define I40E_ITR_ADAPTIVE_BULK 0x0000\n+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))\n \n struct i40e_ring_container {\n-\t/* array of pointers to rings */\n-\tstruct i40e_ring *ring;\n+\tstruct i40e_ring *ring;\t\t/* pointer to linked list of ring(s) */\n+\tunsigned long next_update;\t/* jiffies value of next update */\n \tunsigned int total_bytes;\t/* total bytes processed this int */\n \tunsigned int total_packets;\t/* total packets processed this int */\n-\tunsigned long last_itr_update;\t/* jiffies of last ITR update */\n \tu16 count;\n-\tenum i40e_latency_range latency_range;\n \tu16 target_itr;\t\t\t/* target ITR setting for ring(s) */\n \tu16 current_itr;\t\t/* current ITR setting for ring(s) */\n };\ndiff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c\nindex 1f130e9..eb8f3e3 100644\n--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c\n+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c\n@@ -392,97 +392,241 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)\n \t val);\n }\n \n+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,\n+\t\t\t\t\tstruct i40e_ring_container *rc)\n+{\n+\treturn &q_vector->rx == rc;\n+}\n+\n+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)\n+{\n+\tunsigned int divisor;\n+\n+\tswitch (q_vector->adapter->link_speed) {\n+\tcase I40E_LINK_SPEED_40GB:\n+\t\tdivisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;\n+\t\tbreak;\n+\tcase I40E_LINK_SPEED_25GB:\n+\tcase I40E_LINK_SPEED_20GB:\n+\t\tdivisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;\n+\t\tbreak;\n+\tdefault:\n+\tcase I40E_LINK_SPEED_10GB:\n+\t\tdivisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;\n+\t\tbreak;\n+\tcase I40E_LINK_SPEED_1GB:\n+\tcase I40E_LINK_SPEED_100MB:\n+\t\tdivisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;\n+\t\tbreak;\n+\t}\n+\n+\treturn divisor;\n+}\n+\n /**\n- * i40e_set_new_dynamic_itr - Find new ITR level\n+ * i40e_update_itr - update the dynamic ITR value based on statistics\n+ * @q_vector: structure containing interrupt and ring information\n * @rc: structure containing ring performance data\n *\n- * Returns true if ITR changed, false if not\n- *\n- * Stores a new ITR value based on packets and byte counts during\n- * the last interrupt. The advantage of per interrupt computation\n- * is faster updates and more accurate ITR for the current traffic\n- * pattern. Constants in this function were computed based on\n- * theoretical maximum wire speed and thresholds were set based on\n- * testing data as well as attempting to minimize response time\n+ * Stores a new ITR value based on packets and byte\n+ * counts during the last interrupt. The advantage of per interrupt\n+ * computation is faster updates and more accurate ITR for the current\n+ * traffic pattern. Constants in this function were computed\n+ * based on theoretical maximum wire speed and thresholds were set based\n+ * on testing data as well as attempting to minimize response time\n * while increasing bulk throughput.\n **/\n-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)\n+static void i40e_update_itr(struct i40e_q_vector *q_vector,\n+\t\t\t struct i40e_ring_container *rc)\n {\n-\tenum i40e_latency_range new_latency_range = rc->latency_range;\n-\tint bytes_per_usec;\n-\tunsigned int usecs, estimated_usecs;\n+\tunsigned int avg_wire_size, packets, bytes, itr;\n+\tunsigned long next_update = jiffies;\n \n+\t/* If we don't have any rings just leave ourselves set for maximum\n+\t * possible latency so we take ourselves out of the equation.\n+\t */\n \tif (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))\n-\t\treturn false;\n+\t\treturn;\n \n-\tif (!rc->total_packets || !rc->current_itr)\n-\t\treturn false;\n+\t/* For Rx we want to push the delay up and default to low latency.\n+\t * for Tx we want to pull the delay down and default to high latency.\n+\t */\n+\titr = i40e_container_is_rx(q_vector, rc) ?\n+\t I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :\n+\t I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;\n+\n+\t/* If we didn't update within up to 1 - 2 jiffies we can assume\n+\t * that either packets are coming in so slow there hasn't been\n+\t * any work, or that there is so much work that NAPI is dealing\n+\t * with interrupt moderation and we don't need to do anything.\n+\t */\n+\tif (time_after(next_update, rc->next_update))\n+\t\tgoto clear_counts;\n+\n+\t/* If itr_countdown is set it means we programmed an ITR within\n+\t * the last 4 interrupt cycles. This has a side effect of us\n+\t * potentially firing an early interrupt. In order to work around\n+\t * this we need to throw out any data received for a few\n+\t * interrupts following the update.\n+\t */\n+\tif (q_vector->itr_countdown) {\n+\t\titr = rc->target_itr;\n+\t\tgoto clear_counts;\n+\t}\n \n-\tusecs = (rc->current_itr << 1) * ITR_COUNTDOWN_START;\n-\tbytes_per_usec = rc->total_bytes / usecs;\n+\tpackets = rc->total_packets;\n+\tbytes = rc->total_bytes;\n \n-\t/* The calculations in this algorithm depend on interrupts actually\n-\t * firing at the ITR rate. This may not happen if the packet rate is\n-\t * really low, or if we've been napi polling. Check to make sure\n-\t * that's not the case before we continue.\n+\tif (i40e_container_is_rx(q_vector, rc)) {\n+\t\t/* If Rx there are 1 to 4 packets and bytes are less than\n+\t\t * 9000 assume insufficient data to use bulk rate limiting\n+\t\t * approach unless Tx is already in bulk rate limiting. We\n+\t\t * are likely latency driven.\n+\t\t */\n+\t\tif (packets && packets < 4 && bytes < 9000 &&\n+\t\t (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {\n+\t\t\titr = I40E_ITR_ADAPTIVE_LATENCY;\n+\t\t\tgoto adjust_by_size;\n+\t\t}\n+\t} else if (packets < 4) {\n+\t\t/* If we have Tx and Rx ITR maxed and Tx ITR is running in\n+\t\t * bulk mode and we are receiving 4 or fewer packets just\n+\t\t * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so\n+\t\t * that the Rx can relax.\n+\t\t */\n+\t\tif (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&\n+\t\t (q_vector->rx.target_itr & I40E_ITR_MASK) ==\n+\t\t I40E_ITR_ADAPTIVE_MAX_USECS)\n+\t\t\tgoto clear_counts;\n+\t} else if (packets > 32) {\n+\t\t/* If we have processed over 32 packets in a single interrupt\n+\t\t * for Tx assume we need to switch over to \"bulk\" mode.\n+\t\t */\n+\t\trc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;\n+\t}\n+\n+\t/* We have no packets to actually measure against. This means\n+\t * either one of the other queues on this vector is active or\n+\t * we are a Tx queue doing TSO with too high of an interrupt rate.\n+\t *\n+\t * Between 4 and 56 we can assume that our current interrupt delay\n+\t * is only slightly too low. As such we should increase it by a small\n+\t * fixed amount.\n \t */\n-\testimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);\n-\tif (estimated_usecs > usecs) {\n-\t\tnew_latency_range = I40E_LOW_LATENCY;\n-\t\tgoto reset_latency;\n+\tif (packets < 56) {\n+\t\titr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;\n+\t\tif ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {\n+\t\t\titr &= I40E_ITR_ADAPTIVE_LATENCY;\n+\t\t\titr += I40E_ITR_ADAPTIVE_MAX_USECS;\n+\t\t}\n+\t\tgoto clear_counts;\n \t}\n \n-\t/* simple throttlerate management\n-\t * 0-10MB/s lowest (50000 ints/s)\n-\t * 10-20MB/s low (20000 ints/s)\n-\t * 20-1249MB/s bulk (18000 ints/s)\n+\tif (packets <= 256) {\n+\t\titr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);\n+\t\titr &= I40E_ITR_MASK;\n+\n+\t\t/* Between 56 and 112 is our \"goldilocks\" zone where we are\n+\t\t * working out \"just right\". Just report that our current\n+\t\t * ITR is good for us.\n+\t\t */\n+\t\tif (packets <= 112)\n+\t\t\tgoto clear_counts;\n+\n+\t\t/* If packet count is 128 or greater we are likely looking\n+\t\t * at a slight overrun of the delay we want. Try halving\n+\t\t * our delay to see if that will cut the number of packets\n+\t\t * in half per interrupt.\n+\t\t */\n+\t\titr /= 2;\n+\t\titr &= I40E_ITR_MASK;\n+\t\tif (itr < I40E_ITR_ADAPTIVE_MIN_USECS)\n+\t\t\titr = I40E_ITR_ADAPTIVE_MIN_USECS;\n+\n+\t\tgoto clear_counts;\n+\t}\n+\n+\t/* The paths below assume we are dealing with a bulk ITR since\n+\t * number of packets is greater than 256. We are just going to have\n+\t * to compute a value and try to bring the count under control,\n+\t * though for smaller packet sizes there isn't much we can do as\n+\t * NAPI polling will likely be kicking in sooner rather than later.\n+\t */\n+\titr = I40E_ITR_ADAPTIVE_BULK;\n+\n+adjust_by_size:\n+\t/* If packet counts are 256 or greater we can assume we have a gross\n+\t * overestimation of what the rate should be. Instead of trying to fine\n+\t * tune it just use the formula below to try and dial in an exact value\n+\t * give the current packet size of the frame.\n+\t */\n+\tavg_wire_size = bytes / packets;\n+\n+\t/* The following is a crude approximation of:\n+\t * wmem_default / (size + overhead) = desired_pkts_per_int\n+\t * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate\n+\t * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value\n \t *\n-\t * The math works out because the divisor is in 10^(-6) which\n-\t * turns the bytes/us input value into MB/s values, but\n-\t * make sure to use usecs, as the register values written\n-\t * are in 2 usec increments in the ITR registers, and make sure\n-\t * to use the smoothed values that the countdown timer gives us.\n+\t * Assuming wmem_default is 212992 and overhead is 640 bytes per\n+\t * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the\n+\t * formula down to\n+\t *\n+\t * (170 * (size + 24)) / (size + 640) = ITR\n+\t *\n+\t * We first do some math on the packet size and then finally bitshift\n+\t * by 8 after rounding up. We also have to account for PCIe link speed\n+\t * difference as ITR scales based on this.\n \t */\n-\tswitch (new_latency_range) {\n-\tcase I40E_LOWEST_LATENCY:\n-\t\tif (bytes_per_usec > 10)\n-\t\t\tnew_latency_range = I40E_LOW_LATENCY;\n-\t\tbreak;\n-\tcase I40E_LOW_LATENCY:\n-\t\tif (bytes_per_usec > 20)\n-\t\t\tnew_latency_range = I40E_BULK_LATENCY;\n-\t\telse if (bytes_per_usec <= 10)\n-\t\t\tnew_latency_range = I40E_LOWEST_LATENCY;\n-\t\tbreak;\n-\tcase I40E_BULK_LATENCY:\n-\tdefault:\n-\t\tif (bytes_per_usec <= 20)\n-\t\t\tnew_latency_range = I40E_LOW_LATENCY;\n-\t\tbreak;\n+\tif (avg_wire_size <= 60) {\n+\t\t/* Start at 250k ints/sec */\n+\t\tavg_wire_size = 4096;\n+\t} else if (avg_wire_size <= 380) {\n+\t\t/* 250K ints/sec to 60K ints/sec */\n+\t\tavg_wire_size *= 40;\n+\t\tavg_wire_size += 1696;\n+\t} else if (avg_wire_size <= 1084) {\n+\t\t/* 60K ints/sec to 36K ints/sec */\n+\t\tavg_wire_size *= 15;\n+\t\tavg_wire_size += 11452;\n+\t} else if (avg_wire_size <= 1980) {\n+\t\t/* 36K ints/sec to 30K ints/sec */\n+\t\tavg_wire_size *= 5;\n+\t\tavg_wire_size += 22420;\n+\t} else {\n+\t\t/* plateau at a limit of 30K ints/sec */\n+\t\tavg_wire_size = 32256;\n \t}\n \n-reset_latency:\n-\trc->latency_range = new_latency_range;\n+\t/* If we are in low latency mode halve our delay which doubles the\n+\t * rate to somewhere between 100K to 16K ints/sec\n+\t */\n+\tif (itr & I40E_ITR_ADAPTIVE_LATENCY)\n+\t\tavg_wire_size /= 2;\n \n-\tswitch (new_latency_range) {\n-\tcase I40E_LOWEST_LATENCY:\n-\t\trc->target_itr = I40E_ITR_50K;\n-\t\tbreak;\n-\tcase I40E_LOW_LATENCY:\n-\t\trc->target_itr = I40E_ITR_20K;\n-\t\tbreak;\n-\tcase I40E_BULK_LATENCY:\n-\t\trc->target_itr = I40E_ITR_18K;\n-\t\tbreak;\n-\tdefault:\n-\t\tbreak;\n+\t/* Resultant value is 256 times larger than it needs to be. This\n+\t * gives us room to adjust the value as needed to either increase\n+\t * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.\n+\t *\n+\t * Use addition as we have already recorded the new latency flag\n+\t * for the ITR value.\n+\t */\n+\titr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *\n+\t I40E_ITR_ADAPTIVE_MIN_INC;\n+\n+\tif ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {\n+\t\titr &= I40E_ITR_ADAPTIVE_LATENCY;\n+\t\titr += I40E_ITR_ADAPTIVE_MAX_USECS;\n \t}\n \n+clear_counts:\n+\t/* write back value */\n+\trc->target_itr = itr;\n+\n+\t/* next update should occur within next jiffy */\n+\trc->next_update = next_update + 1;\n+\n \trc->total_bytes = 0;\n \trc->total_packets = 0;\n-\trc->last_itr_update = jiffies;\n-\n-\treturn rc->target_itr != rc->current_itr;\n }\n \n /**\n@@ -1486,6 +1630,15 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)\n /* a small macro to shorten up some long lines */\n #define INTREG I40E_VFINT_DYN_CTLN1\n \n+/* The act of updating the ITR will cause it to immediately trigger. In order\n+ * to prevent this from throwing off adaptive update statistics we defer the\n+ * update so that it can only happen so often. So after either Tx or Rx are\n+ * updated we make the adaptive scheme wait until either the ITR completely\n+ * expires via the next_update expiration or we have been through at least\n+ * 3 interrupts.\n+ */\n+#define ITR_COUNTDOWN_START 3\n+\n /**\n * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt\n * @vsi: the VSI we care about\n@@ -1496,64 +1649,51 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,\n \t\t\t\t\t struct i40e_q_vector *q_vector)\n {\n \tstruct i40e_hw *hw = &vsi->back->hw;\n-\tbool rx = false, tx = false;\n \tu32 intval;\n \n-\t/* avoid dynamic calculation if in countdown mode */\n-\tif (q_vector->itr_countdown > 0)\n-\t\tgoto enable_int;\n+\t/* These will do nothing if dynamic updates are not enabled */\n+\ti40e_update_itr(q_vector, &q_vector->tx);\n+\ti40e_update_itr(q_vector, &q_vector->rx);\n \n-\t/* these will return false if dynamic mode is disabled */\n-\trx = i40e_set_new_dynamic_itr(&q_vector->rx);\n-\ttx = i40e_set_new_dynamic_itr(&q_vector->tx);\n-\n-\tif (rx || tx) {\n-\t\t/* get the higher of the two ITR adjustments and\n-\t\t * use the same value for both ITR registers\n-\t\t * when in adaptive mode (Rx and/or Tx)\n-\t\t */\n-\t\tu16 itr = max(q_vector->tx.target_itr,\n-\t\t\t q_vector->rx.target_itr);\n-\n-\t\tq_vector->tx.target_itr = itr;\n-\t\tq_vector->rx.target_itr = itr;\n-\t}\n-\n-enable_int:\n-\tif (q_vector->rx.target_itr != q_vector->rx.current_itr) {\n+\t/* This block of logic allows us to get away with only updating\n+\t * one ITR value with each interrupt. The idea is to perform a\n+\t * pseudo-lazy update with the following criteria.\n+\t *\n+\t * 1. Rx is given higher priority than Tx if both are in same state\n+\t * 2. If we must reduce an ITR that is given highest priority.\n+\t * 3. We then give priority to increasing ITR based on amount.\n+\t */\n+\tif (q_vector->rx.target_itr < q_vector->rx.current_itr) {\n+\t\t/* Rx ITR needs to be reduced, this is highest priority */\n \t\tintval = i40e_buildreg_itr(I40E_RX_ITR,\n \t\t\t\t\t q_vector->rx.target_itr);\n \t\tq_vector->rx.current_itr = q_vector->rx.target_itr;\n-\n-\t\tif (q_vector->tx.target_itr != q_vector->tx.current_itr) {\n-\t\t\t/* set the INTENA_MSK_MASK so that this first write\n-\t\t\t * won't actually enable the interrupt, instead just\n-\t\t\t * updating the ITR (it's bit 31 PF and VF)\n-\t\t\t *\n-\t\t\t * don't check _DOWN because interrupt isn't being\n-\t\t\t * enabled\n-\t\t\t */\n-\t\t\twr32(hw, INTREG(q_vector->reg_idx),\n-\t\t\t intval | BIT(31));\n-\t\t\t/* now that Rx is done process Tx update */\n-\t\t\tgoto update_tx;\n-\t\t}\n-\t} else if (q_vector->tx.target_itr != q_vector->tx.current_itr) {\n-update_tx:\n+\t\tq_vector->itr_countdown = ITR_COUNTDOWN_START;\n+\t} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||\n+\t\t ((q_vector->rx.target_itr - q_vector->rx.current_itr) <\n+\t\t (q_vector->tx.target_itr - q_vector->tx.current_itr))) {\n+\t\t/* Tx ITR needs to be reduced, this is second priority\n+\t\t * Tx ITR needs to be increased more than Rx, fourth priority\n+\t\t */\n \t\tintval = i40e_buildreg_itr(I40E_TX_ITR,\n \t\t\t\t\t q_vector->tx.target_itr);\n \t\tq_vector->tx.current_itr = q_vector->tx.target_itr;\n+\t\tq_vector->itr_countdown = ITR_COUNTDOWN_START;\n+\t} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {\n+\t\t/* Rx ITR needs to be increased, third priority */\n+\t\tintval = i40e_buildreg_itr(I40E_RX_ITR,\n+\t\t\t\t\t q_vector->rx.target_itr);\n+\t\tq_vector->rx.current_itr = q_vector->rx.target_itr;\n+\t\tq_vector->itr_countdown = ITR_COUNTDOWN_START;\n \t} else {\n+\t\t/* No ITR update, lowest priority */\n \t\tintval = i40e_buildreg_itr(I40E_ITR_NONE, 0);\n+\t\tif (q_vector->itr_countdown)\n+\t\t\tq_vector->itr_countdown--;\n \t}\n \n \tif (!test_bit(__I40E_VSI_DOWN, vsi->state))\n \t\twr32(hw, INTREG(q_vector->reg_idx), intval);\n-\n-\tif (q_vector->itr_countdown)\n-\t\tq_vector->itr_countdown--;\n-\telse\n-\t\tq_vector->itr_countdown = ITR_COUNTDOWN_START;\n }\n \n /**\ndiff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h\nindex 5a6012b..54af658 100644\n--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h\n+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h\n@@ -428,20 +428,19 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)\n \tring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;\n }\n \n-enum i40e_latency_range {\n-\tI40E_LOWEST_LATENCY = 0,\n-\tI40E_LOW_LATENCY = 1,\n-\tI40E_BULK_LATENCY = 2,\n-};\n+#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002\n+#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002\n+#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e\n+#define I40E_ITR_ADAPTIVE_LATENCY 0x8000\n+#define I40E_ITR_ADAPTIVE_BULK 0x0000\n+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))\n \n struct i40e_ring_container {\n-\t/* array of pointers to rings */\n-\tstruct i40e_ring *ring;\n+\tstruct i40e_ring *ring;\t\t/* pointer to linked list of ring(s) */\n+\tunsigned long next_update;\t/* jiffies value of next update */\n \tunsigned int total_bytes;\t/* total bytes processed this int */\n \tunsigned int total_packets;\t/* total packets processed this int */\n-\tunsigned long last_itr_update;\t/* jiffies of last ITR update */\n \tu16 count;\n-\tenum i40e_latency_range latency_range;\n \tu16 target_itr;\t\t\t/* target ITR setting for ring(s) */\n \tu16 current_itr;\t\t/* current ITR setting for ring(s) */\n };\ndiff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h\nindex 9690c1e..b6991e8 100644\n--- a/drivers/net/ethernet/intel/i40evf/i40evf.h\n+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h\n@@ -117,9 +117,8 @@ struct i40e_q_vector {\n \tstruct i40e_ring_container rx;\n \tstruct i40e_ring_container tx;\n \tu32 ring_mask;\n+\tu8 itr_countdown;\t/* when 0 should adjust adaptive ITR */\n \tu8 num_ringpairs;\t/* total number of ring pairs in vector */\n-#define ITR_COUNTDOWN_START 100\n-\tu8 itr_countdown;\t/* when 0 or 1 update ITR */\n \tu16 v_idx;\t\t/* index in the vsi->q_vector array. */\n \tu16 reg_idx;\t\t/* register index of the interrupt */\n \tchar name[IFNAMSIZ + 15];\ndiff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c\nindex 3bf6a12..6fd0992 100644\n--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c\n+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c\n@@ -353,10 +353,9 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)\n \trx_ring->vsi = &adapter->vsi;\n \tq_vector->rx.ring = rx_ring;\n \tq_vector->rx.count++;\n-\tq_vector->rx.latency_range = I40E_LOW_LATENCY;\n+\tq_vector->rx.next_update = jiffies + 1;\n \tq_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);\n \tq_vector->ring_mask |= BIT(r_idx);\n-\tq_vector->itr_countdown = ITR_COUNTDOWN_START;\n \twr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),\n \t q_vector->rx.current_itr);\n \tq_vector->rx.current_itr = q_vector->rx.target_itr;\n@@ -380,9 +379,8 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)\n \ttx_ring->vsi = &adapter->vsi;\n \tq_vector->tx.ring = tx_ring;\n \tq_vector->tx.count++;\n-\tq_vector->tx.latency_range = I40E_LOW_LATENCY;\n+\tq_vector->tx.next_update = jiffies + 1;\n \tq_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);\n-\tq_vector->itr_countdown = ITR_COUNTDOWN_START;\n \tq_vector->num_ringpairs++;\n \twr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),\n \t q_vector->tx.target_itr);\n", "prefixes": [ "next", "S85-V1", "14/14" ] }