Patch Detail

GET /api/patches/786371/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 786371,
    "url": "http://patchwork.ozlabs.org/api/patches/786371/?format=api",
    "web_url": "http://patchwork.ozlabs.org/project/intel-wired-lan/patch/20170710202319.22110-11-jacob.e.keller@intel.com/",
    "project": {
        "id": 46,
        "url": "http://patchwork.ozlabs.org/api/projects/46/?format=api",
        "name": "Intel Wired Ethernet development",
        "link_name": "intel-wired-lan",
        "list_id": "intel-wired-lan.osuosl.org",
        "list_email": "intel-wired-lan@osuosl.org",
        "web_url": "",
        "scm_url": "",
        "webscm_url": "",
        "list_archive_url": "",
        "list_archive_url_format": "",
        "commit_url_format": ""
    },
    "msgid": "<20170710202319.22110-11-jacob.e.keller@intel.com>",
    "list_archive_url": null,
    "date": "2017-07-10T20:23:14",
    "name": "[v3,11/16] fm10k: prepare_for_reset() when we lose PCIe Link",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": false,
    "hash": "5ab96d4a23724191d6d9028c308812be14c295c1",
    "submitter": {
        "id": 9784,
        "url": "http://patchwork.ozlabs.org/api/people/9784/?format=api",
        "name": "Jacob Keller",
        "email": "jacob.e.keller@intel.com"
    },
    "delegate": {
        "id": 68,
        "url": "http://patchwork.ozlabs.org/api/users/68/?format=api",
        "username": "jtkirshe",
        "first_name": "Jeff",
        "last_name": "Kirsher",
        "email": "jeffrey.t.kirsher@intel.com"
    },
    "mbox": "http://patchwork.ozlabs.org/project/intel-wired-lan/patch/20170710202319.22110-11-jacob.e.keller@intel.com/mbox/",
    "series": [],
    "comments": "http://patchwork.ozlabs.org/api/patches/786371/comments/",
    "check": "pending",
    "checks": "http://patchwork.ozlabs.org/api/patches/786371/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<intel-wired-lan-bounces@osuosl.org>",
        "X-Original-To": [
            "incoming@patchwork.ozlabs.org",
            "intel-wired-lan@lists.osuosl.org"
        ],
        "Delivered-To": [
            "patchwork-incoming@bilbo.ozlabs.org",
            "intel-wired-lan@lists.osuosl.org"
        ],
        "Received": [
            "from hemlock.osuosl.org (smtp2.osuosl.org [140.211.166.133])\n\t(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3x5xVx1jmzz9s06\n\tfor <incoming@patchwork.ozlabs.org>;\n\tTue, 11 Jul 2017 06:23:37 +1000 (AEST)",
            "from localhost (localhost [127.0.0.1])\n\tby hemlock.osuosl.org (Postfix) with ESMTP id 9728D888B0;\n\tMon, 10 Jul 2017 20:23:35 +0000 (UTC)",
            "from hemlock.osuosl.org ([127.0.0.1])\n\tby localhost (.osuosl.org [127.0.0.1]) (amavisd-new, port 10024)\n\twith ESMTP id cXvQUQ8e2XJp; Mon, 10 Jul 2017 20:23:32 +0000 (UTC)",
            "from ash.osuosl.org (ash.osuosl.org [140.211.166.34])\n\tby hemlock.osuosl.org (Postfix) with ESMTP id 9BA5688982;\n\tMon, 10 Jul 2017 20:23:31 +0000 (UTC)",
            "from silver.osuosl.org (smtp3.osuosl.org [140.211.166.136])\n\tby ash.osuosl.org (Postfix) with ESMTP id 2795E1CEAAB\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tMon, 10 Jul 2017 20:23:23 +0000 (UTC)",
            "from localhost (localhost [127.0.0.1])\n\tby silver.osuosl.org (Postfix) with ESMTP id A41282FFC7\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tMon, 10 Jul 2017 20:23:23 +0000 (UTC)",
            "from silver.osuosl.org ([127.0.0.1])\n\tby localhost (.osuosl.org [127.0.0.1]) (amavisd-new, port 10024)\n\twith ESMTP id 7AJyxoVgdn72 for <intel-wired-lan@lists.osuosl.org>;\n\tMon, 10 Jul 2017 20:23:22 +0000 (UTC)",
            "from mga14.intel.com (mga14.intel.com [192.55.52.115])\n\tby silver.osuosl.org (Postfix) with ESMTPS id B65DC2FFC6\n\tfor <intel-wired-lan@lists.osuosl.org>;\n\tMon, 10 Jul 2017 20:23:22 +0000 (UTC)",
            "from fmsmga004.fm.intel.com ([10.253.24.48])\n\tby fmsmga103.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t10 Jul 2017 13:23:22 -0700",
            "from jekeller-desk.amr.corp.intel.com (HELO\n\tjekeller-desk.jekeller.internal) ([134.134.177.230])\n\tby fmsmga004.fm.intel.com with ESMTP; 10 Jul 2017 13:23:21 -0700"
        ],
        "X-Virus-Scanned": [
            "amavisd-new at osuosl.org",
            "amavisd-new at osuosl.org"
        ],
        "X-Greylist": "domain auto-whitelisted by SQLgrey-1.7.6",
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.40,342,1496127600\"; d=\"scan'208\";a=\"285198872\"",
        "From": "Jacob Keller <jacob.e.keller@intel.com>",
        "To": "jtkirhse@osuosl.org, Intel Wired LAN <intel-wired-lan@lists.osuosl.org>",
        "Date": "Mon, 10 Jul 2017 13:23:14 -0700",
        "Message-Id": "<20170710202319.22110-11-jacob.e.keller@intel.com>",
        "X-Mailer": "git-send-email 2.13.0.615.gb09ed6e59a40",
        "In-Reply-To": "<20170710202319.22110-1-jacob.e.keller@intel.com>",
        "References": "<20170710202319.22110-1-jacob.e.keller@intel.com>",
        "Cc": "jekeller@osuosl.org",
        "Subject": "[Intel-wired-lan] [PATCH v3 11/16] fm10k: prepare_for_reset() when\n\twe lose PCIe Link",
        "X-BeenThere": "intel-wired-lan@osuosl.org",
        "X-Mailman-Version": "2.1.18-1",
        "Precedence": "list",
        "List-Id": "Intel Wired Ethernet Linux Kernel Driver Development\n\t<intel-wired-lan.osuosl.org>",
        "List-Unsubscribe": "<https://lists.osuosl.org/mailman/options/intel-wired-lan>, \n\t<mailto:intel-wired-lan-request@osuosl.org?subject=unsubscribe>",
        "List-Archive": "<http://lists.osuosl.org/pipermail/intel-wired-lan/>",
        "List-Post": "<mailto:intel-wired-lan@osuosl.org>",
        "List-Help": "<mailto:intel-wired-lan-request@osuosl.org?subject=help>",
        "List-Subscribe": "<https://lists.osuosl.org/mailman/listinfo/intel-wired-lan>, \n\t<mailto:intel-wired-lan-request@osuosl.org?subject=subscribe>",
        "MIME-Version": "1.0",
        "Content-Type": "text/plain; charset=\"us-ascii\"",
        "Content-Transfer-Encoding": "7bit",
        "Errors-To": "intel-wired-lan-bounces@osuosl.org",
        "Sender": "\"Intel-wired-lan\" <intel-wired-lan-bounces@osuosl.org>"
    },
    "content": "If we lose PCIe link, such as when an unannounced PFLR event occurs, or\nwhen a device is surprise removed, we currently detach the device and\nclose the netdev. This unfortunately leaves a lot of things still\nactive, such as the msix_mbx_pf IRQ, and Tx/Rx resources.\n\nThis can cause problems because the register reads will return\npotentially invalid values which may result in unknown driver behavior.\n\nBegin the process of resetting using fm10k_prepare_for_reset(), much in\nthe same way as the suspend and resume cycle does. This will attempt to\nshutdown as much as possible, in order to prevent possible issues.\n\nA naive implementation for this has issues, because there are now\nmultiple flows calling the reset logic and setting a reset bit. This\nwould cause problems, because the \"re-attach\" routine might call\nfm10k_handle_reset() prior to the reset actually finishing. Instead,\nwe'll add state bits to indicate which flow actually initiated the\nreset.\n\nFor the general reset flow, we'll assume that if someone else is\nresetting that we do not need to handle it at all, so it does not need\nits own state bit. For the suspend case, we will simply issue a warning\nindicating that we are attempting to recover from this case when\nresuming.\n\nFor the detached subtask, we'll simply refuse to re-attach until we've\nactually initiated a reset as part of that flow.\n\nFinally, we'll stop attempting to manage the mailbox subtask when we're\ndetached, since there's nothing we can do if we don't have a PCIe\naddress.\n\nOverall this produces a much cleaner shutdown and recovery cycle for\na PCIe surprise remove event.\n\nSigned-off-by: Jacob Keller <jacob.e.keller@intel.com>\n---\n drivers/net/ethernet/intel/fm10k/fm10k.h     |   2 +\n drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 105 ++++++++++++++++++++-------\n 2 files changed, 80 insertions(+), 27 deletions(-)",
    "diff": "diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h\nindex 689c413b7782..ba70c58ca920 100644\n--- a/drivers/net/ethernet/intel/fm10k/fm10k.h\n+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h\n@@ -270,6 +270,8 @@ enum fm10k_flags_t {\n \n enum fm10k_state_t {\n \t__FM10K_RESETTING,\n+\t__FM10K_RESET_DETACHED,\n+\t__FM10K_RESET_SUSPENDED,\n \t__FM10K_DOWN,\n \t__FM10K_SERVICE_SCHED,\n \t__FM10K_SERVICE_REQUEST,\ndiff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c\nindex cfbe25e90006..0e2bf18cc335 100644\n--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c\n+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c\n@@ -153,7 +153,15 @@ static void fm10k_service_timer(unsigned long data)\n \tfm10k_service_event_schedule(interface);\n }\n \n-static void fm10k_prepare_for_reset(struct fm10k_intfc *interface)\n+/**\n+ * fm10k_prepare_for_reset - Prepare the driver and device for a pending reset\n+ * @interface: fm10k private data structure\n+ *\n+ * This function prepares for a device reset by shutting as much down as we\n+ * can. It does nothing and returns false if __FM10K_RESETTING was already set\n+ * prior to calling this function. It returns true if it actually did work.\n+ */\n+static bool fm10k_prepare_for_reset(struct fm10k_intfc *interface)\n {\n \tstruct net_device *netdev = interface->netdev;\n \n@@ -162,8 +170,9 @@ static void fm10k_prepare_for_reset(struct fm10k_intfc *interface)\n \t/* put off any impending NetWatchDogTimeout */\n \tnetif_trans_update(netdev);\n \n-\twhile (test_and_set_bit(__FM10K_RESETTING, interface->state))\n-\t\tusleep_range(1000, 2000);\n+\t/* Nothing to do if a reset is already in progress */\n+\tif (test_and_set_bit(__FM10K_RESETTING, interface->state))\n+\t\treturn false;\n \n \trtnl_lock();\n \n@@ -181,6 +190,8 @@ static void fm10k_prepare_for_reset(struct fm10k_intfc *interface)\n \tinterface->last_reset = jiffies + (10 * HZ);\n \n \trtnl_unlock();\n+\n+\treturn true;\n }\n \n static int fm10k_handle_reset(struct fm10k_intfc *interface)\n@@ -189,6 +200,8 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface)\n \tstruct fm10k_hw *hw = &interface->hw;\n \tint err;\n \n+\tWARN_ON(!test_bit(__FM10K_RESETTING, interface->state));\n+\n \trtnl_lock();\n \n \tpci_set_master(interface->pdev);\n@@ -267,35 +280,70 @@ static void fm10k_detach_subtask(struct fm10k_intfc *interface)\n \tstruct net_device *netdev = interface->netdev;\n \tu32 __iomem *hw_addr;\n \tu32 value;\n+\tint err;\n \n-\t/* do nothing if device is still present or hw_addr is set */\n+\t/* do nothing if netdev is still present or hw_addr is set */\n \tif (netif_device_present(netdev) || interface->hw.hw_addr)\n \t\treturn;\n \n+\t/* We've lost the PCIe register space, and can no longer access the\n+\t * device. Shut everything except the detach subtask down and prepare\n+\t * to reset the device in case we recover. If we actually prepare for\n+\t * reset, indicate that we're detached.\n+\t */\n+\tif (fm10k_prepare_for_reset(interface))\n+\t\tset_bit(__FM10K_RESET_DETACHED, interface->state);\n+\n \t/* check the real address space to see if we've recovered */\n \thw_addr = READ_ONCE(interface->uc_addr);\n \tvalue = readl(hw_addr);\n \tif (~value) {\n+\t\t/* Make sure the reset was initiated because we detached,\n+\t\t * otherwise we might race with a different reset flow.\n+\t\t */\n+\t\tif (!test_and_clear_bit(__FM10K_RESET_DETACHED,\n+\t\t\t\t\tinterface->state))\n+\t\t\treturn;\n+\n+\t\t/* Restore the hardware address */\n \t\tinterface->hw.hw_addr = interface->uc_addr;\n+\n+\t\t/* PCIe link has been restored, and the device is active\n+\t\t * again. Restore everything and reset the device.\n+\t\t */\n+\t\terr = fm10k_handle_reset(interface);\n+\t\tif (err) {\n+\t\t\tnetdev_err(netdev, \"Unable to reset device: %d\\n\", err);\n+\t\t\tinterface->hw.hw_addr = NULL;\n+\t\t\treturn;\n+\t\t}\n+\n+\t\t/* Re-attach the netdev */\n \t\tnetif_device_attach(netdev);\n-\t\tset_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);\n \t\tnetdev_warn(netdev, \"PCIe link restored, device now attached\\n\");\n \t\treturn;\n \t}\n-\n-\trtnl_lock();\n-\n-\tif (netif_running(netdev))\n-\t\tdev_close(netdev);\n-\n-\trtnl_unlock();\n }\n \n-static void fm10k_reinit(struct fm10k_intfc *interface)\n+static void fm10k_reset_subtask(struct fm10k_intfc *interface)\n {\n \tint err;\n \n-\tfm10k_prepare_for_reset(interface);\n+\tif (!test_and_clear_bit(FM10K_FLAG_RESET_REQUESTED,\n+\t\t\t\tinterface->flags))\n+\t\treturn;\n+\n+\t/* If another thread has already prepared to reset the device, we\n+\t * should not attempt to handle a reset here, since we'd race with\n+\t * that thread. This may happen if we suspend the device or if the\n+\t * PCIe link is lost. In this case, we'll just ignore the RESET\n+\t * request, as it will (eventually) be taken care of when the thread\n+\t * which actually started the reset is finished.\n+\t */\n+\tif (!fm10k_prepare_for_reset(interface))\n+\t\treturn;\n+\n+\tnetdev_err(interface->netdev, \"Reset interface\\n\");\n \n \terr = fm10k_handle_reset(interface);\n \tif (err)\n@@ -303,17 +351,6 @@ static void fm10k_reinit(struct fm10k_intfc *interface)\n \t\t\t\"fm10k_handle_reset failed: %d\\n\", err);\n }\n \n-static void fm10k_reset_subtask(struct fm10k_intfc *interface)\n-{\n-\tif (!test_and_clear_bit(FM10K_FLAG_RESET_REQUESTED,\n-\t\t\t\tinterface->flags))\n-\t\treturn;\n-\n-\tnetdev_err(interface->netdev, \"Reset interface\\n\");\n-\n-\tfm10k_reinit(interface);\n-}\n-\n /**\n  * fm10k_configure_swpri_map - Configure Receive SWPRI to PC mapping\n  * @interface: board private structure\n@@ -381,6 +418,10 @@ static void fm10k_watchdog_update_host_state(struct fm10k_intfc *interface)\n  **/\n static void fm10k_mbx_subtask(struct fm10k_intfc *interface)\n {\n+\t/* If we're resetting, bail out */\n+\tif (test_bit(__FM10K_RESETTING, interface->state))\n+\t\treturn;\n+\n \t/* process upstream mailbox and update device state */\n \tfm10k_watchdog_update_host_state(interface);\n \n@@ -630,9 +671,11 @@ static void fm10k_service_task(struct work_struct *work)\n \n \tinterface = container_of(work, struct fm10k_intfc, service_task);\n \n+\t/* Check whether we're detached first */\n+\tfm10k_detach_subtask(interface);\n+\n \t/* tasks run even when interface is down */\n \tfm10k_mbx_subtask(interface);\n-\tfm10k_detach_subtask(interface);\n \tfm10k_reset_subtask(interface);\n \n \t/* tasks only run when interface is up */\n@@ -2177,7 +2220,8 @@ static void fm10k_prepare_suspend(struct fm10k_intfc *interface)\n \t */\n \tfm10k_stop_service_event(interface);\n \n-\tfm10k_prepare_for_reset(interface);\n+\tif (fm10k_prepare_for_reset(interface))\n+\t\tset_bit(__FM10K_RESET_SUSPENDED, interface->state);\n }\n \n static int fm10k_handle_resume(struct fm10k_intfc *interface)\n@@ -2185,6 +2229,13 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface)\n \tstruct fm10k_hw *hw = &interface->hw;\n \tint err;\n \n+\t/* Even if we didn't properly prepare for reset in\n+\t * fm10k_prepare_suspend, we'll attempt to resume anyways.\n+\t */\n+\tif (!test_and_clear_bit(__FM10K_RESET_SUSPENDED, interface->state))\n+\t\tdev_warn(&interface->pdev->dev,\n+\t\t\t \"Device was shut down as part of suspend... Attempting to recover\\n\");\n+\n \t/* reset statistics starting values */\n \thw->mac.ops.rebind_hw_stats(hw, &interface->stats);\n \n",
    "prefixes": [
        "v3",
        "11/16"
    ]
}