From patchwork Thu May 17 07:43:03 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oza Pawandeep X-Patchwork-Id: 915153 X-Patchwork-Delegate: bhelgaas@google.com Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codeaurora.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40mjw53SgTz9s33 for ; Thu, 17 May 2018 17:43:17 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752407AbeEQHnP (ORCPT ); Thu, 17 May 2018 03:43:15 -0400 Received: from alexa-out.qualcomm.com ([129.46.98.28]:57577 "EHLO alexa-out.qualcomm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752393AbeEQHnP (ORCPT ); Thu, 17 May 2018 03:43:15 -0400 X-IronPort-AV: E=Sophos;i="5.49,409,1520924400"; d="scan'208";a="17447178" Received: from ironmsg-sd-alpha.qualcomm.com ([10.53.140.30]) by alexa-out.qualcomm.com with ESMTP; 17 May 2018 00:43:14 -0700 X-IronPort-AV: E=McAfee;i="5900,7806,8890"; a="195745864" Received: from westreach.qualcomm.com ([10.228.196.125]) by ironmsg-SD-alpha.qualcomm.com with ESMTP; 17 May 2018 00:43:13 -0700 Received: by westreach.qualcomm.com (Postfix, from userid 467151) id 765BBE29; Thu, 17 May 2018 03:43:12 -0400 (EDT) From: Oza Pawandeep To: Bjorn Helgaas , Philippe Ombredanne , Thomas Gleixner , Greg Kroah-Hartman , Kate Stewart , linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org, Dongdong Liu , Keith Busch , Wei Zhang , Sinan Kaya , Timur Tabi Cc: Oza Pawandeep , Bjorn Helgaas Subject: [PATCH v17 1/9] PCI: Add generic pcie_wait_for_link() interface Date: Thu, 17 
May 2018 03:43:03 -0400 Message-Id: <1526542991-5291-2-git-send-email-poza@codeaurora.org> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1526542991-5291-1-git-send-email-poza@codeaurora.org> References: <1526542991-5291-1-git-send-email-poza@codeaurora.org> Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org Clients such as hotplug and Downstream Port Containment (DPC) both need to wait until a link becomes active or inactive. Add a generic pcie_wait_for_link() interface and use it instead of duplicating the code. Signed-off-by: Oza Pawandeep Signed-off-by: Bjorn Helgaas Reviewed-by: Keith Busch diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 18a42f8..e0c2b8e 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -231,25 +231,11 @@ bool pciehp_check_link_active(struct controller *ctrl) return ret; } -static void __pcie_wait_link_active(struct controller *ctrl, bool active) -{ - int timeout = 1000; - - if (pciehp_check_link_active(ctrl) == active) - return; - while (timeout > 0) { - msleep(10); - timeout -= 10; - if (pciehp_check_link_active(ctrl) == active) - return; - } - ctrl_dbg(ctrl, "Data Link Layer Link Active not %s in 1000 msec\n", - active ? "set" : "cleared"); -} - static void pcie_wait_link_active(struct controller *ctrl) { - __pcie_wait_link_active(ctrl, true); + struct pci_dev *pdev = ctrl_dev(ctrl); + + pcie_wait_for_link(pdev, true); } static bool pci_bus_check_dev(struct pci_bus *bus, int devfn) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e597655..764bf64 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4138,6 +4138,35 @@ static int pci_pm_reset(struct pci_dev *dev, int probe) return pci_dev_wait(dev, "PM D3->D0", PCIE_RESET_READY_POLL_MS); } +/** + * pcie_wait_for_link - Wait until link is active or inactive + * @pdev: Bridge device + * @active: waiting for active or inactive? 
+ * + * Use this to wait till link becomes active or inactive. + */ +bool pcie_wait_for_link(struct pci_dev *pdev, bool active) +{ + int timeout = 1000; + bool ret; + u16 lnk_status; + + for (;;) { + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); + ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA); + if (ret == active) + return true; + if (timeout <= 0) + break; + msleep(10); + timeout -= 10; + } + + pci_info(pdev, "Data Link Layer Link Active not %s in 1000 msec\n", + active ? "set" : "cleared"); + + return false; +} void pci_reset_secondary_bus(struct pci_dev *dev) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 023f7cf..cec9d8c 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -353,6 +353,7 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, void pci_enable_acs(struct pci_dev *dev); +bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 8c57d60..80ec384 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -68,19 +68,9 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc) static void dpc_wait_link_inactive(struct dpc_dev *dpc) { - unsigned long timeout = jiffies + HZ; struct pci_dev *pdev = dpc->dev->port; - struct device *dev = &dpc->dev->device; - u16 lnk_status; - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); - while (lnk_status & PCI_EXP_LNKSTA_DLLLA && - !time_after(jiffies, timeout)) { - msleep(10); - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); - } - if (lnk_status & PCI_EXP_LNKSTA_DLLLA) - dev_warn(dev, "Link state not disabled for DPC event\n"); + pcie_wait_for_link(pdev, false); } static void dpc_work(struct work_struct *work) From patchwork Thu May 17 07:43:04 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 
7bit X-Patchwork-Submitter: Oza Pawandeep X-Patchwork-Id: 915157 X-Patchwork-Delegate: bhelgaas@google.com Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codeaurora.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40mjx76bhHz9s3c for ; Thu, 17 May 2018 17:44:11 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752616AbeEQHoD (ORCPT ); Thu, 17 May 2018 03:44:03 -0400 Received: from wolverine01.qualcomm.com ([199.106.114.254]:18821 "EHLO wolverine01.qualcomm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752396AbeEQHnX (ORCPT ); Thu, 17 May 2018 03:43:23 -0400 X-IronPort-AV: E=Sophos;i="5.49,409,1520924400"; d="scan'208";a="340813087" Received: from unknown (HELO ironmsg05-sd.qualcomm.com) ([10.53.140.145]) by wolverine01.qualcomm.com with ESMTP; 17 May 2018 00:43:14 -0700 X-IronPort-AV: E=McAfee;i="5900,7806,8895"; a="87448687" Received: from westreach.qualcomm.com ([10.228.196.125]) by ironmsg05-sd.qualcomm.com with ESMTP; 17 May 2018 00:43:12 -0700 Received: by westreach.qualcomm.com (Postfix, from userid 467151) id 8922A1F2E; Thu, 17 May 2018 03:43:12 -0400 (EDT) From: Oza Pawandeep To: Bjorn Helgaas , Philippe Ombredanne , Thomas Gleixner , Greg Kroah-Hartman , Kate Stewart , linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org, Dongdong Liu , Keith Busch , Wei Zhang , Sinan Kaya , Timur Tabi Cc: Oza Pawandeep , Bjorn Helgaas Subject: [PATCH v17 2/9] PCI/AER: Handle ERR_FATAL with removal and re-enumeration of devices Date: Thu, 17 May 2018 03:43:04 -0400 Message-Id: 
<1526542991-5291-3-git-send-email-poza@codeaurora.org> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1526542991-5291-1-git-send-email-poza@codeaurora.org> References: <1526542991-5291-1-git-send-email-poza@codeaurora.org> Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org PCIe ERR_FATAL errors mean the Link is unreliable. Components on the Link may need to be reset to return to reliable operation (PCIe r4.0, sec 6.2.2). We previously handled these errors much differently depending on whether the platform supports Downstream Port Containment (DPC) (PCIe r4.0, sec 6.2.10) or not. The AER driver has historically logged the error details, called driver-supplied pci_error_handlers callbacks, and reset the Link. This reset downstream devices, but did not remove them from the PCI subsystem, re-enumerate them, or call their driver .remove() or .probe() methods. DPC is different because the hardware automatically disables the Link when it detects ERR_FATAL, which resets downstream devices. There's no opportunity for pci_error_handlers callbacks before resetting the Link. The DPC driver removes affected devices (which calls their .remove() methods), brings the Link back up, and re-enumerates (which calls driver .probe() methods). Align AER ERR_FATAL handling with DPC by resetting the Link in software, skipping the driver pci_error_handlers callbacks, removing the devices from the PCI subsystem, and re-enumerating. The idea is that drivers and devices should see the same behavior for ERR_FATAL events, regardless of whether they're handled by AER or DPC. 
Here are the basic ERR_FATAL recovery steps, showing the previous AER behavior, the AER behavior after this patch, and the DPC behavior: AER AER DPC previous new behavior -------- --- -------- Log error yes yes yes (minimal) drv.error_detected() yes no no Reset Link yes yes yes drv.mmio_enabled() yes no no drv.slot_reset() yes no no drv.resume() yes no no Remove PCI devices no yes yes (calls drv.remove()) Re-enumerate no yes yes (calls drv.probe()) N.B. With DPC, the Link reset happens before the driver .remove() calls, while with AER, the reset happens *after* the .remove() calls. Signed-off-by: Oza Pawandeep [bhelgaas: changelog, squash doc patch into this] Signed-off-by: Bjorn Helgaas Reviewed-by: Keith Busch diff --git a/Documentation/PCI/pci-error-recovery.txt b/Documentation/PCI/pci-error-recovery.txt index 0b6bb3e..688b691 100644 --- a/Documentation/PCI/pci-error-recovery.txt +++ b/Documentation/PCI/pci-error-recovery.txt @@ -110,7 +110,7 @@ The actual steps taken by a platform to recover from a PCI error event will be platform-dependent, but will follow the general sequence described below. -STEP 0: Error Event +STEP 0: Error Event: ERR_NONFATAL ------------------- A PCI bus error is detected by the PCI hardware. On powerpc, the slot is isolated, in that all I/O is blocked: all reads return 0xffffffff, @@ -228,13 +228,7 @@ proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations). If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform proceeds to STEP 4 (Slot Reset) -STEP 3: Link Reset ------------------- -The platform resets the link. This is a PCI-Express specific step -and is done whenever a fatal error has been detected that can be -"solved" by resetting the link. - -STEP 4: Slot Reset +STEP 3: Slot Reset ------------------ In response to a return value of PCI_ERS_RESULT_NEED_RESET, the @@ -320,7 +314,7 @@ Failure). >>> However, it probably should. 
-STEP 5: Resume Operations +STEP 4: Resume Operations ------------------------- The platform will call the resume() callback on all affected device drivers if all drivers on the segment have returned @@ -332,7 +326,7 @@ a result code. At this point, if a new error happens, the platform will restart a new error recovery sequence. -STEP 6: Permanent Failure +STEP 5: Permanent Failure ------------------------- A "permanent failure" has occurred, and the platform cannot recover the device. The platform will call error_detected() with a @@ -355,6 +349,27 @@ errors. See the discussion in powerpc/eeh-pci-error-recovery.txt for additional detail on real-life experience of the causes of software errors. +STEP 0: Error Event: ERR_FATAL +------------------- +A PCI bus error is detected by the PCI hardware. On powerpc, the slot is +isolated, in that all I/O is blocked: all reads return 0xffffffff, all +writes are ignored. + +STEP 1: Remove devices +-------------------- +The platform removes the affected devices. Depending on the error agent, +these are all subordinates of the reporting port, or all devices below the +upstream component (likely a downstream port) of the reporting device. + +STEP 2: Reset link +-------------------- +The platform resets the link. This is a PCI-Express specific step and is +done whenever a fatal error has been detected that can be "solved" by +resetting the link. + +STEP 3: Re-enumerate the devices +-------------------- +The platform initiates re-enumeration of the devices below the reset link. 
Conclusion; General Remarks --------------------------- diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 779b387..377e576 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -353,10 +353,7 @@ static void aer_error_resume(struct pci_dev *dev) pos = dev->aer_cap; pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); - if (dev->error_state == pci_channel_io_normal) - status &= ~mask; /* Clear corresponding nonfatal bits */ - else - status &= mask; /* Clear corresponding fatal bits */ + status &= ~mask; /* Clear corresponding nonfatal bits */ pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 0ea5acc..b56f9c1 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -20,6 +20,7 @@ #include #include #include "aerdrv.h" +#include "../../pci.h" #define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) @@ -475,35 +476,82 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) } /** - * do_recovery - handle nonfatal/fatal error recovery process + * do_fatal_recovery - handle fatal error recovery process * @dev: pointer to a pci_dev data structure of agent detecting an error - * @severity: error severity type * - * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast + * Invoked when an error is fatal. Once being invoked, removes the devices + * beneath this AER agent, followed by reset link e.g. secondary bus reset + * followed by re-enumeration of devices. 
+ */ +static void do_fatal_recovery(struct pci_dev *dev) +{ + struct pci_dev *udev; + struct pci_bus *parent; + struct pci_dev *pdev, *temp; + pci_ers_result_t result; + struct aer_broadcast_data result_data; + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + udev = dev; + else + udev = dev->bus->self; + + parent = udev->subordinate; + pci_lock_rescan_remove(); + list_for_each_entry_safe_reverse(pdev, temp, &parent->devices, + bus_list) { + pci_dev_get(pdev); + pci_dev_set_disconnected(pdev, NULL); + if (pci_has_subordinate(pdev)) + pci_walk_bus(pdev->subordinate, + pci_dev_set_disconnected, NULL); + pci_stop_and_remove_bus_device(pdev); + pci_dev_put(pdev); + } + + result = reset_link(udev); + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + /* + * If the error is reported by a bridge, we think this error + * is related to the downstream link of the bridge, so we + * do error recovery on all subordinates of the bridge instead + * of the bridge and clear the error status of the bridge. + */ + pci_cleanup_aer_uncorrect_error_status(dev); + } + + if (result == PCI_ERS_RESULT_RECOVERED) { + if (pcie_wait_for_link(udev, true)) + pci_rescan_bus(udev->bus); + } else { + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); + pci_info(dev, "AER: Device recovery from fatal error failed\n"); + } + + pci_unlock_rescan_remove(); +} + +/** + * do_nonfatal_recovery - handle nonfatal error recovery process + * @dev: pointer to a pci_dev data structure of agent detecting an error + * + * Invoked when an error is nonfatal. Once being invoked, broadcast * error detected message to all downstream drivers within a hierarchy in * question and return the returned code. 
*/ -static void do_recovery(struct pci_dev *dev, int severity) +static void do_nonfatal_recovery(struct pci_dev *dev) { - pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED; + pci_ers_result_t status; enum pci_channel_state state; - if (severity == AER_FATAL) - state = pci_channel_io_frozen; - else - state = pci_channel_io_normal; + state = pci_channel_io_normal; status = broadcast_error_message(dev, state, "error_detected", report_error_detected); - if (severity == AER_FATAL) { - result = reset_link(dev); - if (result != PCI_ERS_RESULT_RECOVERED) - goto failed; - } - if (status == PCI_ERS_RESULT_CAN_RECOVER) status = broadcast_error_message(dev, state, @@ -562,8 +610,10 @@ static void handle_error_source(struct pcie_device *aerdev, if (pos) pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, info->status); - } else - do_recovery(dev, info->severity); + } else if (info->severity == AER_NONFATAL) + do_nonfatal_recovery(dev); + else if (info->severity == AER_FATAL) + do_fatal_recovery(dev); } #ifdef CONFIG_ACPI_APEI_PCIEAER @@ -627,8 +677,10 @@ static void aer_recover_work_func(struct work_struct *work) continue; } cper_print_aer(pdev, entry.severity, entry.regs); - if (entry.severity != AER_CORRECTABLE) - do_recovery(pdev, entry.severity); + if (entry.severity == AER_NONFATAL) + do_nonfatal_recovery(pdev); + else if (entry.severity == AER_FATAL) + do_fatal_recovery(pdev); pci_dev_put(pdev); } } From patchwork Thu May 17 07:43:05 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oza Pawandeep X-Patchwork-Id: 915165 X-Patchwork-Delegate: bhelgaas@google.com Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; 
dmarc=none (p=none dis=none) header.from=codeaurora.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40mjzm2wJwz9s33 for ; Thu, 17 May 2018 17:46:28 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751888AbeEQHqN (ORCPT ); Thu, 17 May 2018 03:46:13 -0400 Received: from alexa-out.qualcomm.com ([129.46.98.28]:57577 "EHLO alexa-out.qualcomm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752415AbeEQHnQ (ORCPT ); Thu, 17 May 2018 03:43:16 -0400 X-IronPort-AV: E=Sophos;i="5.49,409,1520924400"; d="scan'208";a="17447181" Received: from ironmsg03-sd.qualcomm.com ([10.53.140.143]) by alexa-out.qualcomm.com with ESMTP; 17 May 2018 00:43:15 -0700 X-IronPort-AV: E=McAfee;i="5900,7806,8895"; a="163842601" Received: from westreach.qualcomm.com ([10.228.196.125]) by ironmsg03-sd.qualcomm.com with ESMTP; 17 May 2018 00:43:13 -0700 Received: by westreach.qualcomm.com (Postfix, from userid 467151) id 91A971F2C; Thu, 17 May 2018 03:43:12 -0400 (EDT) From: Oza Pawandeep To: Bjorn Helgaas , Philippe Ombredanne , Thomas Gleixner , Greg Kroah-Hartman , Kate Stewart , linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org, Dongdong Liu , Keith Busch , Wei Zhang , Sinan Kaya , Timur Tabi Cc: Oza Pawandeep , Bjorn Helgaas Subject: [PATCH v17 3/9] PCI/AER: Rename error recovery interfaces to generic PCI naming Date: Thu, 17 May 2018 03:43:05 -0400 Message-Id: <1526542991-5291-4-git-send-email-poza@codeaurora.org> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1526542991-5291-1-git-send-email-poza@codeaurora.org> References: <1526542991-5291-1-git-send-email-poza@codeaurora.org> Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org Rename error recovery interfaces with "pcie_" prefix so they can be made non-static. 
Signed-off-by: Oza Pawandeep [bhelgaas: move declaration to later patch, leave functions static] Signed-off-by: Bjorn Helgaas Reviewed-by: Keith Busch diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index b56f9c1..a2d7cc7 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -476,14 +476,14 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) } /** - * do_fatal_recovery - handle fatal error recovery process + * pcie_do_fatal_recovery - handle fatal error recovery process * @dev: pointer to a pci_dev data structure of agent detecting an error * * Invoked when an error is fatal. Once being invoked, removes the devices * beneath this AER agent, followed by reset link e.g. secondary bus reset * followed by re-enumeration of devices. */ -static void do_fatal_recovery(struct pci_dev *dev) +static void pcie_do_fatal_recovery(struct pci_dev *dev) { struct pci_dev *udev; struct pci_bus *parent; @@ -533,14 +533,14 @@ static void do_fatal_recovery(struct pci_dev *dev) } /** - * do_nonfatal_recovery - handle nonfatal error recovery process + * pcie_do_nonfatal_recovery - handle nonfatal error recovery process * @dev: pointer to a pci_dev data structure of agent detecting an error * * Invoked when an error is nonfatal. Once being invoked, broadcast * error detected message to all downstream drivers within a hierarchy in * question and return the returned code. 
*/ -static void do_nonfatal_recovery(struct pci_dev *dev) +static void pcie_do_nonfatal_recovery(struct pci_dev *dev) { pci_ers_result_t status; enum pci_channel_state state; @@ -611,9 +611,9 @@ static void handle_error_source(struct pcie_device *aerdev, pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, info->status); } else if (info->severity == AER_NONFATAL) - do_nonfatal_recovery(dev); + pcie_do_nonfatal_recovery(dev); else if (info->severity == AER_FATAL) - do_fatal_recovery(dev); + pcie_do_fatal_recovery(dev); } #ifdef CONFIG_ACPI_APEI_PCIEAER @@ -678,9 +678,9 @@ static void aer_recover_work_func(struct work_struct *work) } cper_print_aer(pdev, entry.severity, entry.regs); if (entry.severity == AER_NONFATAL) - do_nonfatal_recovery(pdev); + pcie_do_nonfatal_recovery(pdev); else if (entry.severity == AER_FATAL) - do_fatal_recovery(pdev); + pcie_do_fatal_recovery(pdev); pci_dev_put(pdev); } } From patchwork Thu May 17 07:43:06 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oza Pawandeep X-Patchwork-Id: 915166 X-Patchwork-Delegate: bhelgaas@google.com Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codeaurora.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40mjzr6P5sz9s33 for ; Thu, 17 May 2018 17:46:32 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752533AbeEQHqb (ORCPT ); Thu, 17 May 2018 03:46:31 -0400 Received: from alexa-out.qualcomm.com ([129.46.98.28]:57577 "EHLO alexa-out.qualcomm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id 
S1752404AbeEQHnP (ORCPT ); Thu, 17 May 2018 03:43:15 -0400 X-IronPort-AV: E=Sophos;i="5.49,409,1520924400"; d="scan'208";a="17447180" Received: from ironmsg04-sd.qualcomm.com ([10.53.140.144]) by alexa-out.qualcomm.com with ESMTP; 17 May 2018 00:43:14 -0700 X-IronPort-AV: E=McAfee;i="5900,7806,8895"; a="87424422" Received: from westreach.qualcomm.com ([10.228.196.125]) by ironmsg04-sd.qualcomm.com with ESMTP; 17 May 2018 00:43:12 -0700 Received: by westreach.qualcomm.com (Postfix, from userid 467151) id A1F111F30; Thu, 17 May 2018 03:43:12 -0400 (EDT) From: Oza Pawandeep To: Bjorn Helgaas , Philippe Ombredanne , Thomas Gleixner , Greg Kroah-Hartman , Kate Stewart , linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org, Dongdong Liu , Keith Busch , Wei Zhang , Sinan Kaya , Timur Tabi Cc: Oza Pawandeep , Bjorn Helgaas Subject: [PATCH v17 4/9] PCI/AER: Factor out error reporting to drivers/pci/pcie/err.c Date: Thu, 17 May 2018 03:43:06 -0400 Message-Id: <1526542991-5291-5-git-send-email-poza@codeaurora.org> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1526542991-5291-1-git-send-email-poza@codeaurora.org> References: <1526542991-5291-1-git-send-email-poza@codeaurora.org> Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org Move the error reporting callbacks from aerdrv_core.c to err.c, where they can be used by DPC in addition to AER. As part of aerdrv_core.c, these callbacks were built under CONFIG_PCIEAER. Moving them to the new err.c means they will now be built under CONFIG_PCIEPORTBUS, so adjust the definition of pci_uevent_ers() to match. 
Signed-off-by: Oza Pawandeep [bhelgaas: in reset_link(), initialize "driver" even if CONFIG_PCIEAER is unset, update pci_uevent_ers() #ifdef wrapper] Signed-off-by: Bjorn Helgaas diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 6ace470..ffb9564 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1535,7 +1535,7 @@ static int pci_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } -#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH) +#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) /** * pci_uevent_ers - emit a uevent during recovery path of PCI device * @pdev: PCI device undergoing error recovery diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index cec9d8c..5e8857a 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -353,6 +353,10 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, void pci_enable_acs(struct pci_dev *dev); +/* PCI error reporting and recovery */ +void pcie_do_fatal_recovery(struct pci_dev *dev); +void pcie_do_nonfatal_recovery(struct pci_dev *dev); + bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 800e1d4..03f4e0b 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -2,7 +2,7 @@ # # Makefile for PCI Express features and port driver -pcieportdrv-y := portdrv_core.o portdrv_pci.o +pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 08b4584..b4c9506 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -76,36 +76,6 @@ struct aer_rpc { */ }; -struct aer_broadcast_data { - enum pci_channel_state state; - enum pci_ers_result result; -}; - -static inline pci_ers_result_t merge_result(enum pci_ers_result orig, - enum pci_ers_result 
new) -{ - if (new == PCI_ERS_RESULT_NO_AER_DRIVER) - return PCI_ERS_RESULT_NO_AER_DRIVER; - - if (new == PCI_ERS_RESULT_NONE) - return orig; - - switch (orig) { - case PCI_ERS_RESULT_CAN_RECOVER: - case PCI_ERS_RESULT_RECOVERED: - orig = new; - break; - case PCI_ERS_RESULT_DISCONNECT: - if (new == PCI_ERS_RESULT_NEED_RESET) - orig = PCI_ERS_RESULT_NEED_RESET; - break; - default: - break; - } - - return orig; -} - extern struct bus_type pcie_port_bus_type; void aer_isr(struct work_struct *work); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index a2d7cc7..4fa1ee4 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -228,191 +228,6 @@ static bool find_source_device(struct pci_dev *parent, return true; } -static int report_error_detected(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers *err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - dev->error_state = result_data->state; - - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->error_detected) { - if (result_data->state == pci_channel_io_frozen && - dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { - /* - * In case of fatal recovery, if one of down- - * stream device has no driver. We might be - * unable to recover because a later insmod - * of a driver for this device is unaware of - * its hw state. - */ - pci_printk(KERN_DEBUG, dev, "device has %s\n", - dev->driver ? - "no AER-aware driver" : "no driver"); - } - - /* - * If there's any device in the subtree that does not - * have an error_detected callback, returning - * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of - * the subsequent mmio_enabled/slot_reset/resume - * callbacks of "any" device in the subtree. 
All the - * devices in the subtree are left in the error state - * without recovery. - */ - - if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) - vote = PCI_ERS_RESULT_NO_AER_DRIVER; - else - vote = PCI_ERS_RESULT_NONE; - } else { - err_handler = dev->driver->err_handler; - vote = err_handler->error_detected(dev, result_data->state); - pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); - } - - result_data->result = merge_result(result_data->result, vote); - device_unlock(&dev->dev); - return 0; -} - -static int report_mmio_enabled(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers *err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->mmio_enabled) - goto out; - - err_handler = dev->driver->err_handler; - vote = err_handler->mmio_enabled(dev); - result_data->result = merge_result(result_data->result, vote); -out: - device_unlock(&dev->dev); - return 0; -} - -static int report_slot_reset(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers *err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->slot_reset) - goto out; - - err_handler = dev->driver->err_handler; - vote = err_handler->slot_reset(dev); - result_data->result = merge_result(result_data->result, vote); -out: - device_unlock(&dev->dev); - return 0; -} - -static int report_resume(struct pci_dev *dev, void *data) -{ - const struct pci_error_handlers *err_handler; - - device_lock(&dev->dev); - dev->error_state = pci_channel_io_normal; - - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->resume) - goto out; - - err_handler = dev->driver->err_handler; - err_handler->resume(dev); - 
pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); -out: - device_unlock(&dev->dev); - return 0; -} - -/** - * broadcast_error_message - handle message broadcast to downstream drivers - * @dev: pointer to from where in a hierarchy message is broadcasted down - * @state: error state - * @error_mesg: message to print - * @cb: callback to be broadcasted - * - * Invoked during error recovery process. Once being invoked, the content - * of error severity will be broadcasted to all downstream drivers in a - * hierarchy in question. - */ -static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, - enum pci_channel_state state, - char *error_mesg, - int (*cb)(struct pci_dev *, void *)) -{ - struct aer_broadcast_data result_data; - - pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg); - result_data.state = state; - if (cb == report_error_detected) - result_data.result = PCI_ERS_RESULT_CAN_RECOVER; - else - result_data.result = PCI_ERS_RESULT_RECOVERED; - - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - /* - * If the error is reported by a bridge, we think this error - * is related to the downstream link of the bridge, so we - * do error recovery on all subordinates of the bridge instead - * of the bridge and clear the error status of the bridge. - */ - if (cb == report_error_detected) - dev->error_state = state; - pci_walk_bus(dev->subordinate, cb, &result_data); - if (cb == report_resume) { - pci_cleanup_aer_uncorrect_error_status(dev); - dev->error_state = pci_channel_io_normal; - } - } else { - /* - * If the error is reported by an end point, we think this - * error is related to the upstream link of the end point. - */ - if (state == pci_channel_io_normal) - /* - * the error is non fatal so the bus is ok, just invoke - * the callback for the function that logged the error. 
- */ - cb(dev, &result_data); - else - pci_walk_bus(dev->bus, cb, &result_data); - } - - return result_data.result; -} - -/** - * default_reset_link - default reset function - * @dev: pointer to pci_dev data structure - * - * Invoked when performing link reset on a Downstream Port or a - * Root Port with no aer driver. - */ -static pci_ers_result_t default_reset_link(struct pci_dev *dev) -{ - pci_reset_bridge_secondary_bus(dev); - pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n"); - return PCI_ERS_RESULT_RECOVERED; -} - static int find_aer_service_iter(struct device *device, void *data) { struct pcie_port_service_driver *service_driver, **drv; @@ -430,7 +245,7 @@ static int find_aer_service_iter(struct device *device, void *data) return 0; } -static struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev) +struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev) { struct pcie_port_service_driver *drv = NULL; @@ -439,154 +254,6 @@ static struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev) return drv; } -static pci_ers_result_t reset_link(struct pci_dev *dev) -{ - struct pci_dev *udev; - pci_ers_result_t status; - struct pcie_port_service_driver *driver; - - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - /* Reset this port for all subordinates */ - udev = dev; - } else { - /* Reset the upstream component (likely downstream port) */ - udev = dev->bus->self; - } - - /* Use the aer driver of the component firstly */ - driver = find_aer_service(udev); - - if (driver && driver->reset_link) { - status = driver->reset_link(udev); - } else if (udev->has_secondary_link) { - status = default_reset_link(udev); - } else { - pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n", - pci_name(udev)); - return PCI_ERS_RESULT_DISCONNECT; - } - - if (status != PCI_ERS_RESULT_RECOVERED) { - pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", - pci_name(udev)); - return 
PCI_ERS_RESULT_DISCONNECT; - } - - return status; -} - -/** - * pcie_do_fatal_recovery - handle fatal error recovery process - * @dev: pointer to a pci_dev data structure of agent detecting an error - * - * Invoked when an error is fatal. Once being invoked, removes the devices - * beneath this AER agent, followed by reset link e.g. secondary bus reset - * followed by re-enumeration of devices. - */ -static void pcie_do_fatal_recovery(struct pci_dev *dev) -{ - struct pci_dev *udev; - struct pci_bus *parent; - struct pci_dev *pdev, *temp; - pci_ers_result_t result; - struct aer_broadcast_data result_data; - - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) - udev = dev; - else - udev = dev->bus->self; - - parent = udev->subordinate; - pci_lock_rescan_remove(); - list_for_each_entry_safe_reverse(pdev, temp, &parent->devices, - bus_list) { - pci_dev_get(pdev); - pci_dev_set_disconnected(pdev, NULL); - if (pci_has_subordinate(pdev)) - pci_walk_bus(pdev->subordinate, - pci_dev_set_disconnected, NULL); - pci_stop_and_remove_bus_device(pdev); - pci_dev_put(pdev); - } - - result = reset_link(udev); - - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - /* - * If the error is reported by a bridge, we think this error - * is related to the downstream link of the bridge, so we - * do error recovery on all subordinates of the bridge instead - * of the bridge and clear the error status of the bridge. - */ - pci_cleanup_aer_uncorrect_error_status(dev); - } - - if (result == PCI_ERS_RESULT_RECOVERED) { - if (pcie_wait_for_link(udev, true)) - pci_rescan_bus(udev->bus); - } else { - pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); - pci_info(dev, "AER: Device recovery from fatal error failed\n"); - } - - pci_unlock_rescan_remove(); -} - -/** - * pcie_do_nonfatal_recovery - handle nonfatal error recovery process - * @dev: pointer to a pci_dev data structure of agent detecting an error - * - * Invoked when an error is nonfatal. 
Once being invoked, broadcast - * error detected message to all downstream drivers within a hierarchy in - * question and return the returned code. - */ -static void pcie_do_nonfatal_recovery(struct pci_dev *dev) -{ - pci_ers_result_t status; - enum pci_channel_state state; - - state = pci_channel_io_normal; - - status = broadcast_error_message(dev, - state, - "error_detected", - report_error_detected); - - if (status == PCI_ERS_RESULT_CAN_RECOVER) - status = broadcast_error_message(dev, - state, - "mmio_enabled", - report_mmio_enabled); - - if (status == PCI_ERS_RESULT_NEED_RESET) { - /* - * TODO: Should call platform-specific - * functions to reset slot before calling - * drivers' slot_reset callbacks? - */ - status = broadcast_error_message(dev, - state, - "slot_reset", - report_slot_reset); - } - - if (status != PCI_ERS_RESULT_RECOVERED) - goto failed; - - broadcast_error_message(dev, - state, - "resume", - report_resume); - - pci_info(dev, "AER: Device recovery successful\n"); - return; - -failed: - pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); - /* TODO: Should kernel panic here? */ - pci_info(dev, "AER: Device recovery failed\n"); -} - /** * handle_error_source - handle logging error into an event log * @aerdev: pointer to pcie_device data structure of the root port diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c new file mode 100644 index 0000000..307120b --- /dev/null +++ b/drivers/pci/pcie/err.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file implements the error recovery as a core part of PCIe error + * reporting. When a PCIe error is delivered, an error message will be + * collected and printed to console, then, an error recovery procedure + * will be executed by following the PCI error recovery rules. + * + * Copyright (C) 2006 Intel Corp. 
+ * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + */ + +#include +#include +#include +#include +#include +#include +#include "portdrv.h" +#include "../pci.h" + +struct aer_broadcast_data { + enum pci_channel_state state; + enum pci_ers_result result; +}; + +static pci_ers_result_t merge_result(enum pci_ers_result orig, + enum pci_ers_result new) +{ + if (new == PCI_ERS_RESULT_NO_AER_DRIVER) + return PCI_ERS_RESULT_NO_AER_DRIVER; + + if (new == PCI_ERS_RESULT_NONE) + return orig; + + switch (orig) { + case PCI_ERS_RESULT_CAN_RECOVER: + case PCI_ERS_RESULT_RECOVERED: + orig = new; + break; + case PCI_ERS_RESULT_DISCONNECT: + if (new == PCI_ERS_RESULT_NEED_RESET) + orig = PCI_ERS_RESULT_NEED_RESET; + break; + default: + break; + } + + return orig; +} + +static int report_error_detected(struct pci_dev *dev, void *data) +{ + pci_ers_result_t vote; + const struct pci_error_handlers *err_handler; + struct aer_broadcast_data *result_data; + + result_data = (struct aer_broadcast_data *) data; + + device_lock(&dev->dev); + dev->error_state = result_data->state; + + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->error_detected) { + if (result_data->state == pci_channel_io_frozen && + dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { + /* + * In case of fatal recovery, if one of down- + * stream device has no driver. We might be + * unable to recover because a later insmod + * of a driver for this device is unaware of + * its hw state. + */ + pci_printk(KERN_DEBUG, dev, "device has %s\n", + dev->driver ? + "no AER-aware driver" : "no driver"); + } + + /* + * If there's any device in the subtree that does not + * have an error_detected callback, returning + * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of + * the subsequent mmio_enabled/slot_reset/resume + * callbacks of "any" device in the subtree. All the + * devices in the subtree are left in the error state + * without recovery. 
+ */ + + if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) + vote = PCI_ERS_RESULT_NO_AER_DRIVER; + else + vote = PCI_ERS_RESULT_NONE; + } else { + err_handler = dev->driver->err_handler; + vote = err_handler->error_detected(dev, result_data->state); + pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); + } + + result_data->result = merge_result(result_data->result, vote); + device_unlock(&dev->dev); + return 0; +} + +static int report_mmio_enabled(struct pci_dev *dev, void *data) +{ + pci_ers_result_t vote; + const struct pci_error_handlers *err_handler; + struct aer_broadcast_data *result_data; + + result_data = (struct aer_broadcast_data *) data; + + device_lock(&dev->dev); + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->mmio_enabled) + goto out; + + err_handler = dev->driver->err_handler; + vote = err_handler->mmio_enabled(dev); + result_data->result = merge_result(result_data->result, vote); +out: + device_unlock(&dev->dev); + return 0; +} + +static int report_slot_reset(struct pci_dev *dev, void *data) +{ + pci_ers_result_t vote; + const struct pci_error_handlers *err_handler; + struct aer_broadcast_data *result_data; + + result_data = (struct aer_broadcast_data *) data; + + device_lock(&dev->dev); + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->slot_reset) + goto out; + + err_handler = dev->driver->err_handler; + vote = err_handler->slot_reset(dev); + result_data->result = merge_result(result_data->result, vote); +out: + device_unlock(&dev->dev); + return 0; +} + +static int report_resume(struct pci_dev *dev, void *data) +{ + const struct pci_error_handlers *err_handler; + + device_lock(&dev->dev); + dev->error_state = pci_channel_io_normal; + + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->resume) + goto out; + + err_handler = dev->driver->err_handler; + err_handler->resume(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); +out: + device_unlock(&dev->dev); + return 
0; +} + +/** + * default_reset_link - default reset function + * @dev: pointer to pci_dev data structure + * + * Invoked when performing link reset on a Downstream Port or a + * Root Port with no aer driver. + */ +static pci_ers_result_t default_reset_link(struct pci_dev *dev) +{ + pci_reset_bridge_secondary_bus(dev); + pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n"); + return PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t reset_link(struct pci_dev *dev) +{ + struct pci_dev *udev; + pci_ers_result_t status; + struct pcie_port_service_driver *driver = NULL; + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + /* Reset this port for all subordinates */ + udev = dev; + } else { + /* Reset the upstream component (likely downstream port) */ + udev = dev->bus->self; + } + +#if IS_ENABLED(CONFIG_PCIEAER) + /* Use the aer driver of the component firstly */ + driver = find_aer_service(udev); +#endif + + if (driver && driver->reset_link) { + status = driver->reset_link(udev); + } else if (udev->has_secondary_link) { + status = default_reset_link(udev); + } else { + pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n", + pci_name(udev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + if (status != PCI_ERS_RESULT_RECOVERED) { + pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", + pci_name(udev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + return status; +} + +/** + * broadcast_error_message - handle message broadcast to downstream drivers + * @dev: pointer to from where in a hierarchy message is broadcasted down + * @state: error state + * @error_mesg: message to print + * @cb: callback to be broadcasted + * + * Invoked during error recovery process. Once being invoked, the content + * of error severity will be broadcasted to all downstream drivers in a + * hierarchy in question. 
+ */ +static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, + enum pci_channel_state state, + char *error_mesg, + int (*cb)(struct pci_dev *, void *)) +{ + struct aer_broadcast_data result_data; + + pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg); + result_data.state = state; + if (cb == report_error_detected) + result_data.result = PCI_ERS_RESULT_CAN_RECOVER; + else + result_data.result = PCI_ERS_RESULT_RECOVERED; + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + /* + * If the error is reported by a bridge, we think this error + * is related to the downstream link of the bridge, so we + * do error recovery on all subordinates of the bridge instead + * of the bridge and clear the error status of the bridge. + */ + if (cb == report_error_detected) + dev->error_state = state; + pci_walk_bus(dev->subordinate, cb, &result_data); + if (cb == report_resume) { + pci_cleanup_aer_uncorrect_error_status(dev); + dev->error_state = pci_channel_io_normal; + } + } else { + /* + * If the error is reported by an end point, we think this + * error is related to the upstream link of the end point. + */ + if (state == pci_channel_io_normal) + /* + * the error is non fatal so the bus is ok, just invoke + * the callback for the function that logged the error. + */ + cb(dev, &result_data); + else + pci_walk_bus(dev->bus, cb, &result_data); + } + + return result_data.result; +} + +/** + * pcie_do_fatal_recovery - handle fatal error recovery process + * @dev: pointer to a pci_dev data structure of agent detecting an error + * + * Invoked when an error is fatal. Once being invoked, removes the devices + * beneath this AER agent, followed by reset link e.g. secondary bus reset + * followed by re-enumeration of devices. 
+ */ +void pcie_do_fatal_recovery(struct pci_dev *dev) +{ + struct pci_dev *udev; + struct pci_bus *parent; + struct pci_dev *pdev, *temp; + pci_ers_result_t result; + struct aer_broadcast_data result_data; + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + udev = dev; + else + udev = dev->bus->self; + + parent = udev->subordinate; + pci_lock_rescan_remove(); + list_for_each_entry_safe_reverse(pdev, temp, &parent->devices, + bus_list) { + pci_dev_get(pdev); + pci_dev_set_disconnected(pdev, NULL); + if (pci_has_subordinate(pdev)) + pci_walk_bus(pdev->subordinate, + pci_dev_set_disconnected, NULL); + pci_stop_and_remove_bus_device(pdev); + pci_dev_put(pdev); + } + + result = reset_link(udev); + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + /* + * If the error is reported by a bridge, we think this error + * is related to the downstream link of the bridge, so we + * do error recovery on all subordinates of the bridge instead + * of the bridge and clear the error status of the bridge. + */ + pci_cleanup_aer_uncorrect_error_status(dev); + } + + if (result == PCI_ERS_RESULT_RECOVERED) { + if (pcie_wait_for_link(udev, true)) + pci_rescan_bus(udev->bus); + pci_info(dev, "Device recovery from fatal error successful\n"); + } else { + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); + pci_info(dev, "Device recovery from fatal error failed\n"); + } + + pci_unlock_rescan_remove(); +} + +/** + * pcie_do_nonfatal_recovery - handle nonfatal error recovery process + * @dev: pointer to a pci_dev data structure of agent detecting an error + * + * Invoked when an error is nonfatal. Once being invoked, broadcast + * error detected message to all downstream drivers within a hierarchy in + * question and return the returned code.
+ */ +void pcie_do_nonfatal_recovery(struct pci_dev *dev) +{ + pci_ers_result_t status; + enum pci_channel_state state; + + state = pci_channel_io_normal; + + status = broadcast_error_message(dev, + state, + "error_detected", + report_error_detected); + + if (status == PCI_ERS_RESULT_CAN_RECOVER) + status = broadcast_error_message(dev, + state, + "mmio_enabled", + report_mmio_enabled); + + if (status == PCI_ERS_RESULT_NEED_RESET) { + /* + * TODO: Should call platform-specific + * functions to reset slot before calling + * drivers' slot_reset callbacks? + */ + status = broadcast_error_message(dev, + state, + "slot_reset", + report_slot_reset); + } + + if (status != PCI_ERS_RESULT_RECOVERED) + goto failed; + + broadcast_error_message(dev, + state, + "resume", + report_resume); + + pci_info(dev, "AER: Device recovery successful\n"); + return; + +failed: + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); + + /* TODO: Should kernel panic here? */ + pci_info(dev, "AER: Device recovery failed\n"); +} diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index d0c6783..47c9824 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -112,4 +112,5 @@ static inline bool pcie_pme_no_msi(void) { return false; } static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} #endif /* !CONFIG_PCIE_PME */ +struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev); #endif /* _PORTDRV_H_ */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 73178a2..4f721f7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2284,7 +2284,7 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) return false; } -#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH) +#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #endif From patchwork Thu May 17 07:43:07 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 
Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oza Pawandeep X-Patchwork-Id: 915160 X-Patchwork-Delegate: bhelgaas@google.com Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codeaurora.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40mjy21cFPz9s3X for ; Thu, 17 May 2018 17:44:58 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752500AbeEQHnS (ORCPT ); Thu, 17 May 2018 03:43:18 -0400 Received: from alexa-out.qualcomm.com ([129.46.98.28]:57577 "EHLO alexa-out.qualcomm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752442AbeEQHnQ (ORCPT ); Thu, 17 May 2018 03:43:16 -0400 X-IronPort-AV: E=Sophos;i="5.49,409,1520924400"; d="scan'208";a="17447184" Received: from ironmsg-sd-alpha.qualcomm.com ([10.53.140.30]) by alexa-out.qualcomm.com with ESMTP; 17 May 2018 00:43:15 -0700 X-IronPort-AV: E=McAfee;i="5900,7806,8890"; a="195745873" Received: from westreach.qualcomm.com ([10.228.196.125]) by ironmsg-SD-alpha.qualcomm.com with ESMTP; 17 May 2018 00:43:14 -0700 Received: by westreach.qualcomm.com (Postfix, from userid 467151) id AC70B1F2F; Thu, 17 May 2018 03:43:12 -0400 (EDT) From: Oza Pawandeep To: Bjorn Helgaas , Philippe Ombredanne , Thomas Gleixner , Greg Kroah-Hartman , Kate Stewart , linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org, Dongdong Liu , Keith Busch , Wei Zhang , Sinan Kaya , Timur Tabi Cc: Oza Pawandeep , Bjorn Helgaas Subject: [PATCH v17 5/9] PCI/portdrv: Add generic pcie_port_find_service() Date: Thu, 17 May 2018 03:43:07 -0400 Message-Id: <1526542991-5291-6-git-send-email-poza@codeaurora.org> 
X-Mailer: git-send-email 2.7.4 In-Reply-To: <1526542991-5291-1-git-send-email-poza@codeaurora.org> References: <1526542991-5291-1-git-send-email-poza@codeaurora.org> Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org Add generic pcie_port_find_service() routine. Signed-off-by: Oza Pawandeep Signed-off-by: Bjorn Helgaas Reviewed-by: Keith Busch diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 4fa1ee4..fdfc474 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -228,32 +228,6 @@ static bool find_source_device(struct pci_dev *parent, return true; } -static int find_aer_service_iter(struct device *device, void *data) -{ - struct pcie_port_service_driver *service_driver, **drv; - - drv = (struct pcie_port_service_driver **) data; - - if (device->bus == &pcie_port_bus_type && device->driver) { - service_driver = to_service_driver(device->driver); - if (service_driver->service == PCIE_PORT_SERVICE_AER) { - *drv = service_driver; - return 1; - } - } - - return 0; -} - -struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev) -{ - struct pcie_port_service_driver *drv = NULL; - - device_for_each_child(&dev->dev, &drv, find_aer_service_iter); - - return drv; -} - /** * handle_error_source - handle logging error into an event log * @aerdev: pointer to pcie_device data structure of the root port diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 307120b..a1668e9 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -194,10 +194,8 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) udev = dev->bus->self; } -#if IS_ENABLED(CONFIG_PCIEAER) /* Use the aer driver of the component firstly */ - driver = find_aer_service(udev); -#endif + driver = pcie_port_find_service(udev, PCIE_PORT_SERVICE_AER); if (driver && driver->reset_link) { status = driver->reset_link(udev); diff --git 
a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 47c9824..ba6c963 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -112,5 +112,6 @@ static inline bool pcie_pme_no_msi(void) { return false; } static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} #endif /* !CONFIG_PCIE_PME */ -struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev); +struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, + u32 service); #endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index c9c0663..e5bbf08 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -18,6 +18,10 @@ #include "../pci.h" #include "portdrv.h" +struct portdrv_service_data { + struct pcie_port_service_driver *drv; + u32 service; +}; /** * release_pcie_device - free PCI Express port service device structure @@ -398,6 +402,46 @@ static int remove_iter(struct device *dev, void *data) return 0; } +static int find_service_iter(struct device *device, void *data) +{ + struct pcie_port_service_driver *service_driver; + struct portdrv_service_data *pdrvs; + u32 service; + + pdrvs = (struct portdrv_service_data *) data; + service = pdrvs->service; + + if (device->bus == &pcie_port_bus_type && device->driver) { + service_driver = to_service_driver(device->driver); + if (service_driver->service == service) { + pdrvs->drv = service_driver; + return 1; + } + } + + return 0; +} +/** + * pcie_port_find_service - find the service driver + * @dev: PCI Express port the service is associated with + * @service: Service to find + * + * Find PCI Express port service driver associated with given service + */ +struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, + u32 service) +{ + struct pcie_port_service_driver *drv; + struct portdrv_service_data pdrvs; + + pdrvs.drv = NULL; + pdrvs.service = service; + device_for_each_child(&dev->dev, 
&pdrvs, find_service_iter); + + drv = pdrvs.drv; + return drv; +} + /** * pcie_port_device_remove - unregister PCI Express port service devices * @dev: PCI Express port the service devices to unregister are associated with From patchwork Thu May 17 07:43:08 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oza Pawandeep X-Patchwork-Id: 915158 X-Patchwork-Delegate: bhelgaas@google.com Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codeaurora.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40mjxt2MzCz9s33 for ; Thu, 17 May 2018 17:44:50 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752508AbeEQHnU (ORCPT ); Thu, 17 May 2018 03:43:20 -0400 Received: from alexa-out.qualcomm.com ([129.46.98.28]:59609 "EHLO alexa-out.qualcomm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752455AbeEQHnQ (ORCPT ); Thu, 17 May 2018 03:43:16 -0400 X-IronPort-AV: E=Sophos;i="5.49,409,1520924400"; d="scan'208";a="17713698" Received: from ironmsg02-sd.qualcomm.com ([10.53.140.142]) by alexa-out.qualcomm.com with ESMTP; 17 May 2018 00:43:16 -0700 Received: from westreach.qualcomm.com ([10.228.196.125]) by ironmsg02-sd.qualcomm.com with ESMTP; 17 May 2018 00:43:14 -0700 Received: by westreach.qualcomm.com (Postfix, from userid 467151) id B6D091F31; Thu, 17 May 2018 03:43:12 -0400 (EDT) From: Oza Pawandeep To: Bjorn Helgaas , Philippe Ombredanne , Thomas Gleixner , Greg Kroah-Hartman , Kate Stewart , linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org, Dongdong Liu , Keith Busch , 
Wei Zhang , Sinan Kaya , Timur Tabi Cc: Oza Pawandeep , Bjorn Helgaas Subject: [PATCH v17 6/9] PCI/portdrv: Add generic pcie_port_find_device() Date: Thu, 17 May 2018 03:43:08 -0400 Message-Id: <1526542991-5291-7-git-send-email-poza@codeaurora.org> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1526542991-5291-1-git-send-email-poza@codeaurora.org> References: <1526542991-5291-1-git-send-email-poza@codeaurora.org> Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org Add generic pcie_port_find_device() routine. Signed-off-by: Oza Pawandeep Signed-off-by: Bjorn Helgaas Reviewed-by: Keith Busch diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index ba6c963..896608a 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -114,4 +114,6 @@ static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, u32 service); +struct device *pcie_port_find_device(struct pci_dev *dev, + u32 service); #endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index e5bbf08..a5b3b3a 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -20,6 +20,7 @@ #include "portdrv.h" struct portdrv_service_data { struct pcie_port_service_driver *drv; + struct device *dev; u32 service; }; @@ -415,6 +416,7 @@ static int find_service_iter(struct device *device, void *data) service_driver = to_service_driver(device->driver); if (service_driver->service == service) { pdrvs->drv = service_driver; + pdrvs->dev = device; return 1; } } @@ -443,6 +445,27 @@ struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, } /** + * pcie_port_find_device - find the struct device + * @dev: PCI Express port the service is associated with + * @service: For the service to find + * + * Find the struct device associated with given service on a 
pci_dev + */ +struct device *pcie_port_find_device(struct pci_dev *dev, + u32 service) +{ + struct device *device; + struct portdrv_service_data pdrvs; + + pdrvs.dev = NULL; + pdrvs.service = service; + device_for_each_child(&dev->dev, &pdrvs, find_service_iter); + + device = pdrvs.dev; + return device; +} + +/** * pcie_port_device_remove - unregister PCI Express port service devices * @dev: PCI Express port the service devices to unregister are associated with * From patchwork Thu May 17 07:43:09 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oza Pawandeep X-Patchwork-Id: 915155 X-Patchwork-Delegate: bhelgaas@google.com Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codeaurora.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40mjwY2zwSz9s33 for ; Thu, 17 May 2018 17:43:41 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752572AbeEQHna (ORCPT ); Thu, 17 May 2018 03:43:30 -0400 Received: from wolverine01.qualcomm.com ([199.106.114.254]:18837 "EHLO wolverine01.qualcomm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752558AbeEQHnY (ORCPT ); Thu, 17 May 2018 03:43:24 -0400 X-IronPort-AV: E=Sophos;i="5.49,409,1520924400"; d="scan'208";a="340813096" Received: from unknown (HELO ironmsg05-sd.qualcomm.com) ([10.53.140.145]) by wolverine01.qualcomm.com with ESMTP; 17 May 2018 00:43:15 -0700 X-IronPort-AV: E=McAfee;i="5900,7806,8895"; a="87448696" Received: from westreach.qualcomm.com ([10.228.196.125]) by ironmsg05-sd.qualcomm.com with ESMTP; 17 May 2018 
00:43:14 -0700 Received: by westreach.qualcomm.com (Postfix, from userid 467151) id BF2EA1F32; Thu, 17 May 2018 03:43:12 -0400 (EDT) From: Oza Pawandeep To: Bjorn Helgaas , Philippe Ombredanne , Thomas Gleixner , Greg Kroah-Hartman , Kate Stewart , linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org, Dongdong Liu , Keith Busch , Wei Zhang , Sinan Kaya , Timur Tabi Cc: Oza Pawandeep , Bjorn Helgaas Subject: [PATCH v17 7/9] PCI/DPC: Disable ERR_NONFATAL handling by DPC Date: Thu, 17 May 2018 03:43:09 -0400 Message-Id: <1526542991-5291-8-git-send-email-poza@codeaurora.org> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1526542991-5291-1-git-send-email-poza@codeaurora.org> References: <1526542991-5291-1-git-send-email-poza@codeaurora.org> Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org PCIe ERR_NONFATAL errors mean a particular transaction is unreliable but the Link is otherwise fully functional (PCIe r4.0, sec 6.2.2). The AER driver handles these by logging the error details and calling driver-supplied pci_error_handlers callbacks. It does not reset downstream devices, does not remove them from the PCI subsystem, does not re-enumerate them, and does not call their driver .remove() or .probe() methods. But DPC driver previously enabled DPC on ERR_NONFATAL, so if the hardware supports DPC, these errors caused a Link reset (performed automatically by the hardware), followed by the DPC driver removing affected devices (which calls their .remove() methods), bringing the Link back up, and re-enumerating (which calls driver .probe() methods). Disable ERR_NONFATAL DPC triggering so these errors will only be handled by AER. This means drivers won't have to deal with different usage of their pci_error_handlers callbacks and .probe() and .remove() methods based on whether the platform has DPC support. 
Signed-off-by: Oza Pawandeep [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 80ec384..361903f 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -260,7 +260,7 @@ static int dpc_probe(struct pcie_device *dev) } } - ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN; + ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); dev_info(device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n", @@ -278,7 +278,7 @@ static void dpc_remove(struct pcie_device *dev) u16 ctl; pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); - ctl &= ~(PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN); + ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN); pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); } diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 103ba79..5182e0d 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -981,6 +981,7 @@ #define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ #define PCI_EXP_DPC_CTL 6 /* DPC control */ +#define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */ #define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */ #define PCI_EXP_DPC_CTL_INT_EN 0x0008 /* DPC Interrupt Enable */ From patchwork Thu May 17 07:43:10 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oza Pawandeep X-Patchwork-Id: 915159 X-Patchwork-Delegate: bhelgaas@google.com Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org 
(client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codeaurora.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40mjxy3XB9z9s33 for ; Thu, 17 May 2018 17:44:54 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752414AbeEQHnT (ORCPT ); Thu, 17 May 2018 03:43:19 -0400 Received: from alexa-out.qualcomm.com ([129.46.98.28]:18258 "EHLO alexa-out.qualcomm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752396AbeEQHnQ (ORCPT ); Thu, 17 May 2018 03:43:16 -0400 X-IronPort-AV: E=Sophos;i="5.49,409,1520924400"; d="scan'208";a="17447187" Received: from ironmsg03-sd.qualcomm.com ([10.53.140.143]) by alexa-out.qualcomm.com with ESMTP; 17 May 2018 00:43:15 -0700 X-IronPort-AV: E=McAfee;i="5900,7806,8895"; a="163842609" Received: from westreach.qualcomm.com ([10.228.196.125]) by ironmsg03-sd.qualcomm.com with ESMTP; 17 May 2018 00:43:14 -0700 Received: by westreach.qualcomm.com (Postfix, from userid 467151) id C990E1F33; Thu, 17 May 2018 03:43:12 -0400 (EDT) From: Oza Pawandeep To: Bjorn Helgaas , Philippe Ombredanne , Thomas Gleixner , Greg Kroah-Hartman , Kate Stewart , linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org, Dongdong Liu , Keith Busch , Wei Zhang , Sinan Kaya , Timur Tabi Cc: Oza Pawandeep , Bjorn Helgaas Subject: [PATCH v17 8/9] PCI/AER: Pass service type to pcie_do_fatal_recovery() Date: Thu, 17 May 2018 03:43:10 -0400 Message-Id: <1526542991-5291-9-git-send-email-poza@codeaurora.org> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1526542991-5291-1-git-send-email-poza@codeaurora.org> References: <1526542991-5291-1-git-send-email-poza@codeaurora.org> Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org Pass the service type to pcie_do_fatal_recovery() instead of 
assuming AER. We will make DPC also use pcie_do_fatal_recovery(), and it needs to do things a little differently for AER and DPC. Signed-off-by: Oza Pawandeep [bhelgaas: split to separate patch] Signed-off-by: Bjorn Helgaas diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 5e8857a..6af7595 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -354,7 +354,7 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, void pci_enable_acs(struct pci_dev *dev); /* PCI error reporting and recovery */ -void pcie_do_fatal_recovery(struct pci_dev *dev); +void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service); void pcie_do_nonfatal_recovery(struct pci_dev *dev); bool pcie_wait_for_link(struct pci_dev *pdev, bool active); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index fdfc474..36e622d 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -254,7 +254,7 @@ static void handle_error_source(struct pcie_device *aerdev, } else if (info->severity == AER_NONFATAL) pcie_do_nonfatal_recovery(dev); else if (info->severity == AER_FATAL) - pcie_do_fatal_recovery(dev); + pcie_do_fatal_recovery(dev, PCIE_PORT_SERVICE_AER); } #ifdef CONFIG_ACPI_APEI_PCIEAER @@ -321,7 +321,7 @@ static void aer_recover_work_func(struct work_struct *work) if (entry.severity == AER_NONFATAL) pcie_do_nonfatal_recovery(pdev); else if (entry.severity == AER_FATAL) - pcie_do_fatal_recovery(pdev); + pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_AER); pci_dev_put(pdev); } } diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index a1668e9..8d68cd7 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -180,7 +180,7 @@ static pci_ers_result_t default_reset_link(struct pci_dev *dev) return PCI_ERS_RESULT_RECOVERED; } -static pci_ers_result_t reset_link(struct pci_dev *dev) +static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) { struct pci_dev *udev; pci_ers_result_t 
status; @@ -195,7 +195,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) } /* Use the aer driver of the component firstly */ - driver = pcie_port_find_service(udev, PCIE_PORT_SERVICE_AER); + driver = pcie_port_find_service(udev, service); if (driver && driver->reset_link) { status = driver->reset_link(udev); @@ -281,7 +281,7 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, * beneath this AER agent, followed by reset link e.g. secondary bus reset * followed by re-enumeration of devices. */ -void pcie_do_fatal_recovery(struct pci_dev *dev) +void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service) { struct pci_dev *udev; struct pci_bus *parent; @@ -307,9 +307,10 @@ void pcie_do_fatal_recovery(struct pci_dev *dev) pci_dev_put(pdev); } - result = reset_link(udev); + result = reset_link(udev, service); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + if ((service == PCIE_PORT_SERVICE_AER) && + (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) { /* * If the error is reported by a bridge, we think this error * is related to the downstream link of the bridge, so we From patchwork Thu May 17 07:43:11 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oza Pawandeep X-Patchwork-Id: 915156 X-Patchwork-Delegate: bhelgaas@google.com Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-pci-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codeaurora.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40mjwx4DJlz9s33 for ; Thu, 17 May 2018 17:44:01 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752589AbeEQHnr (ORCPT 
); Thu, 17 May 2018 03:43:47 -0400 Received: from wolverine01.qualcomm.com ([199.106.114.254]:18821 "EHLO wolverine01.qualcomm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752552AbeEQHnY (ORCPT ); Thu, 17 May 2018 03:43:24 -0400 X-IronPort-AV: E=Sophos;i="5.49,409,1520924400"; d="scan'208";a="340813093" Received: from unknown (HELO ironmsg05-sd.qualcomm.com) ([10.53.140.145]) by wolverine01.qualcomm.com with ESMTP; 17 May 2018 00:43:15 -0700 X-IronPort-AV: E=McAfee;i="5900,7806,8895"; a="87448695" Received: from westreach.qualcomm.com ([10.228.196.125]) by ironmsg05-sd.qualcomm.com with ESMTP; 17 May 2018 00:43:14 -0700 Received: by westreach.qualcomm.com (Postfix, from userid 467151) id D606D1F34; Thu, 17 May 2018 03:43:12 -0400 (EDT) From: Oza Pawandeep To: Bjorn Helgaas , Philippe Ombredanne , Thomas Gleixner , Greg Kroah-Hartman , Kate Stewart , linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org, Dongdong Liu , Keith Busch , Wei Zhang , Sinan Kaya , Timur Tabi Cc: Oza Pawandeep , Bjorn Helgaas Subject: [PATCH v17 9/9] PCI/DPC: Use the generic pcie_do_fatal_recovery() path Date: Thu, 17 May 2018 03:43:11 -0400 Message-Id: <1526542991-5291-10-git-send-email-poza@codeaurora.org> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1526542991-5291-1-git-send-email-poza@codeaurora.org> References: <1526542991-5291-1-git-send-email-poza@codeaurora.org> Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org Our goal is to handle ERR_FATAL errors similarly, whether they are reported via AER or via DPC. A previous commit changed AER so it handles ERR_FATAL by calling driver .remove() methods and resetting the Link. DPC already does that (although the Link reset is done automatically by hardware and happens before we call the driver .remove() methods). Restructure the DPC code so it calls the same pcie_do_fatal_recovery() interface used by AER. This makes it clearer that we want to use the same path. 
Implement the .reset_link() method used by pcie_do_fatal_recovery(). For DPC, the actual reset is done automatically by hardware, so we really only have to wait for the Link to be inactive, then release the Port from DPC. Signed-off-by: Oza Pawandeep [bhelgaas: changelog, DPC_FATAL is not a bitfield, can be sequential] Signed-off-by: Bjorn Helgaas diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 361903f..6064041 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -73,29 +73,30 @@ static void dpc_wait_link_inactive(struct dpc_dev *dpc) pcie_wait_for_link(pdev, false); } -static void dpc_work(struct work_struct *work) +static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) { - struct dpc_dev *dpc = container_of(work, struct dpc_dev, work); - struct pci_dev *dev, *temp, *pdev = dpc->dev->port; - struct pci_bus *parent = pdev->subordinate; - u16 cap = dpc->cap_pos, ctl; - - pci_lock_rescan_remove(); - list_for_each_entry_safe_reverse(dev, temp, &parent->devices, - bus_list) { - pci_dev_get(dev); - pci_dev_set_disconnected(dev, NULL); - if (pci_has_subordinate(dev)) - pci_walk_bus(dev->subordinate, - pci_dev_set_disconnected, NULL); - pci_stop_and_remove_bus_device(dev); - pci_dev_put(dev); - } - pci_unlock_rescan_remove(); - + struct dpc_dev *dpc; + struct pcie_device *pciedev; + struct device *devdpc; + u16 cap, ctl; + + /* + * DPC disables the Link automatically in hardware, so it has + * already been reset by the time we get here. + */ + devdpc = pcie_port_find_device(pdev, PCIE_PORT_SERVICE_DPC); + pciedev = to_pcie_device(devdpc); + dpc = get_service_data(pciedev); + cap = dpc->cap_pos; + + /* + * Wait until the Link is inactive, then clear DPC Trigger Status + * to allow the Port to leave DPC. 
+ */ dpc_wait_link_inactive(dpc); + if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc)) - return; + return PCI_ERS_RESULT_DISCONNECT; if (dpc->rp_extensions && dpc->rp_pio_status) { pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, dpc->rp_pio_status); @@ -108,6 +109,17 @@ static void dpc_work(struct work_struct *work) pci_read_config_word(pdev, cap + PCI_EXP_DPC_CTL, &ctl); pci_write_config_word(pdev, cap + PCI_EXP_DPC_CTL, ctl | PCI_EXP_DPC_CTL_INT_EN); + + return PCI_ERS_RESULT_RECOVERED; +} + +static void dpc_work(struct work_struct *work) +{ + struct dpc_dev *dpc = container_of(work, struct dpc_dev, work); + struct pci_dev *pdev = dpc->dev->port; + + /* We configure DPC so it only triggers on ERR_FATAL */ + pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_DPC); } static void dpc_process_rp_pio_error(struct dpc_dev *dpc) @@ -288,6 +300,7 @@ static struct pcie_port_service_driver dpcdriver = { .service = PCIE_PORT_SERVICE_DPC, .probe = dpc_probe, .remove = dpc_remove, + .reset_link = dpc_reset_link, }; static int __init dpc_service_init(void) diff --git a/include/linux/aer.h b/include/linux/aer.h index 8f87bbe..514bffa 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -14,6 +14,7 @@ #define AER_NONFATAL 0 #define AER_FATAL 1 #define AER_CORRECTABLE 2 +#define DPC_FATAL 3 struct pci_dev;