diff mbox series

PCI/DPC: Fix shared interrupt handling

Message ID 20171214000051.30999.6989.stgit@gimli.home
State Superseded
Headers show
Series PCI/DPC: Fix shared interrupt handling | expand

Commit Message

Alex Williamson Dec. 14, 2017, 12:01 a.m. UTC
DPC supports shared interrupts, but it plays very loosely with testing
whether the interrupt is generated by DPC before generating spurious
log messages, such as:

 dpc 0000:10:01.2:pcie010: DPC containment event, status:0x1f00 source:0x0000

Testing the status register for zero or -1 is not sufficient when the
device supports the RP PIO First Error Pointer register.  Change this
to test whether the interrupt is enabled in the control register,
retaining the device present test, and that the status reports the
interrupt as signaled and DPC is triggered, clearing as a spurious
interrupt otherwise.

Additionally, since the interrupt is actually serviced by a workqueue,
disable the interrupt in the control register until that completes or
else we may never see it execute due to further incoming interrupts.
A software generated DPC floods the system otherwise.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/pci/pcie/pcie-dpc.c |   60 +++++++++++++++++++++++++++++--------------
 1 file changed, 40 insertions(+), 20 deletions(-)

Comments

Keith Busch Dec. 14, 2017, 2:50 p.m. UTC | #1
On Wed, Dec 13, 2017 at 05:01:58PM -0700, Alex Williamson wrote:
> @@ -109,6 +109,7 @@ static void interrupt_event_handler(struct work_struct *work)
>  	struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
>  	struct pci_dev *dev, *temp, *pdev = dpc->dev->port;
>  	struct pci_bus *parent = pdev->subordinate;
> +	u16 ctl;
>  
>  	pci_lock_rescan_remove();
>  	list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
> @@ -135,6 +136,10 @@ static void interrupt_event_handler(struct work_struct *work)
>  
>  	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS,
>  		PCI_EXP_DPC_STATUS_TRIGGER | PCI_EXP_DPC_STATUS_INTERRUPT);
> +
> +	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
> +	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL,
> +			      ctl & ~PCI_EXP_DPC_CTL_INT_EN);

Did you mean to re-enable the interrupt here rather than mask it off?
Alex Williamson Dec. 14, 2017, 2:54 p.m. UTC | #2
On Thu, 14 Dec 2017 07:50:31 -0700
Keith Busch <keith.busch@intel.com> wrote:

> On Wed, Dec 13, 2017 at 05:01:58PM -0700, Alex Williamson wrote:
> > @@ -109,6 +109,7 @@ static void interrupt_event_handler(struct work_struct *work)
> >  	struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
> >  	struct pci_dev *dev, *temp, *pdev = dpc->dev->port;
> >  	struct pci_bus *parent = pdev->subordinate;
> > +	u16 ctl;
> >  
> >  	pci_lock_rescan_remove();
> >  	list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
> > @@ -135,6 +136,10 @@ static void interrupt_event_handler(struct work_struct *work)
> >  
> >  	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS,
> >  		PCI_EXP_DPC_STATUS_TRIGGER | PCI_EXP_DPC_STATUS_INTERRUPT);
> > +
> > +	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
> > +	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL,
> > +			      ctl & ~PCI_EXP_DPC_CTL_INT_EN);  
> 
> Did you mean to re-enable the interrupt here rather than mask it off?

D'oh, yes.  v2 coming.  Thanks,

Alex
diff mbox series

Patch

diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c
index 2d976a623ddc..d97c7def4fd3 100644
--- a/drivers/pci/pcie/pcie-dpc.c
+++ b/drivers/pci/pcie/pcie-dpc.c
@@ -109,6 +109,7 @@  static void interrupt_event_handler(struct work_struct *work)
 	struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
 	struct pci_dev *dev, *temp, *pdev = dpc->dev->port;
 	struct pci_bus *parent = pdev->subordinate;
+	u16 ctl;
 
 	pci_lock_rescan_remove();
 	list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
@@ -135,6 +136,10 @@  static void interrupt_event_handler(struct work_struct *work)
 
 	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS,
 		PCI_EXP_DPC_STATUS_TRIGGER | PCI_EXP_DPC_STATUS_INTERRUPT);
+
+	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
+	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL,
+			      ctl & ~PCI_EXP_DPC_CTL_INT_EN);
 }
 
 static void dpc_rp_pio_print_tlp_header(struct device *dev,
@@ -249,34 +254,49 @@  static irqreturn_t dpc_irq(int irq, void *context)
 	struct dpc_dev *dpc = (struct dpc_dev *)context;
 	struct pci_dev *pdev = dpc->dev->port;
 	struct device *dev = &dpc->dev->device;
-	u16 status, source;
+	u16 ctl, status, source, reason, ext_reason;
+
+	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
+
+	if (!(ctl & PCI_EXP_DPC_CTL_INT_EN) || ctl == (u16)(~0))
+		return IRQ_NONE;
 
 	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, &status);
+
+	if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT))
+		return IRQ_NONE;
+
+	if (!(status & PCI_EXP_DPC_STATUS_TRIGGER)) {
+		pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS,
+				      PCI_EXP_DPC_STATUS_INTERRUPT);
+		return IRQ_HANDLED;
+	}
+
+	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL,
+			      ctl & ~PCI_EXP_DPC_CTL_INT_EN);
+
 	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_SOURCE_ID,
 			     &source);
-	if (!status || status == (u16)(~0))
-		return IRQ_NONE;
 
 	dev_info(dev, "DPC containment event, status:%#06x source:%#06x\n",
 		status, source);
 
-	if (status & PCI_EXP_DPC_STATUS_TRIGGER) {
-		u16 reason = (status >> 1) & 0x3;
-		u16 ext_reason = (status >> 5) & 0x3;
-
-		dev_warn(dev, "DPC %s detected, remove downstream devices\n",
-			 (reason == 0) ? "unmasked uncorrectable error" :
-			 (reason == 1) ? "ERR_NONFATAL" :
-			 (reason == 2) ? "ERR_FATAL" :
-			 (ext_reason == 0) ? "RP PIO error" :
-			 (ext_reason == 1) ? "software trigger" :
-					     "reserved error");
-		/* show RP PIO error detail information */
-		if (reason == 3 && ext_reason == 0)
-			dpc_process_rp_pio_error(dpc);
-
-		schedule_work(&dpc->work);
-	}
+	reason = (status >> 1) & 0x3;
+	ext_reason = (status >> 5) & 0x3;
+
+	dev_warn(dev, "DPC %s detected, remove downstream devices\n",
+		 (reason == 0) ? "unmasked uncorrectable error" :
+		 (reason == 1) ? "ERR_NONFATAL" :
+		 (reason == 2) ? "ERR_FATAL" :
+		 (ext_reason == 0) ? "RP PIO error" :
+		 (ext_reason == 1) ? "software trigger" :
+				     "reserved error");
+	/* show RP PIO error detail information */
+	if (reason == 3 && ext_reason == 0)
+		dpc_process_rp_pio_error(dpc);
+
+	schedule_work(&dpc->work);
+
 	return IRQ_HANDLED;
 }