diff mbox

[v10,09/18] powerpc/powernv: Extend PCI bridge resources

Message ID 1463726502-14679-10-git-send-email-gwshan@linux.vnet.ibm.com
State Not Applicable
Headers show

Commit Message

Gavin Shan May 20, 2016, 6:41 a.m. UTC
The PCI slots are associated with root port or downstream ports
of the PCIe switch connected to root port. When adapter is hot
added to the PCI slot, it usually requests more IO or memory
resource from the directly connected parent bridge (port) and
update the bridge's windows accordingly. The resource windows
of upstream bridges can't be updated automatically. It possibly
leads to unbalanced resource across the bridges: The window of
downstream bridge is overruning that of upstream bridge. The
IO or MMIO path won't work.

This resolves the above issue by extending bridge windows of
root port and upstream port of the PCIe switch connected to
the root port to PHB's windows.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 46 +++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

Comments

Alexey Kardashevskiy June 8, 2016, 3:47 a.m. UTC | #1
On 20/05/16 16:41, Gavin Shan wrote:
> The PCI slots are associated with root port or downstream ports
> of the PCIe switch connected to root port. When adapter is hot
> added to the PCI slot, it usually requests more IO or memory
> resource from the directly connected parent bridge (port) and
> update the bridge's windows accordingly. The resource windows
> of upstream bridges can't be updated automatically. It possibly
> leads to unbalanced resource across the bridges: The window of
> downstream bridge is overruning that of upstream bridge. The
> IO or MMIO path won't work.
> 
> This resolves the above issue by extending bridge windows of
> root port and upstream port of the PCIe switch connected to
> the root port to PHB's windows.
> 
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>


This breaks Garrison machine (g86l):

EEH: Frozen PE#f9 on PHB#5 detected
EEH: PE location: Backplane PLX, PHB location: N/A
EEH: This PCI device has failed 1 times in the last hour
EEH: Notify device drivers to shutdown
EEH: Collect temporary log


PHB#5 has a boot device so we end up in initramdisk.


│0005:03:00.0 USB controller: Texas Instruments TUSB73x0 SuperSpeed USB 3.0
xHCI Host Controller (rev 02)
│0005:04:00.0 SATA controller: Marvell Technology Group Ltd. 88SE9235 PCIe
2.0 x2 4-port SATA 6 Gb/s Controller (rev 11)
│0005:05:00.0 PCI bridge: ASPEED Technology, Inc. AST1150 PCI-to-PCI Bridge
(rev 03)
│0005:06:00.0 VGA compatible controller: ASPEED Technology, Inc. ASPEED
Graphics Family (rev 30)



> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 46 +++++++++++++++++++++++++++++++
>  1 file changed, 46 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 3186a29..e97a5fa 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -3221,6 +3221,49 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
>  	return phb->ioda.io_segsize;
>  }
>  
> +/*
> + * We are updating root port or the upstream port of the
> + * bridge behind the root port with PHB's windows in order
> + * to accommodate the changes on required resources during
> + * PCI (slot) hotplug, which is connected to either root
> + * port or the downstream ports of PCIe switch behind the
> + * root port.
> + */
> +static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
> +					   unsigned long type)
> +{
> +	struct pci_controller *hose = pci_bus_to_host(bus);
> +	struct pnv_phb *phb = hose->private_data;
> +	struct pci_dev *bridge = bus->self;
> +	struct resource *r, *w;
> +	int i;
> +
> +	/* Check if we need apply fixup to the bridge's windows */
> +	if (!pci_is_root_bus(bridge->bus) &&
> +	    !pci_is_root_bus(bridge->bus->self->bus))
> +		return;
> +
> +	/* Fixup the resources */
> +	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
> +		r = &bridge->resource[PCI_BRIDGE_RESOURCES + i];
> +		if (!r->flags || !r->parent)
> +			continue;
> +
> +		w = NULL;
> +		if (r->flags & type & IORESOURCE_IO)
> +			w = &hose->io_resource;
> +		else if (pnv_pci_is_mem_pref_64(r->flags) &&
> +			 (type & IORESOURCE_PREFETCH) &&
> +			 phb->ioda.m64_segsize)
> +			w = &hose->mem_resources[1];
> +		else if (r->flags & type & IORESOURCE_MEM)
> +			w = &hose->mem_resources[0];
> +
> +		r->start = w->start;
> +		r->end = w->end;
> +	}
> +}
> +
>  static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
>  {
>  	struct pci_controller *hose = pci_bus_to_host(bus);
> @@ -3229,6 +3272,9 @@ static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
>  	struct pnv_ioda_pe *pe;
>  	bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
>  
> +	/* Extend bridge's windows if necessary */
> +	pnv_pci_fixup_bridge_resources(bus, type);
> +
>  	/* The PE for root bus should be realized before any one else */
>  	if (!phb->ioda.root_pe_populated) {
>  		pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false);
>
Gavin Shan June 10, 2016, 4:33 a.m. UTC | #2
On Wed, Jun 08, 2016 at 01:47:16PM +1000, Alexey Kardashevskiy wrote:
>On 20/05/16 16:41, Gavin Shan wrote:
>> The PCI slots are associated with root port or downstream ports
>> of the PCIe switch connected to root port. When adapter is hot
>> added to the PCI slot, it usually requests more IO or memory
>> resource from the directly connected parent bridge (port) and
>> update the bridge's windows accordingly. The resource windows
>> of upstream bridges can't be updated automatically. It possibly
>> leads to unbalanced resource across the bridges: The window of
>> downstream bridge is overruning that of upstream bridge. The
>> IO or MMIO path won't work.
>> 
>> This resolves the above issue by extending bridge windows of
>> root port and upstream port of the PCIe switch connected to
>> the root port to PHB's windows.
>> 
>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>
>
>This breaks Garrison machine (g86l):
>
>EEH: Frozen PE#f9 on PHB#5 detected
>EEH: PE location: Backplane PLX, PHB location: N/A
>EEH: This PCI device has failed 1 times in the last hour
>EEH: Notify device drivers to shutdown
>EEH: Collect temporary log
>

Thanks for reporting the issue. I don't think the issue was caused by
the code in this patch. Actually, it's likely caused by hardware defect
- we can't set 2GB (0x80000000 - 0xffffffff) to RC's memory window.
Otherwise, it *seems* the window is disabled. I tried updating the
window with (0x80000000 - 0xffefffff) or (0x80000000 - 0xffdffff), no
EEH error was seen. I already got 0x00001000 on read despite whatever
I wrote to 0x20 reg.

The hardware is broken. In order to fix this, I intend to include a
bitmap for every PHB device node in skiboot. Kernel uses this to apply
fixup accordingly. One bit is reserved on Garrison platform to avoid
this issue. The fix can be a patch inserted before this patch in next
revision or as a followup patch after this series of patches.

>
>PHB#5 has a boot device so we end up in initramdisk.
>
>
>│0005:03:00.0 USB controller: Texas Instruments TUSB73x0 SuperSpeed USB 3.0
>xHCI Host Controller (rev 02)
>│0005:04:00.0 SATA controller: Marvell Technology Group Ltd. 88SE9235 PCIe
>2.0 x2 4-port SATA 6 Gb/s Controller (rev 11)
>│0005:05:00.0 PCI bridge: ASPEED Technology, Inc. AST1150 PCI-to-PCI Bridge
>(rev 03)
>│0005:06:00.0 VGA compatible controller: ASPEED Technology, Inc. ASPEED
>Graphics Family (rev 30)
>
>
>
>> ---
>>  arch/powerpc/platforms/powernv/pci-ioda.c | 46 +++++++++++++++++++++++++++++++
>>  1 file changed, 46 insertions(+)
>> 
>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>> index 3186a29..e97a5fa 100644
>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>> @@ -3221,6 +3221,49 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
>>  	return phb->ioda.io_segsize;
>>  }
>>  
>> +/*
>> + * We are updating root port or the upstream port of the
>> + * bridge behind the root port with PHB's windows in order
>> + * to accommodate the changes on required resources during
>> + * PCI (slot) hotplug, which is connected to either root
>> + * port or the downstream ports of PCIe switch behind the
>> + * root port.
>> + */
>> +static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
>> +					   unsigned long type)
>> +{
>> +	struct pci_controller *hose = pci_bus_to_host(bus);
>> +	struct pnv_phb *phb = hose->private_data;
>> +	struct pci_dev *bridge = bus->self;
>> +	struct resource *r, *w;
>> +	int i;
>> +
>> +	/* Check if we need apply fixup to the bridge's windows */
>> +	if (!pci_is_root_bus(bridge->bus) &&
>> +	    !pci_is_root_bus(bridge->bus->self->bus))
>> +		return;
>> +
>> +	/* Fixup the resources */
>> +	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
>> +		r = &bridge->resource[PCI_BRIDGE_RESOURCES + i];
>> +		if (!r->flags || !r->parent)
>> +			continue;
>> +
>> +		w = NULL;
>> +		if (r->flags & type & IORESOURCE_IO)
>> +			w = &hose->io_resource;
>> +		else if (pnv_pci_is_mem_pref_64(r->flags) &&
>> +			 (type & IORESOURCE_PREFETCH) &&
>> +			 phb->ioda.m64_segsize)
>> +			w = &hose->mem_resources[1];
>> +		else if (r->flags & type & IORESOURCE_MEM)
>> +			w = &hose->mem_resources[0];
>> +
>> +		r->start = w->start;
>> +		r->end = w->end;
>> +	}
>> +}
>> +
>>  static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
>>  {
>>  	struct pci_controller *hose = pci_bus_to_host(bus);
>> @@ -3229,6 +3272,9 @@ static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
>>  	struct pnv_ioda_pe *pe;
>>  	bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
>>  
>> +	/* Extend bridge's windows if necessary */
>> +	pnv_pci_fixup_bridge_resources(bus, type);
>> +
>>  	/* The PE for root bus should be realized before any one else */
>>  	if (!phb->ioda.root_pe_populated) {
>>  		pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false);
>> 
>
>
>-- 
>Alexey
>

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexey Kardashevskiy June 10, 2016, 5:28 a.m. UTC | #3
On 10/06/16 14:33, Gavin Shan wrote:
> On Wed, Jun 08, 2016 at 01:47:16PM +1000, Alexey Kardashevskiy wrote:
>> On 20/05/16 16:41, Gavin Shan wrote:
>>> The PCI slots are associated with root port or downstream ports
>>> of the PCIe switch connected to root port. When adapter is hot
>>> added to the PCI slot, it usually requests more IO or memory
>>> resource from the directly connected parent bridge (port) and
>>> update the bridge's windows accordingly. The resource windows
>>> of upstream bridges can't be updated automatically. It possibly
>>> leads to unbalanced resource across the bridges: The window of
>>> downstream bridge is overruning that of upstream bridge. The
>>> IO or MMIO path won't work.
>>>
>>> This resolves the above issue by extending bridge windows of
>>> root port and upstream port of the PCIe switch connected to
>>> the root port to PHB's windows.
>>>
>>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>
>>
>> This breaks Garrison machine (g86l):
>>
>> EEH: Frozen PE#f9 on PHB#5 detected
>> EEH: PE location: Backplane PLX, PHB location: N/A
>> EEH: This PCI device has failed 1 times in the last hour
>> EEH: Notify device drivers to shutdown
>> EEH: Collect temporary log
>>
> 
> Thanks for reporting the issue. I don't think the issue was caused by
> the code in this patch.


If you say so :) I am just saying that the code in this patch did trigger
the bug, I bisected the series to this patch to find this out.


> Actually, it's likely caused by hardware defect
> - we can't set 2GB (0x80000000 - 0xffffffff) to RC's memory window.
> Otherwise, it *seems* the window is disabled. I tried updating the
> window with (0x80000000 - 0xffefffff) or (0x80000000 - 0xffdffff), no
> EEH error was seen. I already got 0x00001000 on read despite whatever
> I wrote to 0x20 reg.
> 
> The hardware is broken. In order to fix this, I intend to include a
> bitmap for every PHB device node in skiboot. Kernel uses this to apply
> fixup accordingly. One bit is reserved on Garrison platform to avoid
> this issue. The fix can be a patch inserted before this patch in next
> revision

This sounds better as preserves bisectability. Thanks.


> or as a followup patch after this series of patches.
Benjamin Herrenschmidt June 10, 2016, 5:45 a.m. UTC | #4
On Fri, 2016-06-10 at 15:28 +1000, Alexey Kardashevskiy wrote:
> > Actually, it's likely caused by hardware defect
> > - we can't set 2GB (0x80000000 - 0xffffffff) to RC's memory window.
> > Otherwise, it *seems* the window is disabled. I tried updating the
> > window with (0x80000000 - 0xffefffff) or (0x80000000 - 0xffdffff), no
> > EEH error was seen. I already got 0x00001000 on read despite whatever
> > I wrote to 0x20 reg.
> > 
> > The hardware is broken. In order to fix this, I intend to include a
> > bitmap for every PHB device node in skiboot. Kernel uses this to apply
> > fixup accordingly. One bit is reserved on Garrison platform to avoid
> > this issue. The fix can be a patch inserted before this patch in next
> > revision
> 
> This sounds better as preserves bisectability. Thanks.

Ah yes they made those registers read-only. Look at my PHB4 code, I
implement a cache for them in SW.

Cheers,
Ben.

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gavin Shan June 10, 2016, 6:37 a.m. UTC | #5
On Fri, Jun 10, 2016 at 03:45:30PM +1000, Benjamin Herrenschmidt wrote:
>On Fri, 2016-06-10 at 15:28 +1000, Alexey Kardashevskiy wrote:
>> > Actually, it's likely caused by hardware defect
>> > - we can't set 2GB (0x80000000 - 0xffffffff) to RC's memory window.
>> > Otherwise, it *seems* the window is disabled. I tried updating the
>> > window with (0x80000000 - 0xffefffff) or (0x80000000 - 0xffdffff), no
>> > EEH error was seen. I already got 0x00001000 on read despite whatever
>> > I wrote to 0x20 reg.
>> > 
>> > The hardware is broken. In order to fix this, I intend to include a
>> > bitmap for every PHB device node in skiboot. Kernel uses this to apply
>> > fixup accordingly. One bit is reserved on Garrison platform to avoid
>> > this issue. The fix can be a patch inserted before this patch in next
>> > revision
>> 
>> This sounds better as preserves bisectability. Thanks.
>
>Ah yes they made those registers read-only. Look at my PHB4 code, I
>implement a cache for them in SW.
>

Ben, thanks for your confirm. Could you please share the link to
your PHB4 code? I think writing to SW cache, not going to hardware
will fix the issue.

Currently, skiboot supports emulated config regiters with help of
(struct pci_cfg_reg_filter) that was introduced for CAPI M64 BAR
issue on Garrison platform. Potentially, I can have similar thing
for 0x20 (memory window) to avoid writing to the register. However,
I need take a look on your PHB4 code to see if there is anything I
can lend. Otherwise, I will reuse the struct pci_cfg_reg_filter.
At same time, I guess the bitmap (mentioned as above) is still
needed to ensure (new kernel + old skiboot) works well, but it
depends on how much Garrison boxes have been deployed.

>Cheers,
>Ben.
>

Thanks,
Gavin

>_______________________________________________
>Linuxppc-dev mailing list
>Linuxppc-dev@lists.ozlabs.org
>https://lists.ozlabs.org/listinfo/linuxppc-dev

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3186a29..e97a5fa 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3221,6 +3221,49 @@  static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
 	return phb->ioda.io_segsize;
 }
 
+/*
+ * We are updating root port or the upstream port of the
+ * bridge behind the root port with PHB's windows in order
+ * to accommodate the changes on required resources during
+ * PCI (slot) hotplug, which is connected to either root
+ * port or the downstream ports of PCIe switch behind the
+ * root port.
+ */
+static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
+					   unsigned long type)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct pci_dev *bridge = bus->self;
+	struct resource *r, *w;
+	int i;
+
+	/* Check if we need apply fixup to the bridge's windows */
+	if (!pci_is_root_bus(bridge->bus) &&
+	    !pci_is_root_bus(bridge->bus->self->bus))
+		return;
+
+	/* Fixup the resources */
+	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
+		r = &bridge->resource[PCI_BRIDGE_RESOURCES + i];
+		if (!r->flags || !r->parent)
+			continue;
+
+		w = NULL;
+		if (r->flags & type & IORESOURCE_IO)
+			w = &hose->io_resource;
+		else if (pnv_pci_is_mem_pref_64(r->flags) &&
+			 (type & IORESOURCE_PREFETCH) &&
+			 phb->ioda.m64_segsize)
+			w = &hose->mem_resources[1];
+		else if (r->flags & type & IORESOURCE_MEM)
+			w = &hose->mem_resources[0];
+
+		r->start = w->start;
+		r->end = w->end;
+	}
+}
+
 static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
 {
 	struct pci_controller *hose = pci_bus_to_host(bus);
@@ -3229,6 +3272,9 @@  static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
 	struct pnv_ioda_pe *pe;
 	bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
 
+	/* Extend bridge's windows if necessary */
+	pnv_pci_fixup_bridge_resources(bus, type);
+
 	/* The PE for root bus should be realized before any one else */
 	if (!phb->ioda.root_pe_populated) {
 		pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false);