diff mbox series

[v5,3/5] PCI: Fix bug resulting in double hpmemsize being assigned to MMIO window

Message ID PS2P216MB0642B987B825C938A09F457180370@PS2P216MB0642.KORP216.PROD.OUTLOOK.COM
State Superseded
Delegated to: Bjorn Helgaas
Headers show
Series PCI: Patch series to support Thunderbolt without any BIOS support | expand

Commit Message

Nicholas Johnson May 5, 2019, 2:40 p.m. UTC
Background
==========================================================================

Solve bug report:
https://bugzilla.kernel.org/show_bug.cgi?id=203243

Currently, the kernel can sometimes add the additional size intended for
the MMIO_PREF window to the MMIO window, resulting in double the MMIO
additional size, even if the MMIO_PREF window was assigned successfully.

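This happens if, in the first pass, the MMIO_PREF window is assigned
successfully but the MMIO window fails. In the next pass, because
MMIO_PREF is already assigned, the attempt to assign MMIO_PREF returns an
error code instead of success (nothing more to do, already allocated).
The caller cannot tell this apart from a genuine failure, so it falls
back to placing the MMIO_PREF additional size in the MMIO window.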

Example of problem (more context can be found in the bug report URL):

Mainline kernel:
pci 0000:06:01.0: BAR 14: assigned [mem 0x90100000-0xa00fffff] = 256M
pci 0000:06:04.0: BAR 14: assigned [mem 0xa0200000-0xb01fffff] = 256M

Patched kernel:
pci 0000:06:01.0: BAR 14: assigned [mem 0x90100000-0x980fffff] = 128M
pci 0000:06:04.0: BAR 14: assigned [mem 0x98200000-0xa01fffff] = 128M

Both logs were captured with pci=realloc,hpmemsize=128M,nocrs on the same
machine with the same configuration: once with an Ubuntu mainline kernel
and once with a kernel patched with this patch series.

This patch is vital for the next patch in the series. The next patch
allows the user to specify MMIO and MMIO_PREF independently. If the
MMIO_PREF is set to be very large, this bug will end up more than
doubling the MMIO size. The bug results in the MMIO_PREF being added to
the MMIO window, which means doubling if MMIO_PREF size == MMIO size.
With a large MMIO_PREF, without this patch, the MMIO window will likely
fail to be assigned altogether due to lack of 32-bit address space.

Patch notes
==========================================================================

Rename find_free_bus_resource() to find_bus_resource_of_type() and change
it to no longer skip resources that are already assigned (non-NULL
parent).

Add checks in pbus_size_io() and pbus_size_mem() to return early (with
success, in the case of pbus_size_mem()) if the resource returned from
find_bus_resource_of_type() is already allocated.

This avoids pbus_size_io() and pbus_size_mem() returning an error code to
__pci_bus_size_bridges() when a resource has been successfully assigned
in a previous pass. This fixes the existing behaviour where space for a
resource could be reserved multiple times in different parent bridge
windows. This also greatly reduces the number of failed BAR messages in
dmesg when Linux assigns resources.

Signed-off-by: Nicholas Johnson <nicholas.johnson-opensource@outlook.com.au>
---
 drivers/pci/setup-bus.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)
diff mbox series

Patch

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 5214815c7..e7126cc0e 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -752,11 +752,17 @@  static void pci_bridge_check_ranges(struct pci_bus *bus)
 	}
 }
 
-/* Helper function for sizing routines: find first available
-   bus resource of a given type. Note: we intentionally skip
-   the bus resources which have already been assigned (that is,
-   have non-NULL parent resource). */
-static struct resource *find_free_bus_resource(struct pci_bus *bus,
+/*
+ * Helper function for sizing routines: find first bus resource of a given
+ * type. Note: we do not skip the bus resources which have already been
+ * assigned (r->parent != NULL). This is because a resource that is already
+ * assigned (nothing more to be done) will be indistinguishable from one that
+ * failed due to lack of space if we skip assigned resources. If the caller
+ * function cannot tell the difference then it might try to place the
+ * resources in a different window, doubling up on resources or causing
+ * unforeseeable issues.
+ */
+static struct resource *find_bus_resource_of_type(struct pci_bus *bus,
 			 unsigned long type_mask, unsigned long type)
 {
 	int i;
@@ -765,7 +771,7 @@  static struct resource *find_free_bus_resource(struct pci_bus *bus,
 	pci_bus_for_each_resource(bus, r, i) {
 		if (r == &ioport_resource || r == &iomem_resource)
 			continue;
-		if (r && (r->flags & type_mask) == type && !r->parent)
+		if (r && (r->flags & type_mask) == type)
 			return r;
 	}
 	return NULL;
@@ -863,14 +869,16 @@  static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
 		resource_size_t add_size, struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
-	struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO,
-							IORESOURCE_IO);
+	struct resource *b_res = find_bus_resource_of_type(bus, IORESOURCE_IO,
+					IORESOURCE_IO);
 	resource_size_t size = 0, size0 = 0, size1 = 0;
 	resource_size_t children_add_size = 0;
 	resource_size_t min_align, align;
 
 	if (!b_res)
 		return;
+	if (b_res->parent)
+		return;
 
 	min_align = window_alignment(bus, IORESOURCE_IO);
 	list_for_each_entry(dev, &bus->devices, bus_list) {
@@ -975,7 +983,7 @@  static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
 	resource_size_t min_align, align, size, size0, size1;
 	resource_size_t aligns[18];	/* Alignments from 1Mb to 128Gb */
 	int order, max_order;
-	struct resource *b_res = find_free_bus_resource(bus,
+	struct resource *b_res = find_bus_resource_of_type(bus,
 					mask | IORESOURCE_PREFETCH, type);
 	resource_size_t children_add_size = 0;
 	resource_size_t children_add_align = 0;
@@ -983,6 +991,8 @@  static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
 
 	if (!b_res)
 		return -ENOSPC;
+	if (b_res->parent)
+		return 0;
 
 	memset(aligns, 0, sizeof(aligns));
 	max_order = 0;