[v8,1/6] PCI: Consider alignment of hot-added bridges when distributing available resources
diff mbox series

Message ID SL2P216MB01871E87E3A760E3AA87E27380C00@SL2P216MB0187.KORP216.PROD.OUTLOOK.COM
State Superseded
Delegated to: Bjorn Helgaas
Headers show
Series
  • Patch series to support Thunderbolt without any BIOS support
Related show

Commit Message

Nicholas Johnson July 26, 2019, 12:53 p.m. UTC
Rewrite pci_bus_distribute_available_resources to better handle bridges
with different resource alignment requirements. Pass more detailed
arguments recursively to track the resource start and end addresses
relative to the initial hotplug bridge. This is especially useful for
Thunderbolt with native PCI enumeration, enabling external graphics
cards and other devices with bridge alignment higher than 1MB.

Change extend_bridge_window to resize the actual resource, rather than
using add_list and dev_res->add_size. If an additional resource entry
exists for the given resource, zero out the add_size field to avoid it
interfering. Because add_size is considered optional when allocating,
using add_size could cause issues in some cases, because successful
resource distribution requires sizes to be guaranteed. Such cases
include hot-adding nested hotplug bridges in one enumeration, and
potentially others which are yet to be encountered.

Solves bug report: https://bugzilla.kernel.org/show_bug.cgi?id=199581

Reported-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Nicholas Johnson <nicholas.johnson-opensource@outlook.com.au>
---
 drivers/pci/setup-bus.c | 148 +++++++++++++++++++---------------------
 1 file changed, 71 insertions(+), 77 deletions(-)

Comments

Mika Westerberg Oct. 8, 2019, 11:38 a.m. UTC | #1
Hi Nicholas,

On Fri, Jul 26, 2019 at 12:53:19PM +0000, Nicholas Johnson wrote:
> Rewrite pci_bus_distribute_available_resources to better handle bridges
> with different resource alignment requirements. Pass more details
> arguments recursively to track the resource start and end addresses
> relative to the initial hotplug bridge. This is especially useful for
> Thunderbolt with native PCI enumeration, enabling external graphics
> cards and other devices with bridge alignment higher than 1MB.
> 
> Change extend_bridge_window to resize the actual resource, rather than
> using add_list and dev_res->add_size. If an additional resource entry
> exists for the given resource, zero out the add_size field to avoid it
> interfering. Because add_size is considered optional when allocating,
> using add_size could cause issues in some cases, because successful
> resource distribution requires sizes to be guaranteed. Such cases
> include hot-adding nested hotplug bridges in one enumeration, and
> potentially others which are yet to be encountered.
> 
> Solves bug report: https://bugzilla.kernel.org/show_bug.cgi?id=199581

Here better to use:

Link: https://bugzilla.kernel.org/show_bug.cgi?id=199581

> Reported-by: Mika Westerberg <mika.westerberg@linux.intel.com>

This solves the issue I reported so,

Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>

There are a couple of comments below.

> Signed-off-by: Nicholas Johnson <nicholas.johnson-opensource@outlook.com.au>
> ---
>  drivers/pci/setup-bus.c | 148 +++++++++++++++++++---------------------
>  1 file changed, 71 insertions(+), 77 deletions(-)
> 
> diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> index 79b1fa651..6835fd64c 100644
> --- a/drivers/pci/setup-bus.c
> +++ b/drivers/pci/setup-bus.c
> @@ -1840,12 +1840,10 @@ static void extend_bridge_window(struct pci_dev *bridge, struct resource *res,
>  }
>  
>  static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> -					    struct list_head *add_list,
> -					    resource_size_t available_io,
> -					    resource_size_t available_mmio,
> -					    resource_size_t available_mmio_pref)
> +	struct list_head *add_list, struct resource io,
> +	struct resource mmio, struct resource mmio_pref)

You pass a copy of each resource because you modify it inplace. I wonder
if it makes more sense to explicitly take a copy here with comments?

>  {
> -	resource_size_t remaining_io, remaining_mmio, remaining_mmio_pref;
> +	resource_size_t io_per_hp, mmio_per_hp, mmio_pref_per_hp, align;
>  	unsigned int normal_bridges = 0, hotplug_bridges = 0;
>  	struct resource *io_res, *mmio_res, *mmio_pref_res;
>  	struct pci_dev *dev, *bridge = bus->self;
> @@ -1855,15 +1853,29 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
>  	mmio_pref_res = &bridge->resource[PCI_BRIDGE_RESOURCES + 2];
>  
>  	/*
> -	 * Update additional resource list (add_list) to fill all the
> -	 * extra resource space available for this port except the space
> -	 * calculated in __pci_bus_size_bridges() which covers all the
> -	 * devices currently connected to the port and below.
> +	 * The alignment of this bridge is yet to be considered, hence it must
> +	 * be done now before extending its bridge window.
>  	 */
> -	extend_bridge_window(bridge, io_res, add_list, available_io);
> -	extend_bridge_window(bridge, mmio_res, add_list, available_mmio);
> +	align = pci_resource_alignment(bridge, io_res);
> +	if (!io_res->parent && align)
> +		io.start = ALIGN(io.start, align);
> +
> +	align = pci_resource_alignment(bridge, mmio_res);
> +	if (!mmio_res->parent && align)
> +		mmio.start = ALIGN(mmio.start, align);
> +
> +	align = pci_resource_alignment(bridge, mmio_pref_res);
> +	if (!mmio_pref_res->parent && align)
> +		mmio_pref.start = ALIGN(mmio_pref.start, align);
> +
> +	/*
> +	 * Update the resources to fill as much remaining resource space in the
> +	 * parent bridge as possible, while considering alignment.
> +	 */
> +	extend_bridge_window(bridge, io_res, add_list, resource_size(&io));
> +	extend_bridge_window(bridge, mmio_res, add_list, resource_size(&mmio));
>  	extend_bridge_window(bridge, mmio_pref_res, add_list,
> -			     available_mmio_pref);
> +		resource_size(&mmio_pref));

I think this should be aligned like:

 	extend_bridge_window(bridge, mmio_pref_res, add_list,
			     resource_size(&mmio_pref));


>  
>  	/*
>  	 * Calculate how many hotplug bridges and normal bridges there
> @@ -1884,108 +1896,90 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
>  	 */
>  	if (hotplug_bridges + normal_bridges == 1) {
>  		dev = list_first_entry(&bus->devices, struct pci_dev, bus_list);
> -		if (dev->subordinate) {
> +		if (dev->subordinate)
>  			pci_bus_distribute_available_resources(dev->subordinate,
> -				add_list, available_io, available_mmio,
> -				available_mmio_pref);
> -		}
> +				add_list, io, mmio, mmio_pref);
>  		return;
>  	}
>  
> -	if (hotplug_bridges == 0)
> -		return;
> -
>  	/*
> -	 * Calculate the total amount of extra resource space we can
> -	 * pass to bridges below this one.  This is basically the
> -	 * extra space reduced by the minimal required space for the
> -	 * non-hotplug bridges.
> +	 * Reduce the available resource space by what the
> +	 * bridge and devices below it occupy.

This can be widened:


	/*
	 * Reduce the available resource space by what the bridge and
	 * devices below it occupy.
	 */


>  	 */
> -	remaining_io = available_io;
> -	remaining_mmio = available_mmio;
> -	remaining_mmio_pref = available_mmio_pref;
> -
>  	for_each_pci_bridge(dev, bus) {
> -		const struct resource *res;
> +		struct resource *res;
> +		resource_size_t used_size;

Some people like "reverse christmas tree" format better:

		resource_size_t used_size;
		struct resource *res;

Can it be const, BTW?

>  		if (dev->is_hotplug_bridge)
>  			continue;
>  
> -		/*
> -		 * Reduce the available resource space by what the
> -		 * bridge and devices below it occupy.
> -		 */
>  		res = &dev->resource[PCI_BRIDGE_RESOURCES + 0];
> -		if (!res->parent && available_io > resource_size(res))
> -			remaining_io -= resource_size(res);
> +		align = pci_resource_alignment(dev, res);
> +		align = align ? ALIGN(io.start, align) - io.start : 0;
> +		used_size = align + resource_size(res);
> +		if (!res->parent && used_size <= resource_size(&io))
> +			io.start += used_size;
>  
>  		res = &dev->resource[PCI_BRIDGE_RESOURCES + 1];
> -		if (!res->parent && available_mmio > resource_size(res))
> -			remaining_mmio -= resource_size(res);
> +		align = pci_resource_alignment(dev, res);
> +		align = align ? ALIGN(mmio.start, align) - mmio.start : 0;
> +		used_size = align + resource_size(res);
> +		if (!res->parent && used_size <= resource_size(&mmio))
> +			mmio.start += used_size;
>  
>  		res = &dev->resource[PCI_BRIDGE_RESOURCES + 2];
> -		if (!res->parent && available_mmio_pref > resource_size(res))
> -			remaining_mmio_pref -= resource_size(res);
> +		align = pci_resource_alignment(dev, res);
> +		align = align ? ALIGN(mmio_pref.start, align) -
> +				mmio_pref.start : 0;
> +		used_size = align + resource_size(res);
> +		if (!res->parent && used_size <= resource_size(&mmio_pref))
> +			mmio_pref.start += used_size;
>  	}
>  
> +	if (!hotplug_bridges)
> +		return;
> +
>  	/*
> -	 * Go over devices on this bus and distribute the remaining
> -	 * resource space between hotplug bridges.
> +	 * Distribute any remaining resources equally between
> +	 * the hotplug-capable downstream ports.
>  	 */
> -	for_each_pci_bridge(dev, bus) {
> -		resource_size_t align, io, mmio, mmio_pref;
> -		struct pci_bus *b;
> +	io_per_hp = div64_ul(resource_size(&io), hotplug_bridges);
> +	mmio_per_hp = div64_ul(resource_size(&mmio), hotplug_bridges);
> +	mmio_pref_per_hp = div64_ul(resource_size(&mmio_pref),
> +		hotplug_bridges);
>  
> -		b = dev->subordinate;
> -		if (!b || !dev->is_hotplug_bridge)
> +	for_each_pci_bridge(dev, bus) {
> +		if (!dev->subordinate || !dev->is_hotplug_bridge)
>  			continue;
>  
> -		/*
> -		 * Distribute available extra resources equally between
> -		 * hotplug-capable downstream ports taking alignment into
> -		 * account.
> -		 */
> -		align = pci_resource_alignment(bridge, io_res);
> -		io = div64_ul(available_io, hotplug_bridges);
> -		io = min(ALIGN(io, align), remaining_io);
> -		remaining_io -= io;
> -
> -		align = pci_resource_alignment(bridge, mmio_res);
> -		mmio = div64_ul(available_mmio, hotplug_bridges);
> -		mmio = min(ALIGN(mmio, align), remaining_mmio);
> -		remaining_mmio -= mmio;
> +		io.end = io.start + io_per_hp - 1;
> +		mmio.end = mmio.start + mmio_per_hp - 1;
> +		mmio_pref.end = mmio_pref.start + mmio_pref_per_hp - 1;
>  
> -		align = pci_resource_alignment(bridge, mmio_pref_res);
> -		mmio_pref = div64_ul(available_mmio_pref, hotplug_bridges);
> -		mmio_pref = min(ALIGN(mmio_pref, align), remaining_mmio_pref);
> -		remaining_mmio_pref -= mmio_pref;
> +		pci_bus_distribute_available_resources(dev->subordinate,
> +			add_list, io, mmio, mmio_pref);
>  
> -		pci_bus_distribute_available_resources(b, add_list, io, mmio,
> -						       mmio_pref);
> +		io.start = io.end + 1;

I think you can also write it like:

		io.start += io_per_hp;

> +		mmio.start = mmio.end + 1;
> +		mmio_pref.start = mmio_pref.end + 1;
>  	}
>  }
>  
>  static void pci_bridge_distribute_available_resources(struct pci_dev *bridge,
>  						     struct list_head *add_list)
>  {
> -	resource_size_t available_io, available_mmio, available_mmio_pref;
> -	const struct resource *res;
> +	struct resource io, mmio, mmio_pref;
>  
>  	if (!bridge->is_hotplug_bridge)
>  		return;
>  
>  	/* Take the initial extra resources from the hotplug port */
> -	res = &bridge->resource[PCI_BRIDGE_RESOURCES + 0];
> -	available_io = resource_size(res);
> -	res = &bridge->resource[PCI_BRIDGE_RESOURCES + 1];
> -	available_mmio = resource_size(res);
> -	res = &bridge->resource[PCI_BRIDGE_RESOURCES + 2];
> -	available_mmio_pref = resource_size(res);
> +	io = bridge->resource[PCI_BRIDGE_RESOURCES + 0];
> +	mmio = bridge->resource[PCI_BRIDGE_RESOURCES + 1];
> +	mmio_pref = bridge->resource[PCI_BRIDGE_RESOURCES + 2];
>  
> -	pci_bus_distribute_available_resources(bridge->subordinate,
> -					       add_list, available_io,
> -					       available_mmio,
> -					       available_mmio_pref);
> +	pci_bus_distribute_available_resources(bridge->subordinate, add_list,
> +					       io, mmio, mmio_pref);
>  }
>  
>  void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
> -- 
> 2.22.0
Nicholas Johnson Oct. 23, 2019, 9:08 a.m. UTC | #2
On Tue, Oct 08, 2019 at 02:38:12PM +0300, mika.westerberg@linux.intel.com wrote:
> Hi Nicholas,

Hi Mika,

I apologise for not responding quickly. I have switched off for a while 
- taking my time to post the patches based on Linux 5.4. Hence, I was 
not expecting any emails on this, and was not checking. Plus I was 
starting to lose motivation.

I have been taking the time to change how I approach this. I am going to 
post the patches to egpu.io forums to get a heap of people testing it 
and hopefully saying nice things about it. Originally I thought it would 
be quick to get the patches accepted so I was only going to announce 
this after being accepted.

I also realised my patch series should not be a series. None of this is 
specific to Thunderbolt and hence should not be a series. By separating 
parts of this series, it may be easier to sign off and accept.

> 
> On Fri, Jul 26, 2019 at 12:53:19PM +0000, Nicholas Johnson wrote:
> > Rewrite pci_bus_distribute_available_resources to better handle bridges
> > with different resource alignment requirements. Pass more details
> > arguments recursively to track the resource start and end addresses
> > relative to the initial hotplug bridge. This is especially useful for
> > Thunderbolt with native PCI enumeration, enabling external graphics
> > cards and other devices with bridge alignment higher than 1MB.
> > 
> > Change extend_bridge_window to resize the actual resource, rather than
> > using add_list and dev_res->add_size. If an additional resource entry
> > exists for the given resource, zero out the add_size field to avoid it
> > interfering. Because add_size is considered optional when allocating,
> > using add_size could cause issues in some cases, because successful
> > resource distribution requires sizes to be guaranteed. Such cases
> > include hot-adding nested hotplug bridges in one enumeration, and
> > potentially others which are yet to be encountered.
> > 
> > Solves bug report: https://bugzilla.kernel.org/show_bug.cgi?id=199581
> 
> Here better to use:
> 
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=199581
> 
> > Reported-by: Mika Westerberg <mika.westerberg@linux.intel.com>
> 
> This solves the issue I reported so,
> 
> Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
So this is adding "Tested-by" on top of "Reported-by" and not replacing 
one with the other?

> 
> There are a couple of comments below.
> 
> > Signed-off-by: Nicholas Johnson <nicholas.johnson-opensource@outlook.com.au>
> > ---
> >  drivers/pci/setup-bus.c | 148 +++++++++++++++++++---------------------
> >  1 file changed, 71 insertions(+), 77 deletions(-)
> > 
> > diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> > index 79b1fa651..6835fd64c 100644
> > --- a/drivers/pci/setup-bus.c
> > +++ b/drivers/pci/setup-bus.c
> > @@ -1840,12 +1840,10 @@ static void extend_bridge_window(struct pci_dev *bridge, struct resource *res,
> >  }
> >  
> >  static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> > -					    struct list_head *add_list,
> > -					    resource_size_t available_io,
> > -					    resource_size_t available_mmio,
> > -					    resource_size_t available_mmio_pref)
> > +	struct list_head *add_list, struct resource io,
> > +	struct resource mmio, struct resource mmio_pref)
> 
> You pass a copy of each resource because you modify it inplace. I wonder
> if it makes more sense to explicitly take a copy here with comments?

I have no qualms with modifying parameters, and sometimes quite like 
doing it. I could do as you suggest but that means more lines of diff, 
and Bjorn seems to be sending me a strong message that the fewer lines of 
diff, the better.

I just noticed this: https://lkml.org/lkml/2019/10/4/337

Bjorn says I am touching critical and complicated code that he does not 
understand. This could explain his aversion to more lines of diff.

If Bjorn will trust you to sign this off and take your assurance that it 
is fine, then I can start taking your advice over his. I have been 
favouring his advice because I figured he would have the final say as 
the PCI subsystem maintainer.

> 
> >  {
> > -	resource_size_t remaining_io, remaining_mmio, remaining_mmio_pref;
> > +	resource_size_t io_per_hp, mmio_per_hp, mmio_pref_per_hp, align;
> >  	unsigned int normal_bridges = 0, hotplug_bridges = 0;
> >  	struct resource *io_res, *mmio_res, *mmio_pref_res;
> >  	struct pci_dev *dev, *bridge = bus->self;
> > @@ -1855,15 +1853,29 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> >  	mmio_pref_res = &bridge->resource[PCI_BRIDGE_RESOURCES + 2];
> >  
> >  	/*
> > -	 * Update additional resource list (add_list) to fill all the
> > -	 * extra resource space available for this port except the space
> > -	 * calculated in __pci_bus_size_bridges() which covers all the
> > -	 * devices currently connected to the port and below.
> > +	 * The alignment of this bridge is yet to be considered, hence it must
> > +	 * be done now before extending its bridge window.
> >  	 */
> > -	extend_bridge_window(bridge, io_res, add_list, available_io);
> > -	extend_bridge_window(bridge, mmio_res, add_list, available_mmio);
> > +	align = pci_resource_alignment(bridge, io_res);
> > +	if (!io_res->parent && align)
> > +		io.start = ALIGN(io.start, align);
> > +
> > +	align = pci_resource_alignment(bridge, mmio_res);
> > +	if (!mmio_res->parent && align)
> > +		mmio.start = ALIGN(mmio.start, align);
> > +
> > +	align = pci_resource_alignment(bridge, mmio_pref_res);
> > +	if (!mmio_pref_res->parent && align)
> > +		mmio_pref.start = ALIGN(mmio_pref.start, align);
> > +
> > +	/*
> > +	 * Update the resources to fill as much remaining resource space in the
> > +	 * parent bridge as possible, while considering alignment.
> > +	 */
> > +	extend_bridge_window(bridge, io_res, add_list, resource_size(&io));
> > +	extend_bridge_window(bridge, mmio_res, add_list, resource_size(&mmio));
> >  	extend_bridge_window(bridge, mmio_pref_res, add_list,
> > -			     available_mmio_pref);
> > +		resource_size(&mmio_pref));
> 
> I think this should be aligned like:
> 
>  	extend_bridge_window(bridge, mmio_pref_res, add_list,
> 			     resource_size(&mmio_pref));
Me too, I do not know how that one slipped past me.

> 
> 
> >  
> >  	/*
> >  	 * Calculate how many hotplug bridges and normal bridges there
> > @@ -1884,108 +1896,90 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> >  	 */
> >  	if (hotplug_bridges + normal_bridges == 1) {
> >  		dev = list_first_entry(&bus->devices, struct pci_dev, bus_list);
> > -		if (dev->subordinate) {
> > +		if (dev->subordinate)
> >  			pci_bus_distribute_available_resources(dev->subordinate,
> > -				add_list, available_io, available_mmio,
> > -				available_mmio_pref);
> > -		}
> > +				add_list, io, mmio, mmio_pref);
> >  		return;
> >  	}
> >  
> > -	if (hotplug_bridges == 0)
> > -		return;
> > -
> >  	/*
> > -	 * Calculate the total amount of extra resource space we can
> > -	 * pass to bridges below this one.  This is basically the
> > -	 * extra space reduced by the minimal required space for the
> > -	 * non-hotplug bridges.
> > +	 * Reduce the available resource space by what the
> > +	 * bridge and devices below it occupy.
> 
> This can be widen:
I avoided changing comments because Bjorn said it creates distracting 
noise. But I am considering changing tactics because what I have been 
doing has not been working.

> 
> 
> 	/*
> 	 * Reduce the available resource space by what the bridge and
> 	 * devices below it occupy.
> 	 */
> 
> 
> >  	 */
> > -	remaining_io = available_io;
> > -	remaining_mmio = available_mmio;
> > -	remaining_mmio_pref = available_mmio_pref;
> > -
> >  	for_each_pci_bridge(dev, bus) {
> > -		const struct resource *res;
> > +		struct resource *res;
> > +		resource_size_t used_size;
> 
> Some people like "reverse christmas tree" format better:
We had this discussion a while ago, and Bjorn piped in and said it is 
not enforced. However, I will give it a go this time.

> 
> 		resource_size_t used_size;
> 		struct resource *res;
> 
> Can it be const, BTW?
I will admit that despite loving the C language, const has always 
escaped me. As far as I can tell, it is there for the programmer and 
compiler optimisations, with no functional changes. I will see what 
happens.

> 
> >  		if (dev->is_hotplug_bridge)
> >  			continue;
> >  
> > -		/*
> > -		 * Reduce the available resource space by what the
> > -		 * bridge and devices below it occupy.
> > -		 */
> >  		res = &dev->resource[PCI_BRIDGE_RESOURCES + 0];
> > -		if (!res->parent && available_io > resource_size(res))
> > -			remaining_io -= resource_size(res);
> > +		align = pci_resource_alignment(dev, res);
> > +		align = align ? ALIGN(io.start, align) - io.start : 0;
> > +		used_size = align + resource_size(res);
> > +		if (!res->parent && used_size <= resource_size(&io))
> > +			io.start += used_size;
> >  
> >  		res = &dev->resource[PCI_BRIDGE_RESOURCES + 1];
> > -		if (!res->parent && available_mmio > resource_size(res))
> > -			remaining_mmio -= resource_size(res);
> > +		align = pci_resource_alignment(dev, res);
> > +		align = align ? ALIGN(mmio.start, align) - mmio.start : 0;
> > +		used_size = align + resource_size(res);
> > +		if (!res->parent && used_size <= resource_size(&mmio))
> > +			mmio.start += used_size;
> >  
> >  		res = &dev->resource[PCI_BRIDGE_RESOURCES + 2];
> > -		if (!res->parent && available_mmio_pref > resource_size(res))
> > -			remaining_mmio_pref -= resource_size(res);
> > +		align = pci_resource_alignment(dev, res);
> > +		align = align ? ALIGN(mmio_pref.start, align) -
> > +				mmio_pref.start : 0;
> > +		used_size = align + resource_size(res);
> > +		if (!res->parent && used_size <= resource_size(&mmio_pref))
> > +			mmio_pref.start += used_size;
> >  	}
> >  
> > +	if (!hotplug_bridges)
> > +		return;
> > +
> >  	/*
> > -	 * Go over devices on this bus and distribute the remaining
> > -	 * resource space between hotplug bridges.
> > +	 * Distribute any remaining resources equally between
> > +	 * the hotplug-capable downstream ports.
> >  	 */
> > -	for_each_pci_bridge(dev, bus) {
> > -		resource_size_t align, io, mmio, mmio_pref;
> > -		struct pci_bus *b;
> > +	io_per_hp = div64_ul(resource_size(&io), hotplug_bridges);
> > +	mmio_per_hp = div64_ul(resource_size(&mmio), hotplug_bridges);
> > +	mmio_pref_per_hp = div64_ul(resource_size(&mmio_pref),
> > +		hotplug_bridges);
> >  
> > -		b = dev->subordinate;
> > -		if (!b || !dev->is_hotplug_bridge)
> > +	for_each_pci_bridge(dev, bus) {
> > +		if (!dev->subordinate || !dev->is_hotplug_bridge)
> >  			continue;
> >  
> > -		/*
> > -		 * Distribute available extra resources equally between
> > -		 * hotplug-capable downstream ports taking alignment into
> > -		 * account.
> > -		 */
> > -		align = pci_resource_alignment(bridge, io_res);
> > -		io = div64_ul(available_io, hotplug_bridges);
> > -		io = min(ALIGN(io, align), remaining_io);
> > -		remaining_io -= io;
> > -
> > -		align = pci_resource_alignment(bridge, mmio_res);
> > -		mmio = div64_ul(available_mmio, hotplug_bridges);
> > -		mmio = min(ALIGN(mmio, align), remaining_mmio);
> > -		remaining_mmio -= mmio;
> > +		io.end = io.start + io_per_hp - 1;
> > +		mmio.end = mmio.start + mmio_per_hp - 1;
> > +		mmio_pref.end = mmio_pref.start + mmio_pref_per_hp - 1;
> >  
> > -		align = pci_resource_alignment(bridge, mmio_pref_res);
> > -		mmio_pref = div64_ul(available_mmio_pref, hotplug_bridges);
> > -		mmio_pref = min(ALIGN(mmio_pref, align), remaining_mmio_pref);
> > -		remaining_mmio_pref -= mmio_pref;
> > +		pci_bus_distribute_available_resources(dev->subordinate,
> > +			add_list, io, mmio, mmio_pref);
> >  
> > -		pci_bus_distribute_available_resources(b, add_list, io, mmio,
> > -						       mmio_pref);
> > +		io.start = io.end + 1;
> 
> I think you can also write it like:
> 
> 		io.start += io_per_hp;
You are possibly correct - and it is impressive that you saw that. I 
never did. The way that I have written it fits in with the thought 
patterns I used to create it ("set the start of the next window to be 
just after the end of the last"). I will take this suggestion as you 
wanting it written that way (provided testing goes fine).

> 
> > +		mmio.start = mmio.end + 1;
> > +		mmio_pref.start = mmio_pref.end + 1;
> >  	}
> >  }
> >  
> >  static void pci_bridge_distribute_available_resources(struct pci_dev *bridge,
> >  						     struct list_head *add_list)
> >  {
> > -	resource_size_t available_io, available_mmio, available_mmio_pref;
> > -	const struct resource *res;
> > +	struct resource io, mmio, mmio_pref;
> >  
> >  	if (!bridge->is_hotplug_bridge)
> >  		return;
> >  
> >  	/* Take the initial extra resources from the hotplug port */
> > -	res = &bridge->resource[PCI_BRIDGE_RESOURCES + 0];
> > -	available_io = resource_size(res);
> > -	res = &bridge->resource[PCI_BRIDGE_RESOURCES + 1];
> > -	available_mmio = resource_size(res);
> > -	res = &bridge->resource[PCI_BRIDGE_RESOURCES + 2];
> > -	available_mmio_pref = resource_size(res);
> > +	io = bridge->resource[PCI_BRIDGE_RESOURCES + 0];
> > +	mmio = bridge->resource[PCI_BRIDGE_RESOURCES + 1];
> > +	mmio_pref = bridge->resource[PCI_BRIDGE_RESOURCES + 2];
> >  
> > -	pci_bus_distribute_available_resources(bridge->subordinate,
> > -					       add_list, available_io,
> > -					       available_mmio,
> > -					       available_mmio_pref);
> > +	pci_bus_distribute_available_resources(bridge->subordinate, add_list,
> > +					       io, mmio, mmio_pref);
> >  }
> >  
> >  void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
> > -- 
> > 2.22.0

Thanks for reviewing.

Regards,
Nicholas
Mika Westerberg Oct. 23, 2019, 9:34 a.m. UTC | #3
On Wed, Oct 23, 2019 at 09:08:42AM +0000, Nicholas Johnson wrote:
> On Tue, Oct 08, 2019 at 02:38:12PM +0300, mika.westerberg@linux.intel.com wrote:
> > Hi Nicholas,
> 
> Hi Mika,
> 
> I apologise for not responding quickly . I have switched off for a while 
> - taking my time to post the patches based on Linux 5.4. Hence, I was 
> not expecting any emails on this, and was not checking. Plus I was 
> starting to lose motivation.
> 
> I have been taking the time to change how I approach this. I am going to 
> post the patches to egpu.io forums to get a heap of people testing it 
> and hopefully saying nice things about it. Originally I thought it would 
> be quick to get the patches accepted so I was only going to announce 
> this after being accepted.
> 
> I also realised my patch series should not be a series. None of this is 
> specific to Thunderbolt and hence should not be a series. By separating 
> parts of this series, it may be easier to sign off and accept.
> 
> > 
> > On Fri, Jul 26, 2019 at 12:53:19PM +0000, Nicholas Johnson wrote:
> > > Rewrite pci_bus_distribute_available_resources to better handle bridges
> > > with different resource alignment requirements. Pass more details
> > > arguments recursively to track the resource start and end addresses
> > > relative to the initial hotplug bridge. This is especially useful for
> > > Thunderbolt with native PCI enumeration, enabling external graphics
> > > cards and other devices with bridge alignment higher than 1MB.
> > > 
> > > Change extend_bridge_window to resize the actual resource, rather than
> > > using add_list and dev_res->add_size. If an additional resource entry
> > > exists for the given resource, zero out the add_size field to avoid it
> > > interfering. Because add_size is considered optional when allocating,
> > > using add_size could cause issues in some cases, because successful
> > > resource distribution requires sizes to be guaranteed. Such cases
> > > include hot-adding nested hotplug bridges in one enumeration, and
> > > potentially others which are yet to be encountered.
> > > 
> > > Solves bug report: https://bugzilla.kernel.org/show_bug.cgi?id=199581
> > 
> > Here better to use:
> > 
> > Link: https://bugzilla.kernel.org/show_bug.cgi?id=199581
> > 
> > > Reported-by: Mika Westerberg <mika.westerberg@linux.intel.com>
> > 
> > This solves the issue I reported so,
> > 
> > Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
> So this is adding "Tested-by" on top of "Reported-by" and not replacing 
> one with the other?

Yes.

> > 
> > There are a couple of comments below.
> > 
> > > Signed-off-by: Nicholas Johnson <nicholas.johnson-opensource@outlook.com.au>
> > > ---
> > >  drivers/pci/setup-bus.c | 148 +++++++++++++++++++---------------------
> > >  1 file changed, 71 insertions(+), 77 deletions(-)
> > > 
> > > diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> > > index 79b1fa651..6835fd64c 100644
> > > --- a/drivers/pci/setup-bus.c
> > > +++ b/drivers/pci/setup-bus.c
> > > @@ -1840,12 +1840,10 @@ static void extend_bridge_window(struct pci_dev *bridge, struct resource *res,
> > >  }
> > >  
> > >  static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> > > -					    struct list_head *add_list,
> > > -					    resource_size_t available_io,
> > > -					    resource_size_t available_mmio,
> > > -					    resource_size_t available_mmio_pref)
> > > +	struct list_head *add_list, struct resource io,
> > > +	struct resource mmio, struct resource mmio_pref)
> > 
> > You pass a copy of each resource because you modify it inplace. I wonder
> > if it makes more sense to explicitly take a copy here with comments?
> 
> I have no qualms with modifying parameters, and sometimes quite like 
> doing it. I could do as you suggest but that means more lines of diff, 
> and Bjorn seems to be sending me a strong message that the less lines of 
> diff, the better.
> 
> I just noticed this: https://lkml.org/lkml/2019/10/4/337
> 
> Bjorn says I am touching critical and complicated code that he does not 
> understand. This could explain his aversion to more lines of diff.
> 
> If Bjorn will trust you to sign this off and take your assurance that it 
> is fine, then I can start taking your advice over his. I have been 
> favouring his advice because I figured he would have the final say as 
> the PCI subsystem maintainer.

Yes, if Bjorn says something you should listen to him and not me ;-)

I'm just trying to help him to review this because I think this is
important stuff.

This indeed touches the resource allocation code, which is rather old and
not too well understood, but then again that should not prevent us from
extending it and making it better to support more configurations.

> > > -	resource_size_t remaining_io, remaining_mmio, remaining_mmio_pref;
> > > +	resource_size_t io_per_hp, mmio_per_hp, mmio_pref_per_hp, align;
> > >  	unsigned int normal_bridges = 0, hotplug_bridges = 0;
> > >  	struct resource *io_res, *mmio_res, *mmio_pref_res;
> > >  	struct pci_dev *dev, *bridge = bus->self;
> > > @@ -1855,15 +1853,29 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> > >  	mmio_pref_res = &bridge->resource[PCI_BRIDGE_RESOURCES + 2];
> > >  
> > >  	/*
> > > -	 * Update additional resource list (add_list) to fill all the
> > > -	 * extra resource space available for this port except the space
> > > -	 * calculated in __pci_bus_size_bridges() which covers all the
> > > -	 * devices currently connected to the port and below.
> > > +	 * The alignment of this bridge is yet to be considered, hence it must
> > > +	 * be done now before extending its bridge window.
> > >  	 */
> > > -	extend_bridge_window(bridge, io_res, add_list, available_io);
> > > -	extend_bridge_window(bridge, mmio_res, add_list, available_mmio);
> > > +	align = pci_resource_alignment(bridge, io_res);
> > > +	if (!io_res->parent && align)
> > > +		io.start = ALIGN(io.start, align);
> > > +
> > > +	align = pci_resource_alignment(bridge, mmio_res);
> > > +	if (!mmio_res->parent && align)
> > > +		mmio.start = ALIGN(mmio.start, align);
> > > +
> > > +	align = pci_resource_alignment(bridge, mmio_pref_res);
> > > +	if (!mmio_pref_res->parent && align)
> > > +		mmio_pref.start = ALIGN(mmio_pref.start, align);
> > > +
> > > +	/*
> > > +	 * Update the resources to fill as much remaining resource space in the
> > > +	 * parent bridge as possible, while considering alignment.
> > > +	 */
> > > +	extend_bridge_window(bridge, io_res, add_list, resource_size(&io));
> > > +	extend_bridge_window(bridge, mmio_res, add_list, resource_size(&mmio));
> > >  	extend_bridge_window(bridge, mmio_pref_res, add_list,
> > > -			     available_mmio_pref);
> > > +		resource_size(&mmio_pref));
> > 
> > I think this should be aligned like:
> > 
> >  	extend_bridge_window(bridge, mmio_pref_res, add_list,
> > 			     resource_size(&mmio_pref));
> Me too, I do not know how that one slipped past me.
> 
> > 
> > 
> > >  
> > >  	/*
> > >  	 * Calculate how many hotplug bridges and normal bridges there
> > > @@ -1884,108 +1896,90 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> > >  	 */
> > >  	if (hotplug_bridges + normal_bridges == 1) {
> > >  		dev = list_first_entry(&bus->devices, struct pci_dev, bus_list);
> > > -		if (dev->subordinate) {
> > > +		if (dev->subordinate)
> > >  			pci_bus_distribute_available_resources(dev->subordinate,
> > > -				add_list, available_io, available_mmio,
> > > -				available_mmio_pref);
> > > -		}
> > > +				add_list, io, mmio, mmio_pref);
> > >  		return;
> > >  	}
> > >  
> > > -	if (hotplug_bridges == 0)
> > > -		return;
> > > -
> > >  	/*
> > > -	 * Calculate the total amount of extra resource space we can
> > > -	 * pass to bridges below this one.  This is basically the
> > > -	 * extra space reduced by the minimal required space for the
> > > -	 * non-hotplug bridges.
> > > +	 * Reduce the available resource space by what the
> > > +	 * bridge and devices below it occupy.
> > 
> > This can be widened:
> I avoided changing comments because Bjorn said it creates distracting 
> noise. But I am considering changing tactics because what I have been 
> doing has not been working.

If Bjorn says so then you can just ignore my comment :)
Nicholas Johnson Oct. 23, 2019, 9:47 a.m. UTC | #4
On Wed, Oct 23, 2019 at 12:34:19PM +0300, mika.westerberg@linux.intel.com wrote:
> On Wed, Oct 23, 2019 at 09:08:42AM +0000, Nicholas Johnson wrote:
> > On Tue, Oct 08, 2019 at 02:38:12PM +0300, mika.westerberg@linux.intel.com wrote:
> > > Hi Nicholas,
> > 
> > Hi Mika,
> > 
> > I apologise for not responding quickly. I have switched off for a while 
> > - taking my time to post the patches based on Linux 5.4. Hence, I was 
> > not expecting any emails on this, and was not checking. Plus I was 
> > starting to lose motivation.
> > 
> > I have been taking the time to change how I approach this. I am going to 
> > post the patches to egpu.io forums to get a heap of people testing it 
> > and hopefully saying nice things about it. Originally I thought it would 
> > be quick to get the patches accepted so I was only going to announce 
> > this after being accepted.
> > 
> > I also realised my patch series should not be a series. None of this is 
> > specific to Thunderbolt and hence should not be a series. By separating 
> > parts of this series, it may be easier to sign off and accept.
> > 
> > > 
> > > On Fri, Jul 26, 2019 at 12:53:19PM +0000, Nicholas Johnson wrote:
> > > > Rewrite pci_bus_distribute_available_resources to better handle bridges
> > > > with different resource alignment requirements. Pass more detailed
> > > > arguments recursively to track the resource start and end addresses
> > > > relative to the initial hotplug bridge. This is especially useful for
> > > > Thunderbolt with native PCI enumeration, enabling external graphics
> > > > cards and other devices with bridge alignment higher than 1MB.
> > > > 
> > > > Change extend_bridge_window to resize the actual resource, rather than
> > > > using add_list and dev_res->add_size. If an additional resource entry
> > > > exists for the given resource, zero out the add_size field to avoid it
> > > > interfering. Because add_size is considered optional when allocating,
> > > > using add_size could cause issues in some cases, because successful
> > > > resource distribution requires sizes to be guaranteed. Such cases
> > > > include hot-adding nested hotplug bridges in one enumeration, and
> > > > potentially others which are yet to be encountered.
> > > > 
> > > > Solves bug report: https://bugzilla.kernel.org/show_bug.cgi?id=199581
> > > 
> > > Here better to use:
> > > 
> > > Link: https://bugzilla.kernel.org/show_bug.cgi?id=199581
> > > 
> > > > Reported-by: Mika Westerberg <mika.westerberg@linux.intel.com>
> > > 
> > > This solves the issue I reported so,
> > > 
> > > Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
> > So this is adding "Tested-by" on top of "Reported-by" and not replacing 
> > one with the other?
> 
> Yes.
> 
> > > 
> > > There are a couple of comments below.
> > > 
> > > > Signed-off-by: Nicholas Johnson <nicholas.johnson-opensource@outlook.com.au>
> > > > ---
> > > >  drivers/pci/setup-bus.c | 148 +++++++++++++++++++---------------------
> > > >  1 file changed, 71 insertions(+), 77 deletions(-)
> > > > 
> > > > diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> > > > index 79b1fa651..6835fd64c 100644
> > > > --- a/drivers/pci/setup-bus.c
> > > > +++ b/drivers/pci/setup-bus.c
> > > > @@ -1840,12 +1840,10 @@ static void extend_bridge_window(struct pci_dev *bridge, struct resource *res,
> > > >  }
> > > >  
> > > >  static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> > > > -					    struct list_head *add_list,
> > > > -					    resource_size_t available_io,
> > > > -					    resource_size_t available_mmio,
> > > > -					    resource_size_t available_mmio_pref)
> > > > +	struct list_head *add_list, struct resource io,
> > > > +	struct resource mmio, struct resource mmio_pref)
> > > 
> > > You pass a copy of each resource because you modify it inplace. I wonder
> > > if it makes more sense to explicitly take a copy here with comments?
> > 
> > I have no qualms with modifying parameters, and sometimes quite like 
> > doing it. I could do as you suggest but that means more lines of diff, 
> > and Bjorn seems to be sending me a strong message that the less lines of 
> > diff, the better.
> > 
> > I just noticed this: https://lkml.org/lkml/2019/10/4/337
> > 
> > Bjorn says I am touching critical and complicated code that he does not 
> > understand. This could explain his aversion to more lines of diff.
> > 
> > If Bjorn will trust you to sign this off and take your assurance that it 
> > is fine, then I can start taking your advice over his. I have been 
> > favouring his advice because I figured he would have the final say as 
> > the PCI subsystem maintainer.
> 
> Yes, if Bjorn says something you should listen to him and not me ;-)
> 
> I'm just trying to help him to review this because I think this is
> important stuff.
> 
> This indeed touches the resource allocation code, which is rather old and
> not too well understood, but then again that should not prevent us from
> extending it and making it better to support more configurations.

There is one file that I understand better than any other in Linux - 
that is drivers/pci/setup-bus.c

I understand it well enough that I was able to do a fairly big rewrite 
for my own purposes some time ago. It was mostly educational. What I 
learned is how broken everything is. A lot of problems stem from the 
fact that we are trying to support BIOS allocation and native at the 
same time. The two need to be mutually exclusive. X86 seems to be the 
biggest block from going to native. The amount of stuff in the X86 arch 
folder is crazy - and then you have riscv/ with almost nothing - which 
is how it should be, in my opinion.

In other words, we are stuck in an unpleasant situation with no way out.

> 
> > > > -	resource_size_t remaining_io, remaining_mmio, remaining_mmio_pref;
> > > > +	resource_size_t io_per_hp, mmio_per_hp, mmio_pref_per_hp, align;
> > > >  	unsigned int normal_bridges = 0, hotplug_bridges = 0;
> > > >  	struct resource *io_res, *mmio_res, *mmio_pref_res;
> > > >  	struct pci_dev *dev, *bridge = bus->self;
> > > > @@ -1855,15 +1853,29 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> > > >  	mmio_pref_res = &bridge->resource[PCI_BRIDGE_RESOURCES + 2];
> > > >  
> > > >  	/*
> > > > -	 * Update additional resource list (add_list) to fill all the
> > > > -	 * extra resource space available for this port except the space
> > > > -	 * calculated in __pci_bus_size_bridges() which covers all the
> > > > -	 * devices currently connected to the port and below.
> > > > +	 * The alignment of this bridge is yet to be considered, hence it must
> > > > +	 * be done now before extending its bridge window.
> > > >  	 */
> > > > -	extend_bridge_window(bridge, io_res, add_list, available_io);
> > > > -	extend_bridge_window(bridge, mmio_res, add_list, available_mmio);
> > > > +	align = pci_resource_alignment(bridge, io_res);
> > > > +	if (!io_res->parent && align)
> > > > +		io.start = ALIGN(io.start, align);
> > > > +
> > > > +	align = pci_resource_alignment(bridge, mmio_res);
> > > > +	if (!mmio_res->parent && align)
> > > > +		mmio.start = ALIGN(mmio.start, align);
> > > > +
> > > > +	align = pci_resource_alignment(bridge, mmio_pref_res);
> > > > +	if (!mmio_pref_res->parent && align)
> > > > +		mmio_pref.start = ALIGN(mmio_pref.start, align);
> > > > +
> > > > +	/*
> > > > +	 * Update the resources to fill as much remaining resource space in the
> > > > +	 * parent bridge as possible, while considering alignment.
> > > > +	 */
> > > > +	extend_bridge_window(bridge, io_res, add_list, resource_size(&io));
> > > > +	extend_bridge_window(bridge, mmio_res, add_list, resource_size(&mmio));
> > > >  	extend_bridge_window(bridge, mmio_pref_res, add_list,
> > > > -			     available_mmio_pref);
> > > > +		resource_size(&mmio_pref));
> > > 
> > > I think this should be aligned like:
> > > 
> > >  	extend_bridge_window(bridge, mmio_pref_res, add_list,
> > > 			     resource_size(&mmio_pref));
> > Me too, I do not know how that one slipped past me.
> > 
> > > 
> > > 
> > > >  
> > > >  	/*
> > > >  	 * Calculate how many hotplug bridges and normal bridges there
> > > > @@ -1884,108 +1896,90 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> > > >  	 */
> > > >  	if (hotplug_bridges + normal_bridges == 1) {
> > > >  		dev = list_first_entry(&bus->devices, struct pci_dev, bus_list);
> > > > -		if (dev->subordinate) {
> > > > +		if (dev->subordinate)
> > > >  			pci_bus_distribute_available_resources(dev->subordinate,
> > > > -				add_list, available_io, available_mmio,
> > > > -				available_mmio_pref);
> > > > -		}
> > > > +				add_list, io, mmio, mmio_pref);
> > > >  		return;
> > > >  	}
> > > >  
> > > > -	if (hotplug_bridges == 0)
> > > > -		return;
> > > > -
> > > >  	/*
> > > > -	 * Calculate the total amount of extra resource space we can
> > > > -	 * pass to bridges below this one.  This is basically the
> > > > -	 * extra space reduced by the minimal required space for the
> > > > -	 * non-hotplug bridges.
> > > > +	 * Reduce the available resource space by what the
> > > > +	 * bridge and devices below it occupy.
> > > 
> > > > This can be widened:
> > I avoided changing comments because Bjorn said it creates distracting 
> > noise. But I am considering changing tactics because what I have been 
> > doing has not been working.
> 
> If Bjorn says so then you can just ignore my comment :)
Bjorn Helgaas Oct. 23, 2019, 2:03 p.m. UTC | #5
On Wed, Oct 23, 2019 at 09:08:42AM +0000, Nicholas Johnson wrote:
> On Tue, Oct 08, 2019 at 02:38:12PM +0300, mika.westerberg@linux.intel.com wrote:
> > On Fri, Jul 26, 2019 at 12:53:19PM +0000, Nicholas Johnson wrote:

> > >  	/*
> > > -	 * Calculate the total amount of extra resource space we can
> > > -	 * pass to bridges below this one.  This is basically the
> > > -	 * extra space reduced by the minimal required space for the
> > > -	 * non-hotplug bridges.
> > > +	 * Reduce the available resource space by what the
> > > +	 * bridge and devices below it occupy.
> > 
> > This can be widened:
> I avoided changing comments because Bjorn said it creates distracting 
> noise. But I am considering changing tactics because what I have been 
> doing has not been working.

I think Mika's point was not that you should avoid changing the
comment, but that your new comment could be rewrapped so it used the
whole 80 column width, which matches the rest of the file.  That's
trivial to do and if you don't do it I can do it while applying the
patch.

> > 	/*
> > 	 * Reduce the available resource space by what the bridge and
> > 	 * devices below it occupy.
> > 	 */
> > 
> > >  	 */
> > > -	remaining_io = available_io;
> > > -	remaining_mmio = available_mmio;
> > > -	remaining_mmio_pref = available_mmio_pref;
> > > -
> > >  	for_each_pci_bridge(dev, bus) {
> > > -		const struct resource *res;
> > > +		struct resource *res;
> > > +		resource_size_t used_size;
> > 
> > Some people like "reverse christmas tree" format better:
> We had this discussion a while ago, and Bjorn piped in and said it is 
> not enforced. However, I will give it a go this time.

I usually don't request changes in the order, so it doesn't really
matter to me, but I personally put the declarations in the order of
their use in the code below.

> > 		resource_size_t used_size;
> > 		struct resource *res;

> > > -		pci_bus_distribute_available_resources(b, add_list, io, mmio,
> > > -						       mmio_pref);
> > > +		io.start = io.end + 1;
> > 
> > I think you can also write it like:
> > 
> > 		io.start += io_per_hp;
> You are possibly correct - and it is impressive that you saw that. I 
> never did. The way that I have written it fits in with the thought 
> patterns I used to create it ("set the start of the next window to be 
> just after the end of the last"). I will take this suggestion as you 
> wanting it written that way (provided testing goes fine).
> 
> > > +		mmio.start = mmio.end + 1;
> > > +		mmio_pref.start = mmio_pref.end + 1;

I assume you'll do that for mmio.start and mmio_pref.start as well?

Bjorn

Patch
diff mbox series

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 79b1fa651..6835fd64c 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1840,12 +1840,10 @@  static void extend_bridge_window(struct pci_dev *bridge, struct resource *res,
 }
 
 static void pci_bus_distribute_available_resources(struct pci_bus *bus,
-					    struct list_head *add_list,
-					    resource_size_t available_io,
-					    resource_size_t available_mmio,
-					    resource_size_t available_mmio_pref)
+	struct list_head *add_list, struct resource io,
+	struct resource mmio, struct resource mmio_pref)
 {
-	resource_size_t remaining_io, remaining_mmio, remaining_mmio_pref;
+	resource_size_t io_per_hp, mmio_per_hp, mmio_pref_per_hp, align;
 	unsigned int normal_bridges = 0, hotplug_bridges = 0;
 	struct resource *io_res, *mmio_res, *mmio_pref_res;
 	struct pci_dev *dev, *bridge = bus->self;
@@ -1855,15 +1853,29 @@  static void pci_bus_distribute_available_resources(struct pci_bus *bus,
 	mmio_pref_res = &bridge->resource[PCI_BRIDGE_RESOURCES + 2];
 
 	/*
-	 * Update additional resource list (add_list) to fill all the
-	 * extra resource space available for this port except the space
-	 * calculated in __pci_bus_size_bridges() which covers all the
-	 * devices currently connected to the port and below.
+	 * The alignment of this bridge is yet to be considered, hence it must
+	 * be done now before extending its bridge window.
 	 */
-	extend_bridge_window(bridge, io_res, add_list, available_io);
-	extend_bridge_window(bridge, mmio_res, add_list, available_mmio);
+	align = pci_resource_alignment(bridge, io_res);
+	if (!io_res->parent && align)
+		io.start = ALIGN(io.start, align);
+
+	align = pci_resource_alignment(bridge, mmio_res);
+	if (!mmio_res->parent && align)
+		mmio.start = ALIGN(mmio.start, align);
+
+	align = pci_resource_alignment(bridge, mmio_pref_res);
+	if (!mmio_pref_res->parent && align)
+		mmio_pref.start = ALIGN(mmio_pref.start, align);
+
+	/*
+	 * Update the resources to fill as much remaining resource space in the
+	 * parent bridge as possible, while considering alignment.
+	 */
+	extend_bridge_window(bridge, io_res, add_list, resource_size(&io));
+	extend_bridge_window(bridge, mmio_res, add_list, resource_size(&mmio));
 	extend_bridge_window(bridge, mmio_pref_res, add_list,
-			     available_mmio_pref);
+		resource_size(&mmio_pref));
 
 	/*
 	 * Calculate how many hotplug bridges and normal bridges there
@@ -1884,108 +1896,90 @@  static void pci_bus_distribute_available_resources(struct pci_bus *bus,
 	 */
 	if (hotplug_bridges + normal_bridges == 1) {
 		dev = list_first_entry(&bus->devices, struct pci_dev, bus_list);
-		if (dev->subordinate) {
+		if (dev->subordinate)
 			pci_bus_distribute_available_resources(dev->subordinate,
-				add_list, available_io, available_mmio,
-				available_mmio_pref);
-		}
+				add_list, io, mmio, mmio_pref);
 		return;
 	}
 
-	if (hotplug_bridges == 0)
-		return;
-
 	/*
-	 * Calculate the total amount of extra resource space we can
-	 * pass to bridges below this one.  This is basically the
-	 * extra space reduced by the minimal required space for the
-	 * non-hotplug bridges.
+	 * Reduce the available resource space by what the
+	 * bridge and devices below it occupy.
 	 */
-	remaining_io = available_io;
-	remaining_mmio = available_mmio;
-	remaining_mmio_pref = available_mmio_pref;
-
 	for_each_pci_bridge(dev, bus) {
-		const struct resource *res;
+		struct resource *res;
+		resource_size_t used_size;
 
 		if (dev->is_hotplug_bridge)
 			continue;
 
-		/*
-		 * Reduce the available resource space by what the
-		 * bridge and devices below it occupy.
-		 */
 		res = &dev->resource[PCI_BRIDGE_RESOURCES + 0];
-		if (!res->parent && available_io > resource_size(res))
-			remaining_io -= resource_size(res);
+		align = pci_resource_alignment(dev, res);
+		align = align ? ALIGN(io.start, align) - io.start : 0;
+		used_size = align + resource_size(res);
+		if (!res->parent && used_size <= resource_size(&io))
+			io.start += used_size;
 
 		res = &dev->resource[PCI_BRIDGE_RESOURCES + 1];
-		if (!res->parent && available_mmio > resource_size(res))
-			remaining_mmio -= resource_size(res);
+		align = pci_resource_alignment(dev, res);
+		align = align ? ALIGN(mmio.start, align) - mmio.start : 0;
+		used_size = align + resource_size(res);
+		if (!res->parent && used_size <= resource_size(&mmio))
+			mmio.start += used_size;
 
 		res = &dev->resource[PCI_BRIDGE_RESOURCES + 2];
-		if (!res->parent && available_mmio_pref > resource_size(res))
-			remaining_mmio_pref -= resource_size(res);
+		align = pci_resource_alignment(dev, res);
+		align = align ? ALIGN(mmio_pref.start, align) -
+				mmio_pref.start : 0;
+		used_size = align + resource_size(res);
+		if (!res->parent && used_size <= resource_size(&mmio_pref))
+			mmio_pref.start += used_size;
 	}
 
+	if (!hotplug_bridges)
+		return;
+
 	/*
-	 * Go over devices on this bus and distribute the remaining
-	 * resource space between hotplug bridges.
+	 * Distribute any remaining resources equally between
+	 * the hotplug-capable downstream ports.
 	 */
-	for_each_pci_bridge(dev, bus) {
-		resource_size_t align, io, mmio, mmio_pref;
-		struct pci_bus *b;
+	io_per_hp = div64_ul(resource_size(&io), hotplug_bridges);
+	mmio_per_hp = div64_ul(resource_size(&mmio), hotplug_bridges);
+	mmio_pref_per_hp = div64_ul(resource_size(&mmio_pref),
+		hotplug_bridges);
 
-		b = dev->subordinate;
-		if (!b || !dev->is_hotplug_bridge)
+	for_each_pci_bridge(dev, bus) {
+		if (!dev->subordinate || !dev->is_hotplug_bridge)
 			continue;
 
-		/*
-		 * Distribute available extra resources equally between
-		 * hotplug-capable downstream ports taking alignment into
-		 * account.
-		 */
-		align = pci_resource_alignment(bridge, io_res);
-		io = div64_ul(available_io, hotplug_bridges);
-		io = min(ALIGN(io, align), remaining_io);
-		remaining_io -= io;
-
-		align = pci_resource_alignment(bridge, mmio_res);
-		mmio = div64_ul(available_mmio, hotplug_bridges);
-		mmio = min(ALIGN(mmio, align), remaining_mmio);
-		remaining_mmio -= mmio;
+		io.end = io.start + io_per_hp - 1;
+		mmio.end = mmio.start + mmio_per_hp - 1;
+		mmio_pref.end = mmio_pref.start + mmio_pref_per_hp - 1;
 
-		align = pci_resource_alignment(bridge, mmio_pref_res);
-		mmio_pref = div64_ul(available_mmio_pref, hotplug_bridges);
-		mmio_pref = min(ALIGN(mmio_pref, align), remaining_mmio_pref);
-		remaining_mmio_pref -= mmio_pref;
+		pci_bus_distribute_available_resources(dev->subordinate,
+			add_list, io, mmio, mmio_pref);
 
-		pci_bus_distribute_available_resources(b, add_list, io, mmio,
-						       mmio_pref);
+		io.start = io.end + 1;
+		mmio.start = mmio.end + 1;
+		mmio_pref.start = mmio_pref.end + 1;
 	}
 }
 
 static void pci_bridge_distribute_available_resources(struct pci_dev *bridge,
 						     struct list_head *add_list)
 {
-	resource_size_t available_io, available_mmio, available_mmio_pref;
-	const struct resource *res;
+	struct resource io, mmio, mmio_pref;
 
 	if (!bridge->is_hotplug_bridge)
 		return;
 
 	/* Take the initial extra resources from the hotplug port */
-	res = &bridge->resource[PCI_BRIDGE_RESOURCES + 0];
-	available_io = resource_size(res);
-	res = &bridge->resource[PCI_BRIDGE_RESOURCES + 1];
-	available_mmio = resource_size(res);
-	res = &bridge->resource[PCI_BRIDGE_RESOURCES + 2];
-	available_mmio_pref = resource_size(res);
+	io = bridge->resource[PCI_BRIDGE_RESOURCES + 0];
+	mmio = bridge->resource[PCI_BRIDGE_RESOURCES + 1];
+	mmio_pref = bridge->resource[PCI_BRIDGE_RESOURCES + 2];
 
-	pci_bus_distribute_available_resources(bridge->subordinate,
-					       add_list, available_io,
-					       available_mmio,
-					       available_mmio_pref);
+	pci_bus_distribute_available_resources(bridge->subordinate, add_list,
+					       io, mmio, mmio_pref);
 }
 
 void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)