diff mbox

[v7,11/50] powerpc/powernv: IO and M32 mapping based on PCI device resources

Message ID 1446642770-4681-12-git-send-email-gwshan@linux.vnet.ibm.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

Gavin Shan Nov. 4, 2015, 1:12 p.m. UTC
Currently, the IO and M32 segments are mapped to the corresponding
PE based on the windows of the parent bridge of PE's primary bus.
It's not going to work when the windows of root port or upstream
port of the PCIe switch behind root port are extended to PHB's
aperatuses in order to support hotplug in subsequent patch.

This fixes the issue by mapping IO and M32 segments based on the
resources of the PCI devices included in the PE, instead of the
windows of the parent bridge of the PE's primary bus.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 131 +++++++++++++++++-------------
 1 file changed, 75 insertions(+), 56 deletions(-)

Comments

Daniel Axtens Nov. 12, 2015, 3:30 a.m. UTC | #1
Hi Gavin,

Sorry to have taken so long to resume these reviews!

> Currently, the IO and M32 segments are mapped to the corresponding
> PE based on the windows of the parent bridge of PE's primary bus.
> It's not going to work when the windows of root port or upstream
> port of the PCIe switch behind root port are extended to PHB's
> aperatuses in order to support hotplug in subsequent patch.
I'm not _entirely_ sure I understand this.

I *think* you mean PHB's apertures (i.e. s/aperatuses/apertures/)?

> This fixes the issue by mapping IO and M32 segments based on the
> resources of the PCI devices included in the PE, instead of the
> windows of the parent bridge of the PE's primary bus.

This solution seems to make a lot of sense, but I don't have a very good
understanding of PCI yet: why was it done that way and not this way
originally? Looking at the code, it looks like the old way was simple
but didn't support SR-IOV?

There are a few comments inline as well.

> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 553d3f3..4ab93f8 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -2741,71 +2741,90 @@ truncate_iov:
>  }
>  #endif /* CONFIG_PCI_IOV */
>  
> -/*
> - * This function is supposed to be called on basis of PE from top
> - * to bottom style. So the the I/O or MMIO segment assigned to
> - * parent PE could be overrided by its child PEs if necessary.
> - */
> -static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
> -				  struct pnv_ioda_pe *pe)
> +static int pnv_ioda_setup_one_res(struct pnv_ioda_pe *pe,
> +				  struct resource *res)
>  {
> -	struct pnv_phb *phb = hose->private_data;
> +	struct pnv_phb *phb = pe->phb;
>  	struct pci_bus_region region;
> -	struct resource *res;
> -	unsigned int segsize;
> -	int *segmap, index, i;
> +	unsigned int index, segsize;
> +	int *segmap;
>  	uint16_t win;
>  	int64_t rc;

s/int64_t/s64/;
I think we might also want to change the uint16_t as well.

> -	/*
> -	 * NOTE: We only care PCI bus based PE for now. For PCI
> -	 * device based PE, for example SRIOV sensitive VF should
> -	 * be figured out later.
> -	 */
> -	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
> +	if (!res->parent || !res->flags || res->start > res->end)
> +		return 0;
>  
> -	pci_bus_for_each_resource(pe->pbus, res, i) {
> -		if (!res || !res->flags ||
> -		    res->start > res->end)
> -			continue;
> +	if (res->flags & IORESOURCE_IO) {
> +		region.start = res->start - phb->ioda.io_pci_base;
> +		region.end   = res->end - phb->ioda.io_pci_base;
> +		segsize      = phb->ioda.io_segsize;
> +		segmap       = phb->ioda.io_segmap;
> +		win          = OPAL_IO_WINDOW_TYPE;
> +	} else if ((res->flags & IORESOURCE_MEM) &&
> +		   !pnv_pci_is_mem_pref_64(res->flags)) {
> +		region.start = res->start -
> +			       phb->hose->mem_offset[0] -
> +			       phb->ioda.m32_pci_base;
> +		region.end   = res->end -
> +			       phb->hose->mem_offset[0] -
> +			       phb->ioda.m32_pci_base;
> +		segsize      = phb->ioda.m32_segsize;
> +		segmap       = phb->ioda.m32_segmap;
> +		win          = OPAL_M32_WINDOW_TYPE;
> +	} else {
> +		return 0;
The return codes are currently unused, but should this get a more
informative return code? Are there any invalid ones that should be
flagged, or is it just safe to ignore stuff we don't recognise?

> +	}


> +static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
> +{
> +	struct pci_dev *pdev;
> +	struct resource *res;
> +	int i;
> +
> +	/* This function only works for bus dependent PE */
> +	WARN_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
> +
> +	list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
> +		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
> +			res = &pdev->resource[i];
> +			if (pnv_ioda_setup_one_res(pe, res))
> +				return;
As I mentioned earlier, setup_one_res can potentially return -EIO:
should we be trying to propagate that up?
> +		}
> +
> +		/*
> +		 * If the PE contains all subordinate PCI buses, the
> +		 * windows of the child bridges should be mapped to
> +		 * the PE as well.
> +		 */
> +		if (!(pe->flags & PNV_IODA_PE_BUS_ALL && pci_is_bridge(pdev)))
> +			continue;
>  
> -			region.start += segsize;
> -			index++;
> +		for (i = 0; i <= PCI_BRIDGE_RESOURCE_NUM; i++) {
> +			res = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
> +			if (pnv_ioda_setup_one_res(pe, res))
> +				return;
>  		}
>  	}
>  }

Regards,
Daniel
Gavin Shan Nov. 12, 2015, 4:55 a.m. UTC | #2
On Thu, Nov 12, 2015 at 02:30:27PM +1100, Daniel Axtens wrote:
>Hi Gavin,
>
>Sorry to have taken so long to resume these reviews!
>

Thanks for your review, Daniel!

>> Currently, the IO and M32 segments are mapped to the corresponding
>> PE based on the windows of the parent bridge of PE's primary bus.
>> It's not going to work when the windows of root port or upstream
>> port of the PCIe switch behind root port are extended to PHB's
>> aperatuses in order to support hotplug in subsequent patch.
>I'm not _entirely_ sure I understand this.
>
>I *think* you mean PHB's apertures (i.e. s/aperatuses/apertures/)?
>

I'll fix the typo in next revision.

>> This fixes the issue by mapping IO and M32 segments based on the
>> resources of the PCI devices included in the PE, instead of the
>> windows of the parent bridge of the PE's primary bus.
>
>This solution seems to make a lot of sense, but I don't have a very good
>understanding of PCI yet: why was it done that way and not this way
>originally? Looking at the code, it looks like the old way was simple
>but didn't support SR-IOV?
>

It's not related to SRIOV. Originally, the IO or M32 segments are mapped
according to the bridge's windows. The bridge windows on root port or the
upstream port of the switch behind that will be extended to PHB's apertures.
If we still use bridge's windows, all IO and M32 resources are mapped/assigned
to the PE corresponding to PCI bus#1 or PCI bus#2. That's not correct any more.
So the correct way is to do the mapping based on IO or M32 BARs of the devices
included in the PE.

>There are a few comments inline as well.
>
>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>> index 553d3f3..4ab93f8 100644
>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>> @@ -2741,71 +2741,90 @@ truncate_iov:
>>  }
>>  #endif /* CONFIG_PCI_IOV */
>>  
>> -/*
>> - * This function is supposed to be called on basis of PE from top
>> - * to bottom style. So the the I/O or MMIO segment assigned to
>> - * parent PE could be overrided by its child PEs if necessary.
>> - */
>> -static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
>> -				  struct pnv_ioda_pe *pe)
>> +static int pnv_ioda_setup_one_res(struct pnv_ioda_pe *pe,
>> +				  struct resource *res)
>>  {
>> -	struct pnv_phb *phb = hose->private_data;
>> +	struct pnv_phb *phb = pe->phb;
>>  	struct pci_bus_region region;
>> -	struct resource *res;
>> -	unsigned int segsize;
>> -	int *segmap, index, i;
>> +	unsigned int index, segsize;
>> +	int *segmap;
>>  	uint16_t win;
>>  	int64_t rc;
>
>s/int64_t/s64/;
>I think we might also want to change the uint16_t as well.
>

As I explained before, I changed it from s64 to int64_t and I won't change it
back since both of them are fine. Same situation to uint16 here. If we really
want to clean it all at once, I can do that later, but not in this patchset.

>> -	/*
>> -	 * NOTE: We only care PCI bus based PE for now. For PCI
>> -	 * device based PE, for example SRIOV sensitive VF should
>> -	 * be figured out later.
>> -	 */
>> -	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
>> +	if (!res->parent || !res->flags || res->start > res->end)
>> +		return 0;
>>  
>> -	pci_bus_for_each_resource(pe->pbus, res, i) {
>> -		if (!res || !res->flags ||
>> -		    res->start > res->end)
>> -			continue;
>> +	if (res->flags & IORESOURCE_IO) {
>> +		region.start = res->start - phb->ioda.io_pci_base;
>> +		region.end   = res->end - phb->ioda.io_pci_base;
>> +		segsize      = phb->ioda.io_segsize;
>> +		segmap       = phb->ioda.io_segmap;
>> +		win          = OPAL_IO_WINDOW_TYPE;
>> +	} else if ((res->flags & IORESOURCE_MEM) &&
>> +		   !pnv_pci_is_mem_pref_64(res->flags)) {
>> +		region.start = res->start -
>> +			       phb->hose->mem_offset[0] -
>> +			       phb->ioda.m32_pci_base;
>> +		region.end   = res->end -
>> +			       phb->hose->mem_offset[0] -
>> +			       phb->ioda.m32_pci_base;
>> +		segsize      = phb->ioda.m32_segsize;
>> +		segmap       = phb->ioda.m32_segmap;
>> +		win          = OPAL_M32_WINDOW_TYPE;
>> +	} else {
>> +		return 0;
>The return codes are currently unused, but should this get a more
>informative return code? Are there any invalid ones that should be
>flagged, or is it just safe to ignore stuff we don't recognise?
>

It's safe to ignore M64 (64-bits prefetchable BARs) whose mapping is
done in different path.

>> +	}
>
>
>> +static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
>> +{
>> +	struct pci_dev *pdev;
>> +	struct resource *res;
>> +	int i;
>> +
>> +	/* This function only works for bus dependent PE */
>> +	WARN_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
>> +
>> +	list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
>> +		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
>> +			res = &pdev->resource[i];
>> +			if (pnv_ioda_setup_one_res(pe, res))
>> +				return;
>As I mentioned earlier, setup_one_res can potentially return -EIO:
>should we be trying to propagate that up?

I think it's a good idea. I'll do in next revision.

>> +		}
>> +
>> +		/*
>> +		 * If the PE contains all subordinate PCI buses, the
>> +		 * windows of the child bridges should be mapped to
>> +		 * the PE as well.
>> +		 */
>> +		if (!(pe->flags & PNV_IODA_PE_BUS_ALL && pci_is_bridge(pdev)))
>> +			continue;
>>  
>> -			region.start += segsize;
>> -			index++;
>> +		for (i = 0; i <= PCI_BRIDGE_RESOURCE_NUM; i++) {
>> +			res = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
>> +			if (pnv_ioda_setup_one_res(pe, res))
>> +				return;
>>  		}
>>  	}
>>  }
>

Thanks,
Gavin
Alexey Kardashevskiy Nov. 16, 2015, 8:01 a.m. UTC | #3
On 11/12/2015 03:55 PM, Gavin Shan wrote:
> On Thu, Nov 12, 2015 at 02:30:27PM +1100, Daniel Axtens wrote:
>> Hi Gavin,
>>
>> Sorry to have taken so long to resume these reviews!
>>
>
> Thanks for your review, Daniel!
>
>>> Currently, the IO and M32 segments are mapped to the corresponding
>>> PE based on the windows of the parent bridge of PE's primary bus.
>>> It's not going to work when the windows of root port or upstream
>>> port of the PCIe switch behind root port are extended to PHB's
>>> aperatuses in order to support hotplug in subsequent patch.
>> I'm not _entirely_ sure I understand this.
>>
>> I *think* you mean PHB's apertures (i.e. s/aperatuses/apertures/)?
>>
>
> I'll fix the typo in next revision.
>
>>> This fixes the issue by mapping IO and M32 segments based on the
>>> resources of the PCI devices included in the PE, instead of the
>>> windows of the parent bridge of the PE's primary bus.
>>
>> This solution seems to make a lot of sense, but I don't have a very good
>> understanding of PCI yet: why was it done that way and not this way
>> originally? Looking at the code, it looks like the old way was simple
>> but didn't support SR-IOV?
>>
>
> It's not related to SRIOV. Originally, the IO or M32 segments are mapped
> according to the bridge's windows.


Sorry, I do not understand what this means...


> The bridge windows on root port or the
> upstream port of the switch behind that will be extended to PHB's apertures.

What does "extended" mean here and why would the windows be extended anyway?


> If we still use bridge's windows, all IO and M32 resources are mapped/assigned
> to the PE corresponding to PCI bus#1 or PCI bus#2. That's not correct any more.
> So the correct way is to do the mapping based on IO or M32 BARs of the devices
> included in the PE.

In this patch I see quite a lot of code movements and I fail to spot the 
actual change here...


It used to be a single loop:

pci_bus_for_each_resource(pe->pbus, res, i) {
	/* do stuff for each @res */
}

and now it is 2 loops inside another loop:

list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
		res = &pdev->resource[i];
		/* do stuff for each @res */
	}

	for (i = 0; i <= PCI_BRIDGE_RESOURCE_NUM; i++) {
	        res = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
		/* do stuff for each @res */
	}
}


Is that correct? If yes, why is not the patch as simple as this? If no, 
what did I miss?



>
>> There are a few comments inline as well.
>>
>>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>> index 553d3f3..4ab93f8 100644
>>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>> @@ -2741,71 +2741,90 @@ truncate_iov:
>>>   }
>>>   #endif /* CONFIG_PCI_IOV */
>>>
>>> -/*
>>> - * This function is supposed to be called on basis of PE from top
>>> - * to bottom style. So the the I/O or MMIO segment assigned to
>>> - * parent PE could be overrided by its child PEs if necessary.
>>> - */
>>> -static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
>>> -				  struct pnv_ioda_pe *pe)
>>> +static int pnv_ioda_setup_one_res(struct pnv_ioda_pe *pe,
>>> +				  struct resource *res)
>>>   {
>>> -	struct pnv_phb *phb = hose->private_data;
>>> +	struct pnv_phb *phb = pe->phb;
>>>   	struct pci_bus_region region;
>>> -	struct resource *res;
>>> -	unsigned int segsize;
>>> -	int *segmap, index, i;
>>> +	unsigned int index, segsize;
>>> +	int *segmap;
>>>   	uint16_t win;
>>>   	int64_t rc;
>>
>> s/int64_t/s64/;
>> I think we might also want to change the uint16_t as well.
>>
>
> As I explained before, I changed it from s64 to int64_t and I won't change it
> back since both of them are fine. Same situation to uint16 here. If we really
> want to clean it all at once, I can do that later, but not in this patchset.
>
>>> -	/*
>>> -	 * NOTE: We only care PCI bus based PE for now. For PCI
>>> -	 * device based PE, for example SRIOV sensitive VF should
>>> -	 * be figured out later.
>>> -	 */
>>> -	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
>>> +	if (!res->parent || !res->flags || res->start > res->end)
>>> +		return 0;
>>>
>>> -	pci_bus_for_each_resource(pe->pbus, res, i) {
>>> -		if (!res || !res->flags ||
>>> -		    res->start > res->end)
>>> -			continue;
>>> +	if (res->flags & IORESOURCE_IO) {
>>> +		region.start = res->start - phb->ioda.io_pci_base;
>>> +		region.end   = res->end - phb->ioda.io_pci_base;
>>> +		segsize      = phb->ioda.io_segsize;
>>> +		segmap       = phb->ioda.io_segmap;
>>> +		win          = OPAL_IO_WINDOW_TYPE;
>>> +	} else if ((res->flags & IORESOURCE_MEM) &&
>>> +		   !pnv_pci_is_mem_pref_64(res->flags)) {
>>> +		region.start = res->start -
>>> +			       phb->hose->mem_offset[0] -
>>> +			       phb->ioda.m32_pci_base;
>>> +		region.end   = res->end -
>>> +			       phb->hose->mem_offset[0] -
>>> +			       phb->ioda.m32_pci_base;
>>> +		segsize      = phb->ioda.m32_segsize;
>>> +		segmap       = phb->ioda.m32_segmap;
>>> +		win          = OPAL_M32_WINDOW_TYPE;



This code asks for a helper function

pnv_ioda_do_setup_one_res(start, end, segsize, segmap, win)

and then you won't need many local variables (region, segsize, segmap, win) ;)
Gavin Shan Nov. 17, 2015, 1:33 a.m. UTC | #4
On Mon, Nov 16, 2015 at 07:01:43PM +1100, Alexey Kardashevskiy wrote:
>On 11/12/2015 03:55 PM, Gavin Shan wrote:
>>On Thu, Nov 12, 2015 at 02:30:27PM +1100, Daniel Axtens wrote:
>>>Hi Gavin,
>>>
>>>Sorry to have taken so long to resume these reviews!
>>>
>>
>>Thanks for your review, Daniel!
>>
>>>>Currently, the IO and M32 segments are mapped to the corresponding
>>>>PE based on the windows of the parent bridge of PE's primary bus.
>>>>It's not going to work when the windows of root port or upstream
>>>>port of the PCIe switch behind root port are extended to PHB's
>>>>aperatuses in order to support hotplug in subsequent patch.
>>>I'm not _entirely_ sure I understand this.
>>>
>>>I *think* you mean PHB's apertures (i.e. s/aperatuses/apertures/)?
>>>
>>
>>I'll fix the typo in next revision.
>>
>>>>This fixes the issue by mapping IO and M32 segments based on the
>>>>resources of the PCI devices included in the PE, instead of the
>>>>windows of the parent bridge of the PE's primary bus.
>>>
>>>This solution seems to make a lot of sense, but I don't have a very good
>>>understanding of PCI yet: why was it done that way and not this way
>>>originally? Looking at the code, it looks like the old way was simple
>>>but didn't support SR-IOV?
>>>
>>
>>It's not related to SRIOV. Originally, the IO or M32 segments are mapped
>>according to the bridge's windows.
>
>
>Sorry, I do not understand what this means...
>

Before this patchset, the IO or M32 segments consumed by one PE are mapped
according to the windows of PCI bridge of the PE's primary bus if the PE
isn't including all subordinate PCI buses orginated from the bridge. Otherwise,
the bridge's windows should be taken into account as well.

>
>>The bridge windows on root port or the
>>upstream port of the switch behind that will be extended to PHB's apertures.
>
>What does "extended" mean here and why would the windows be extended anyway?
>

It's reserving IO or memory resource for possible plugged adapters in the future.

>>If we still use bridge's windows, all IO and M32 resources are mapped/assigned
>>to the PE corresponding to PCI bus#1 or PCI bus#2. That's not correct any more.
>>So the correct way is to do the mapping based on IO or M32 BARs of the devices
>>included in the PE.
>
>In this patch I see quite a lot of code movements and I fail to spot the
>actual change here...
>
>
>It used to be a single loop:
>
>pci_bus_for_each_resource(pe->pbus, res, i) {
>	/* do stuff for each @res */
>}
>
>and now it is 2 loops inside another loop:
>
>list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
>	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
>		res = &pdev->resource[i];
>		/* do stuff for each @res */
>	}
>
>	for (i = 0; i <= PCI_BRIDGE_RESOURCE_NUM; i++) {
>	        res = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
>		/* do stuff for each @res */
>	}
>}
>
>
>Is that correct? If yes, why is not the patch as simple as this? If no, what
>did I miss?
>

That's correct. The resource tracked by the PCI bridge windows can belonged
to another PE instead the one tracked by @pe in the code.

>
>
>>
>>>There are a few comments inline as well.
>>>
>>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>index 553d3f3..4ab93f8 100644
>>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>@@ -2741,71 +2741,90 @@ truncate_iov:
>>>>  }
>>>>  #endif /* CONFIG_PCI_IOV */
>>>>
>>>>-/*
>>>>- * This function is supposed to be called on basis of PE from top
>>>>- * to bottom style. So the the I/O or MMIO segment assigned to
>>>>- * parent PE could be overrided by its child PEs if necessary.
>>>>- */
>>>>-static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
>>>>-				  struct pnv_ioda_pe *pe)
>>>>+static int pnv_ioda_setup_one_res(struct pnv_ioda_pe *pe,
>>>>+				  struct resource *res)
>>>>  {
>>>>-	struct pnv_phb *phb = hose->private_data;
>>>>+	struct pnv_phb *phb = pe->phb;
>>>>  	struct pci_bus_region region;
>>>>-	struct resource *res;
>>>>-	unsigned int segsize;
>>>>-	int *segmap, index, i;
>>>>+	unsigned int index, segsize;
>>>>+	int *segmap;
>>>>  	uint16_t win;
>>>>  	int64_t rc;
>>>
>>>s/int64_t/s64/;
>>>I think we might also want to change the uint16_t as well.
>>>
>>
>>As I explained before, I changed it from s64 to int64_t and I won't change it
>>back since both of them are fine. Same situation to uint16 here. If we really
>>want to clean it all at once, I can do that later, but not in this patchset.
>>
>>>>-	/*
>>>>-	 * NOTE: We only care PCI bus based PE for now. For PCI
>>>>-	 * device based PE, for example SRIOV sensitive VF should
>>>>-	 * be figured out later.
>>>>-	 */
>>>>-	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
>>>>+	if (!res->parent || !res->flags || res->start > res->end)
>>>>+		return 0;
>>>>
>>>>-	pci_bus_for_each_resource(pe->pbus, res, i) {
>>>>-		if (!res || !res->flags ||
>>>>-		    res->start > res->end)
>>>>-			continue;
>>>>+	if (res->flags & IORESOURCE_IO) {
>>>>+		region.start = res->start - phb->ioda.io_pci_base;
>>>>+		region.end   = res->end - phb->ioda.io_pci_base;
>>>>+		segsize      = phb->ioda.io_segsize;
>>>>+		segmap       = phb->ioda.io_segmap;
>>>>+		win          = OPAL_IO_WINDOW_TYPE;
>>>>+	} else if ((res->flags & IORESOURCE_MEM) &&
>>>>+		   !pnv_pci_is_mem_pref_64(res->flags)) {
>>>>+		region.start = res->start -
>>>>+			       phb->hose->mem_offset[0] -
>>>>+			       phb->ioda.m32_pci_base;
>>>>+		region.end   = res->end -
>>>>+			       phb->hose->mem_offset[0] -
>>>>+			       phb->ioda.m32_pci_base;
>>>>+		segsize      = phb->ioda.m32_segsize;
>>>>+		segmap       = phb->ioda.m32_segmap;
>>>>+		win          = OPAL_M32_WINDOW_TYPE;
>
>
>
>This code asks for a helper function
>
>pnv_ioda_do_setup_one_res(start, end, segsize, segmap, win)
>
>and then you won't need many local variables (region, segsize, segmap, win) ;)
>

Sounds like a good idea. I'll change accordingly in next revision.

Thanks,
Gavin
diff mbox

Patch

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 553d3f3..4ab93f8 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2741,71 +2741,90 @@  truncate_iov:
 }
 #endif /* CONFIG_PCI_IOV */
 
-/*
- * This function is supposed to be called on basis of PE from top
- * to bottom style. So the the I/O or MMIO segment assigned to
- * parent PE could be overrided by its child PEs if necessary.
- */
-static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
-				  struct pnv_ioda_pe *pe)
+static int pnv_ioda_setup_one_res(struct pnv_ioda_pe *pe,
+				  struct resource *res)
 {
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pe->phb;
 	struct pci_bus_region region;
-	struct resource *res;
-	unsigned int segsize;
-	int *segmap, index, i;
+	unsigned int index, segsize;
+	int *segmap;
 	uint16_t win;
 	int64_t rc;
 
-	/*
-	 * NOTE: We only care PCI bus based PE for now. For PCI
-	 * device based PE, for example SRIOV sensitive VF should
-	 * be figured out later.
-	 */
-	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
+	if (!res->parent || !res->flags || res->start > res->end)
+		return 0;
 
-	pci_bus_for_each_resource(pe->pbus, res, i) {
-		if (!res || !res->flags ||
-		    res->start > res->end)
-			continue;
+	if (res->flags & IORESOURCE_IO) {
+		region.start = res->start - phb->ioda.io_pci_base;
+		region.end   = res->end - phb->ioda.io_pci_base;
+		segsize      = phb->ioda.io_segsize;
+		segmap       = phb->ioda.io_segmap;
+		win          = OPAL_IO_WINDOW_TYPE;
+	} else if ((res->flags & IORESOURCE_MEM) &&
+		   !pnv_pci_is_mem_pref_64(res->flags)) {
+		region.start = res->start -
+			       phb->hose->mem_offset[0] -
+			       phb->ioda.m32_pci_base;
+		region.end   = res->end -
+			       phb->hose->mem_offset[0] -
+			       phb->ioda.m32_pci_base;
+		segsize      = phb->ioda.m32_segsize;
+		segmap       = phb->ioda.m32_segmap;
+		win          = OPAL_M32_WINDOW_TYPE;
+	} else {
+		return 0;
+	}
 
-		if (res->flags & IORESOURCE_IO) {
-			region.start = res->start - phb->ioda.io_pci_base;
-			region.end   = res->end - phb->ioda.io_pci_base;
-			segsize      = phb->ioda.io_segsize;
-			segmap       = phb->ioda.io_segmap;
-			win          = OPAL_IO_WINDOW_TYPE;
-		} else if ((res->flags & IORESOURCE_MEM) &&
-			   !pnv_pci_is_mem_pref_64(res->flags)) {
-			region.start = res->start -
-				       hose->mem_offset[0] -
-				       phb->ioda.m32_pci_base;
-			region.end   = res->end -
-				       hose->mem_offset[0] -
-				       phb->ioda.m32_pci_base;
-			segsize      = phb->ioda.m32_segsize;
-			segmap       = phb->ioda.m32_segmap;
-			win          = OPAL_M32_WINDOW_TYPE;
-		} else {
-			continue;
+	region.start = _ALIGN_DOWN(region.start, segsize);
+	region.end   = _ALIGN_UP(region.end, segsize);
+	index = region.start / segsize;
+	while (index < phb->ioda.total_pe_num && region.start < region.end) {
+		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				pe->pe_number, win, 0, index);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Error %lld mapping (%d) seg#%d to PHB#%d-PE#%d\n",
+				__func__, rc, win, index,
+				phb->hose->global_number,
+				pe->pe_number);
+			return -EIO;
 		}
 
-		index = region.start / segsize;
-		while (index < phb->ioda.total_pe_num &&
-		       region.start <= region.end) {
-			segmap[index] = pe->pe_number;
-			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-					pe->pe_number, win, 0, index);
-			if (rc != OPAL_SUCCESS) {
-				pr_warn("%s: Error %lld mapping (%d) seg#%d to PHB#%d-PE#%d\n",
-					__func__, rc, win, index,
-					pe->phb->hose->global_number,
-					pe->pe_number);
-				break;
-			}
+		segmap[index] = pe->pe_number;
+		region.start += segsize;
+		index++;
+	}
+
+	return 0;
+}
+
+static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
+{
+	struct pci_dev *pdev;
+	struct resource *res;
+	int i;
+
+	/* This function only works for bus dependent PE */
+	WARN_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
+
+	list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
+		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+			res = &pdev->resource[i];
+			if (pnv_ioda_setup_one_res(pe, res))
+				return;
+		}
+
+		/*
+		 * If the PE contains all subordinate PCI buses, the
+		 * windows of the child bridges should be mapped to
+		 * the PE as well.
+		 */
+		if (!(pe->flags & PNV_IODA_PE_BUS_ALL && pci_is_bridge(pdev)))
+			continue;
 
-			region.start += segsize;
-			index++;
+		for (i = 0; i <= PCI_BRIDGE_RESOURCE_NUM; i++) {
+			res = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
+			if (pnv_ioda_setup_one_res(pe, res))
+				return;
 		}
 	}
 }
@@ -2819,7 +2838,7 @@  static void pnv_pci_ioda_setup_seg(void)
 	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
 		phb = hose->private_data;
 		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
-			pnv_ioda_setup_pe_seg(hose, pe);
+			pnv_ioda_setup_pe_seg(pe);
 		}
 	}
 }