diff mbox

[v6,07/42] powerpc/powernv: Improve IO and M32 mapping

Message ID 1438834307-26960-8-git-send-email-gwshan@linux.vnet.ibm.com
State Not Applicable
Headers show

Commit Message

Gavin Shan Aug. 6, 2015, 4:11 a.m. UTC
There're 3 windows (IO, M32 and M64) for PHB, root port and upstream
port of the PCIE switch behind root port. In order to support PCI
hotplug, we extend the start/end address of those 3 windows of root
port or upstream port to the start/end address of the 3 PHB's windows.
The current implementation, assigning IO or M32 segment based on the
bridge's windows, isn't reliable.

The patch fixes above issue by calculating PE's consumed IO or M32
segments from its contained devices, no PCI bridge windows involved
if the PE doesn't contain all the subordinate PCI buses. Otherwise,
the PCI bridge windows still contribute to PE's consumed IO or M32
segments.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 136 +++++++++++++++++-------------
 1 file changed, 79 insertions(+), 57 deletions(-)

Comments

Alexey Kardashevskiy Aug. 10, 2015, 7:40 a.m. UTC | #1
On 08/06/2015 02:11 PM, Gavin Shan wrote:
> There're 3 windows (IO, M32 and M64) for PHB, root port and upstream

These are actually IO, non-prefetchable and prefetchable windows which 
happen to be IO, 32bit and 64bit windows but this has nothing to do with 
the M32/M64 BAR registers in P7IOC/PHB3, do I understand this correctly?


> port of the PCIE switch behind root port. In order to support PCI
> hotplug, we extend the start/end address of those 3 windows of root
> port or upstream port to the start/end address of the 3 PHB's windows.
> The current implementation, assigning IO or M32 segment based on the
> bridge's windows, isn't reliable.
>
> The patch fixes above issue by calculating PE's consumed IO or M32
> segments from its contained devices, no PCI bridge windows involved
> if the PE doesn't contain all the subordinate PCI buses.

Please, rephrase it. How can PCI bridges be involved in PE consumption?


> Otherwise,
> the PCI bridge windows still contribute to PE's consumed IO or M32
> segments.

PCI bridge windows themselves consume PEs? Is that correct?


>
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>   arch/powerpc/platforms/powernv/pci-ioda.c | 136 +++++++++++++++++-------------
>   1 file changed, 79 insertions(+), 57 deletions(-)
>
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 488a53e..713f4b4 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -2844,75 +2844,97 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>   }
>   #endif /* CONFIG_PCI_IOV */
>
> -/*
> - * This function is supposed to be called on basis of PE from top
> - * to bottom style. So the the I/O or MMIO segment assigned to
> - * parent PE could be overrided by its child PEs if necessary.
> - */
> -static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
> -				  struct pnv_ioda_pe *pe)
> +static int pnv_ioda_setup_one_res(struct pci_controller *hose,
> +				  struct pnv_ioda_pe *pe,
> +				  struct resource *res)
>   {
>   	struct pnv_phb *phb = hose->private_data;
>   	struct pci_bus_region region;
> -	struct resource *res;
> -	int i, index;
> -	unsigned int segsize;
> +	unsigned int index, segsize;
>   	unsigned long *segmap, *pe_segmap;
>   	uint16_t win;
>   	int64_t rc;
>
> -	/*
> -	 * NOTE: We only care PCI bus based PE for now. For PCI
> -	 * device based PE, for example SRIOV sensitive VF should
> -	 * be figured out later.
> -	 */
> -	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
> +	/* Check if we need map the resource */
> +	if (!res->parent || !res->flags || res->start > res->end)

res->start >= res->end ?


> +		return 0;
>
> -	pci_bus_for_each_resource(pe->pbus, res, i) {
> -		if (!res || !res->flags ||
> -		    res->start > res->end)
> -			continue;
> +	if (res->flags & IORESOURCE_IO) {
> +		region.start = res->start - phb->ioda.io_pci_base;
> +		region.end   = res->end - phb->ioda.io_pci_base;
> +		segsize      = phb->ioda.io_segsize;
> +		segmap       = phb->ioda.io_segmap;
> +		pe_segmap    = pe->io_segmap;
> +		win          = OPAL_IO_WINDOW_TYPE;
> +	} else if ((res->flags & IORESOURCE_MEM) &&
> +		   !pnv_pci_is_mem_pref_64(res->flags)) {
> +		region.start = res->start -
> +			       hose->mem_offset[0] -
> +			       phb->ioda.m32_pci_base;
> +		region.end   = res->end -
> +			       hose->mem_offset[0] -
> +			       phb->ioda.m32_pci_base;
> +		segsize      = phb->ioda.m32_segsize;
> +		segmap       = phb->ioda.m32_segmap;
> +		pe_segmap    = pe->m32_segmap;
> +		win          = OPAL_M32_WINDOW_TYPE;
> +	} else {
> +		return 0;
> +	}
>
> -		if (res->flags & IORESOURCE_IO) {
> -			region.start = res->start - phb->ioda.io_pci_base;
> -			region.end   = res->end - phb->ioda.io_pci_base;
> -			segsize      = phb->ioda.io_segsize;
> -			segmap       = phb->ioda.io_segmap;
> -			pe_segmap    = pe->io_segmap;
> -			win          = OPAL_IO_WINDOW_TYPE;
> -		} else if ((res->flags & IORESOURCE_MEM) &&
> -			   !pnv_pci_is_mem_pref_64(res->flags)) {
> -			region.start = res->start -
> -				       hose->mem_offset[0] -
> -				       phb->ioda.m32_pci_base;
> -			region.end   = res->end -
> -				       hose->mem_offset[0] -
> -				       phb->ioda.m32_pci_base;
> -			segsize      = phb->ioda.m32_segsize;
> -			segmap       = phb->ioda.m32_segmap;
> -			pe_segmap    = pe->m32_segmap;
> -			win          = OPAL_M32_WINDOW_TYPE;
> -		} else {
> -			continue;
> +	region.start = _ALIGN_DOWN(region.start, segsize);
> +	region.end   = _ALIGN_UP(region.end, segsize);
> +	index = region.start / segsize;
> +	while (index < phb->ioda.total_pe &&
> +	       region.start < region.end) {
> +		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> +				pe->pe_number, win, 0, index);
> +		if (rc != OPAL_SUCCESS) {
> +			pr_warn("%s: Error %lld mapping (%d) seg#%d to PHB#%d-PE#%d\n",
> +				__func__, rc, win, index,
> +				pe->phb->hose->global_number,
> +				pe->pe_number);
> +			return -EIO;
>   		}
>
> -		index = region.start / phb->ioda.io_segsize;
> -		while (index < phb->ioda.total_pe &&
> -		       region.start <= region.end) {
> -			set_bit(index, segmap);
> -			set_bit(index, pe_segmap);
> -			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> -					pe->pe_number, win, 0, index);
> -			if (rc != OPAL_SUCCESS) {
> -				pr_warn("%s: Error %lld mapping (%d) seg#%d to PHB#%d-PE#%d\n",
> -					__func__, rc, win, index,
> -					pe->phb->hose->global_number,
> -					pe->pe_number);
> -				break;
> -			}
> +		set_bit(index, segmap);
> +		set_bit(index, pe_segmap);
> +		region.start += segsize;
> +		index++;
> +	}
> +
> +	return 0;
> +}
> +
> +static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
> +				  struct pnv_ioda_pe *pe)
> +{
> +	struct pci_dev *pdev;
> +	struct resource *res;
> +	int i;
> +
> +	/* This function only works for bus dependent PE */
> +	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
> +
> +	list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
> +		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
> +			res = &pdev->resource[i];
> +			if (pnv_ioda_setup_one_res(hose, pe, res))
> +				return;
> +		}
> +
> +		/* If the PE contains all subordinate PCI buses, the
> +		 * resources of the child bridges should be mapped
> +		 * to the PE as well.
> +		 */
> +		if (!(pe->flags & PNV_IODA_PE_BUS_ALL) ||
> +		    (pdev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
> +			continue;
>
> -			region.start += segsize;
> -			index++;
> +		for (i = 0; i <= PCI_BRIDGE_RESOURCE_NUM; i++) {
> +			res = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
> +			if (pnv_ioda_setup_one_res(hose, pe, res))
> +				return;
>   		}
>   	}
>   }
>
Gavin Shan Aug. 11, 2015, 12:12 a.m. UTC | #2
On Mon, Aug 10, 2015 at 05:40:08PM +1000, Alexey Kardashevskiy wrote:
>On 08/06/2015 02:11 PM, Gavin Shan wrote:
>>There're 3 windows (IO, M32 and M64) for PHB, root port and upstream
>
>These are actually IO, non-prefetchable and prefetchable windows which happen
>to be IO, 32bit and 64bit windows but this has nothing to do with the M32/M64
>BAR registers in P7IOC/PHB3, do I understand this correctly?
>

In pci-ioda.c, we have below definiations that are defined when
developing the code, not from any specification:

IO  - resources with IO property
M32 - 32-bits or non-prefetchable resources
M64 - 64-bits and prefetchable resources

>>port of the PCIE switch behind root port. In order to support PCI
>>hotplug, we extend the start/end address of those 3 windows of root
>>port or upstream port to the start/end address of the 3 PHB's windows.
>>The current implementation, assigning IO or M32 segment based on the
>>bridge's windows, isn't reliable.
>>
>>The patch fixes above issue by calculating PE's consumed IO or M32
>>segments from its contained devices, no PCI bridge windows involved
>>if the PE doesn't contain all the subordinate PCI buses.
>
>Please, rephrase it. How can PCI bridges be involved in PE consumption?
>

Ok. Will add something like below:

if the PE, corresponding to the PCI bus, doesn't contain all the subordinate
PCI buses.

>
>>Otherwise,
>>the PCI bridge windows still contribute to PE's consumed IO or M32
>>segments.
>
>PCI bridge windows themselves consume PEs? Is that correct?
>

PCI bridge windows consume IO, M32, M64 segments, not PEs.

>>
>>Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>---
>>  arch/powerpc/platforms/powernv/pci-ioda.c | 136 +++++++++++++++++-------------
>>  1 file changed, 79 insertions(+), 57 deletions(-)
>>
>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>index 488a53e..713f4b4 100644
>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>@@ -2844,75 +2844,97 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>  }
>>  #endif /* CONFIG_PCI_IOV */
>>
>>-/*
>>- * This function is supposed to be called on basis of PE from top
>>- * to bottom style. So the the I/O or MMIO segment assigned to
>>- * parent PE could be overrided by its child PEs if necessary.
>>- */
>>-static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
>>-				  struct pnv_ioda_pe *pe)
>>+static int pnv_ioda_setup_one_res(struct pci_controller *hose,
>>+				  struct pnv_ioda_pe *pe,
>>+				  struct resource *res)
>>  {
>>  	struct pnv_phb *phb = hose->private_data;
>>  	struct pci_bus_region region;
>>-	struct resource *res;
>>-	int i, index;
>>-	unsigned int segsize;
>>+	unsigned int index, segsize;
>>  	unsigned long *segmap, *pe_segmap;
>>  	uint16_t win;
>>  	int64_t rc;
>>
>>-	/*
>>-	 * NOTE: We only care PCI bus based PE for now. For PCI
>>-	 * device based PE, for example SRIOV sensitive VF should
>>-	 * be figured out later.
>>-	 */
>>-	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
>>+	/* Check if we need map the resource */
>>+	if (!res->parent || !res->flags || res->start > res->end)
>
>res->start >= res->end ?
>

No, res->start == res->end is valid.

>
>>+		return 0;
>>
>>-	pci_bus_for_each_resource(pe->pbus, res, i) {
>>-		if (!res || !res->flags ||
>>-		    res->start > res->end)
>>-			continue;
>>+	if (res->flags & IORESOURCE_IO) {
>>+		region.start = res->start - phb->ioda.io_pci_base;
>>+		region.end   = res->end - phb->ioda.io_pci_base;
>>+		segsize      = phb->ioda.io_segsize;
>>+		segmap       = phb->ioda.io_segmap;
>>+		pe_segmap    = pe->io_segmap;
>>+		win          = OPAL_IO_WINDOW_TYPE;
>>+	} else if ((res->flags & IORESOURCE_MEM) &&
>>+		   !pnv_pci_is_mem_pref_64(res->flags)) {
>>+		region.start = res->start -
>>+			       hose->mem_offset[0] -
>>+			       phb->ioda.m32_pci_base;
>>+		region.end   = res->end -
>>+			       hose->mem_offset[0] -
>>+			       phb->ioda.m32_pci_base;
>>+		segsize      = phb->ioda.m32_segsize;
>>+		segmap       = phb->ioda.m32_segmap;
>>+		pe_segmap    = pe->m32_segmap;
>>+		win          = OPAL_M32_WINDOW_TYPE;
>>+	} else {
>>+		return 0;
>>+	}
>>
>>-		if (res->flags & IORESOURCE_IO) {
>>-			region.start = res->start - phb->ioda.io_pci_base;
>>-			region.end   = res->end - phb->ioda.io_pci_base;
>>-			segsize      = phb->ioda.io_segsize;
>>-			segmap       = phb->ioda.io_segmap;
>>-			pe_segmap    = pe->io_segmap;
>>-			win          = OPAL_IO_WINDOW_TYPE;
>>-		} else if ((res->flags & IORESOURCE_MEM) &&
>>-			   !pnv_pci_is_mem_pref_64(res->flags)) {
>>-			region.start = res->start -
>>-				       hose->mem_offset[0] -
>>-				       phb->ioda.m32_pci_base;
>>-			region.end   = res->end -
>>-				       hose->mem_offset[0] -
>>-				       phb->ioda.m32_pci_base;
>>-			segsize      = phb->ioda.m32_segsize;
>>-			segmap       = phb->ioda.m32_segmap;
>>-			pe_segmap    = pe->m32_segmap;
>>-			win          = OPAL_M32_WINDOW_TYPE;
>>-		} else {
>>-			continue;
>>+	region.start = _ALIGN_DOWN(region.start, segsize);
>>+	region.end   = _ALIGN_UP(region.end, segsize);
>>+	index = region.start / segsize;
>>+	while (index < phb->ioda.total_pe &&
>>+	       region.start < region.end) {
>>+		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
>>+				pe->pe_number, win, 0, index);
>>+		if (rc != OPAL_SUCCESS) {
>>+			pr_warn("%s: Error %lld mapping (%d) seg#%d to PHB#%d-PE#%d\n",
>>+				__func__, rc, win, index,
>>+				pe->phb->hose->global_number,
>>+				pe->pe_number);
>>+			return -EIO;
>>  		}
>>
>>-		index = region.start / phb->ioda.io_segsize;
>>-		while (index < phb->ioda.total_pe &&
>>-		       region.start <= region.end) {
>>-			set_bit(index, segmap);
>>-			set_bit(index, pe_segmap);
>>-			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
>>-					pe->pe_number, win, 0, index);
>>-			if (rc != OPAL_SUCCESS) {
>>-				pr_warn("%s: Error %lld mapping (%d) seg#%d to PHB#%d-PE#%d\n",
>>-					__func__, rc, win, index,
>>-					pe->phb->hose->global_number,
>>-					pe->pe_number);
>>-				break;
>>-			}
>>+		set_bit(index, segmap);
>>+		set_bit(index, pe_segmap);
>>+		region.start += segsize;
>>+		index++;
>>+	}
>>+
>>+	return 0;
>>+}
>>+
>>+static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
>>+				  struct pnv_ioda_pe *pe)
>>+{
>>+	struct pci_dev *pdev;
>>+	struct resource *res;
>>+	int i;
>>+
>>+	/* This function only works for bus dependent PE */
>>+	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
>>+
>>+	list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
>>+		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
>>+			res = &pdev->resource[i];
>>+			if (pnv_ioda_setup_one_res(hose, pe, res))
>>+				return;
>>+		}
>>+
>>+		/* If the PE contains all subordinate PCI buses, the
>>+		 * resources of the child bridges should be mapped
>>+		 * to the PE as well.
>>+		 */
>>+		if (!(pe->flags & PNV_IODA_PE_BUS_ALL) ||
>>+		    (pdev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
>>+			continue;
>>
>>-			region.start += segsize;
>>-			index++;
>>+		for (i = 0; i <= PCI_BRIDGE_RESOURCE_NUM; i++) {
>>+			res = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
>>+			if (pnv_ioda_setup_one_res(hose, pe, res))
>>+				return;
>>  		}
>>  	}
>>  }
>>

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexey Kardashevskiy Aug. 11, 2015, 2:32 a.m. UTC | #3
On 08/11/2015 10:12 AM, Gavin Shan wrote:
> On Mon, Aug 10, 2015 at 05:40:08PM +1000, Alexey Kardashevskiy wrote:
>> On 08/06/2015 02:11 PM, Gavin Shan wrote:
>>> There're 3 windows (IO, M32 and M64) for PHB, root port and upstream
>>
>> These are actually IO, non-prefetchable and prefetchable windows which happen
>> to be IO, 32bit and 64bit windows but this has nothing to do with the M32/M64
>> BAR registers in P7IOC/PHB3, do I understand this correctly?
>>
>
> In pci-ioda.c, we have below definiations that are defined when
> developing the code, not from any specification:
>
> IO  - resources with IO property
> M32 - 32-bits or non-prefetchable resources
> M64 - 64-bits and prefetchable resources


This what I am saying - it is incorrect and confusing. M32/M64 are PHB3 
register names and associated windows (with "M" in the beginning) but not 
device resources.


>>> port of the PCIE switch behind root port. In order to support PCI
>>> hotplug, we extend the start/end address of those 3 windows of root
>>> port or upstream port to the start/end address of the 3 PHB's windows.
>>> The current implementation, assigning IO or M32 segment based on the
>>> bridge's windows, isn't reliable.
>>>
>>> The patch fixes above issue by calculating PE's consumed IO or M32
>>> segments from its contained devices, no PCI bridge windows involved
>>> if the PE doesn't contain all the subordinate PCI buses.
>>
>> Please, rephrase it. How can PCI bridges be involved in PE consumption?
>>
>
> Ok. Will add something like below:
>
> if the PE, corresponding to the PCI bus, doesn't contain all the subordinate
> PCI buses.


No, my question was about "PCI bridge windows involved" - what do you do to 
the windows if PE does not own all child buses?



>>
>>> Otherwise,
>>> the PCI bridge windows still contribute to PE's consumed IO or M32
>>> segments.
>>
>> PCI bridge windows themselves consume PEs? Is that correct?
>>
>
> PCI bridge windows consume IO, M32, M64 segments, not PEs.

Ah, right.


>>>
>>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>> ---
>>>   arch/powerpc/platforms/powernv/pci-ioda.c | 136 +++++++++++++++++-------------
>>>   1 file changed, 79 insertions(+), 57 deletions(-)
>>>
>>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>> index 488a53e..713f4b4 100644
>>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>> @@ -2844,75 +2844,97 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>   }
>>>   #endif /* CONFIG_PCI_IOV */
>>>
>>> -/*
>>> - * This function is supposed to be called on basis of PE from top
>>> - * to bottom style. So the the I/O or MMIO segment assigned to
>>> - * parent PE could be overrided by its child PEs if necessary.
>>> - */
>>> -static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
>>> -				  struct pnv_ioda_pe *pe)
>>> +static int pnv_ioda_setup_one_res(struct pci_controller *hose,
>>> +				  struct pnv_ioda_pe *pe,
>>> +				  struct resource *res)
>>>   {
>>>   	struct pnv_phb *phb = hose->private_data;
>>>   	struct pci_bus_region region;
>>> -	struct resource *res;
>>> -	int i, index;
>>> -	unsigned int segsize;
>>> +	unsigned int index, segsize;
>>>   	unsigned long *segmap, *pe_segmap;
>>>   	uint16_t win;
>>>   	int64_t rc;
>>>
>>> -	/*
>>> -	 * NOTE: We only care PCI bus based PE for now. For PCI
>>> -	 * device based PE, for example SRIOV sensitive VF should
>>> -	 * be figured out later.
>>> -	 */
>>> -	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
>>> +	/* Check if we need map the resource */
>>> +	if (!res->parent || !res->flags || res->start > res->end)
>>
>> res->start >= res->end ?
>>
>
> No, res->start == res->end is valid.
>
>>
>>> +		return 0;
>>>
>>> -	pci_bus_for_each_resource(pe->pbus, res, i) {
>>> -		if (!res || !res->flags ||
>>> -		    res->start > res->end)
>>> -			continue;
>>> +	if (res->flags & IORESOURCE_IO) {
>>> +		region.start = res->start - phb->ioda.io_pci_base;
>>> +		region.end   = res->end - phb->ioda.io_pci_base;
>>> +		segsize      = phb->ioda.io_segsize;
>>> +		segmap       = phb->ioda.io_segmap;
>>> +		pe_segmap    = pe->io_segmap;
>>> +		win          = OPAL_IO_WINDOW_TYPE;
>>> +	} else if ((res->flags & IORESOURCE_MEM) &&
>>> +		   !pnv_pci_is_mem_pref_64(res->flags)) {
>>> +		region.start = res->start -
>>> +			       hose->mem_offset[0] -
>>> +			       phb->ioda.m32_pci_base;
>>> +		region.end   = res->end -
>>> +			       hose->mem_offset[0] -
>>> +			       phb->ioda.m32_pci_base;
>>> +		segsize      = phb->ioda.m32_segsize;
>>> +		segmap       = phb->ioda.m32_segmap;
>>> +		pe_segmap    = pe->m32_segmap;
>>> +		win          = OPAL_M32_WINDOW_TYPE;
>>> +	} else {
>>> +		return 0;
>>> +	}
>>>
>>> -		if (res->flags & IORESOURCE_IO) {
>>> -			region.start = res->start - phb->ioda.io_pci_base;
>>> -			region.end   = res->end - phb->ioda.io_pci_base;
>>> -			segsize      = phb->ioda.io_segsize;
>>> -			segmap       = phb->ioda.io_segmap;
>>> -			pe_segmap    = pe->io_segmap;
>>> -			win          = OPAL_IO_WINDOW_TYPE;
>>> -		} else if ((res->flags & IORESOURCE_MEM) &&
>>> -			   !pnv_pci_is_mem_pref_64(res->flags)) {
>>> -			region.start = res->start -
>>> -				       hose->mem_offset[0] -
>>> -				       phb->ioda.m32_pci_base;
>>> -			region.end   = res->end -
>>> -				       hose->mem_offset[0] -
>>> -				       phb->ioda.m32_pci_base;
>>> -			segsize      = phb->ioda.m32_segsize;
>>> -			segmap       = phb->ioda.m32_segmap;
>>> -			pe_segmap    = pe->m32_segmap;
>>> -			win          = OPAL_M32_WINDOW_TYPE;
>>> -		} else {
>>> -			continue;
>>> +	region.start = _ALIGN_DOWN(region.start, segsize);
>>> +	region.end   = _ALIGN_UP(region.end, segsize);
>>> +	index = region.start / segsize;
>>> +	while (index < phb->ioda.total_pe &&
>>> +	       region.start < region.end) {
>>> +		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
>>> +				pe->pe_number, win, 0, index);
>>> +		if (rc != OPAL_SUCCESS) {
>>> +			pr_warn("%s: Error %lld mapping (%d) seg#%d to PHB#%d-PE#%d\n",
>>> +				__func__, rc, win, index,
>>> +				pe->phb->hose->global_number,
>>> +				pe->pe_number);
>>> +			return -EIO;
>>>   		}
>>>
>>> -		index = region.start / phb->ioda.io_segsize;
>>> -		while (index < phb->ioda.total_pe &&
>>> -		       region.start <= region.end) {
>>> -			set_bit(index, segmap);
>>> -			set_bit(index, pe_segmap);
>>> -			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
>>> -					pe->pe_number, win, 0, index);
>>> -			if (rc != OPAL_SUCCESS) {
>>> -				pr_warn("%s: Error %lld mapping (%d) seg#%d to PHB#%d-PE#%d\n",
>>> -					__func__, rc, win, index,
>>> -					pe->phb->hose->global_number,
>>> -					pe->pe_number);
>>> -				break;
>>> -			}
>>> +		set_bit(index, segmap);
>>> +		set_bit(index, pe_segmap);
>>> +		region.start += segsize;
>>> +		index++;
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
>>> +				  struct pnv_ioda_pe *pe)
>>> +{
>>> +	struct pci_dev *pdev;
>>> +	struct resource *res;
>>> +	int i;
>>> +
>>> +	/* This function only works for bus dependent PE */
>>> +	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
>>> +
>>> +	list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
>>> +		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
>>> +			res = &pdev->resource[i];
>>> +			if (pnv_ioda_setup_one_res(hose, pe, res))
>>> +				return;
>>> +		}
>>> +
>>> +		/* If the PE contains all subordinate PCI buses, the
>>> +		 * resources of the child bridges should be mapped
>>> +		 * to the PE as well.
>>> +		 */
>>> +		if (!(pe->flags & PNV_IODA_PE_BUS_ALL) ||
>>> +		    (pdev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
>>> +			continue;
>>>
>>> -			region.start += segsize;
>>> -			index++;
>>> +		for (i = 0; i <= PCI_BRIDGE_RESOURCE_NUM; i++) {
>>> +			res = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
>>> +			if (pnv_ioda_setup_one_res(hose, pe, res))
>>> +				return;
>>>   		}
>>>   	}
>>>   }
>>>
>
> Thanks,
> Gavin
>
Gavin Shan Aug. 12, 2015, 11:42 p.m. UTC | #4
On Tue, Aug 11, 2015 at 12:32:13PM +1000, Alexey Kardashevskiy wrote:
>On 08/11/2015 10:12 AM, Gavin Shan wrote:
>>On Mon, Aug 10, 2015 at 05:40:08PM +1000, Alexey Kardashevskiy wrote:
>>>On 08/06/2015 02:11 PM, Gavin Shan wrote:
>>>>There're 3 windows (IO, M32 and M64) for PHB, root port and upstream
>>>
>>>These are actually IO, non-prefetchable and prefetchable windows which happen
>>>to be IO, 32bit and 64bit windows but this has nothing to do with the M32/M64
>>>BAR registers in P7IOC/PHB3, do I understand this correctly?
>>>
>>
>>In pci-ioda.c, we have below definiations that are defined when
>>developing the code, not from any specification:
>>
>>IO  - resources with IO property
>>M32 - 32-bits or non-prefetchable resources
>>M64 - 64-bits and prefetchable resources
>
>
>This what I am saying - it is incorrect and confusing. M32/M64 are PHB3
>register names and associated windows (with "M" in the beginning) but not
>device resources.
>

I don't see how it's incorrect and confusing. M32/M64 are not PHB3
register names. Also, device resource is either IO, 32-bits prefetchable,
memory, 32-bits non-prefetchable memory, 64-bits non-prefetchable memory,
64-bits prefetchable memory. They match with IO, M32, M64.


>>>>port of the PCIE switch behind root port. In order to support PCI
>>>>hotplug, we extend the start/end address of those 3 windows of root
>>>>port or upstream port to the start/end address of the 3 PHB's windows.
>>>>The current implementation, assigning IO or M32 segment based on the
>>>>bridge's windows, isn't reliable.
>>>>
>>>>The patch fixes above issue by calculating PE's consumed IO or M32
>>>>segments from its contained devices, no PCI bridge windows involved
>>>>if the PE doesn't contain all the subordinate PCI buses.
>>>
>>>Please, rephrase it. How can PCI bridges be involved in PE consumption?
>>>
>>
>>Ok. Will add something like below:
>>
>>if the PE, corresponding to the PCI bus, doesn't contain all the subordinate
>>PCI buses.
>
>
>No, my question was about "PCI bridge windows involved" - what do you do to
>the windows if PE does not own all child buses?
>

All of it is about the original implementation: When the PE doesn't include
all child buses, the resource consumed by the PE is: resources assigned to
current PCI bus and then exclude the resources assigned to the child buses.
Note that PCI bridge windows are actually PCI bus's resource.

>>>
>>>>Otherwise,
>>>>the PCI bridge windows still contribute to PE's consumed IO or M32
>>>>segments.
>>>
>>>PCI bridge windows themselves consume PEs? Is that correct?
>>>
>>
>>PCI bridge windows consume IO, M32, M64 segments, not PEs.
>
>Ah, right.
>
>
>>>>
>>>>Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>>>---
>>>>  arch/powerpc/platforms/powernv/pci-ioda.c | 136 +++++++++++++++++-------------
>>>>  1 file changed, 79 insertions(+), 57 deletions(-)
>>>>
>>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>index 488a53e..713f4b4 100644
>>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>@@ -2844,75 +2844,97 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>  }
>>>>  #endif /* CONFIG_PCI_IOV */
>>>>
>>>>-/*
>>>>- * This function is supposed to be called on basis of PE from top
>>>>- * to bottom style. So the the I/O or MMIO segment assigned to
>>>>- * parent PE could be overrided by its child PEs if necessary.
>>>>- */
>>>>-static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
>>>>-				  struct pnv_ioda_pe *pe)
>>>>+static int pnv_ioda_setup_one_res(struct pci_controller *hose,
>>>>+				  struct pnv_ioda_pe *pe,
>>>>+				  struct resource *res)
>>>>  {
>>>>  	struct pnv_phb *phb = hose->private_data;
>>>>  	struct pci_bus_region region;
>>>>-	struct resource *res;
>>>>-	int i, index;
>>>>-	unsigned int segsize;
>>>>+	unsigned int index, segsize;
>>>>  	unsigned long *segmap, *pe_segmap;
>>>>  	uint16_t win;
>>>>  	int64_t rc;
>>>>
>>>>-	/*
>>>>-	 * NOTE: We only care PCI bus based PE for now. For PCI
>>>>-	 * device based PE, for example SRIOV sensitive VF should
>>>>-	 * be figured out later.
>>>>-	 */
>>>>-	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
>>>>+	/* Check if we need map the resource */
>>>>+	if (!res->parent || !res->flags || res->start > res->end)
>>>
>>>res->start >= res->end ?
>>>
>>
>>No, res->start == res->end is valid.
>>
>>>
>>>>+		return 0;
>>>>
>>>>-	pci_bus_for_each_resource(pe->pbus, res, i) {
>>>>-		if (!res || !res->flags ||
>>>>-		    res->start > res->end)
>>>>-			continue;
>>>>+	if (res->flags & IORESOURCE_IO) {
>>>>+		region.start = res->start - phb->ioda.io_pci_base;
>>>>+		region.end   = res->end - phb->ioda.io_pci_base;
>>>>+		segsize      = phb->ioda.io_segsize;
>>>>+		segmap       = phb->ioda.io_segmap;
>>>>+		pe_segmap    = pe->io_segmap;
>>>>+		win          = OPAL_IO_WINDOW_TYPE;
>>>>+	} else if ((res->flags & IORESOURCE_MEM) &&
>>>>+		   !pnv_pci_is_mem_pref_64(res->flags)) {
>>>>+		region.start = res->start -
>>>>+			       hose->mem_offset[0] -
>>>>+			       phb->ioda.m32_pci_base;
>>>>+		region.end   = res->end -
>>>>+			       hose->mem_offset[0] -
>>>>+			       phb->ioda.m32_pci_base;
>>>>+		segsize      = phb->ioda.m32_segsize;
>>>>+		segmap       = phb->ioda.m32_segmap;
>>>>+		pe_segmap    = pe->m32_segmap;
>>>>+		win          = OPAL_M32_WINDOW_TYPE;
>>>>+	} else {
>>>>+		return 0;
>>>>+	}
>>>>
>>>>-		if (res->flags & IORESOURCE_IO) {
>>>>-			region.start = res->start - phb->ioda.io_pci_base;
>>>>-			region.end   = res->end - phb->ioda.io_pci_base;
>>>>-			segsize      = phb->ioda.io_segsize;
>>>>-			segmap       = phb->ioda.io_segmap;
>>>>-			pe_segmap    = pe->io_segmap;
>>>>-			win          = OPAL_IO_WINDOW_TYPE;
>>>>-		} else if ((res->flags & IORESOURCE_MEM) &&
>>>>-			   !pnv_pci_is_mem_pref_64(res->flags)) {
>>>>-			region.start = res->start -
>>>>-				       hose->mem_offset[0] -
>>>>-				       phb->ioda.m32_pci_base;
>>>>-			region.end   = res->end -
>>>>-				       hose->mem_offset[0] -
>>>>-				       phb->ioda.m32_pci_base;
>>>>-			segsize      = phb->ioda.m32_segsize;
>>>>-			segmap       = phb->ioda.m32_segmap;
>>>>-			pe_segmap    = pe->m32_segmap;
>>>>-			win          = OPAL_M32_WINDOW_TYPE;
>>>>-		} else {
>>>>-			continue;
>>>>+	region.start = _ALIGN_DOWN(region.start, segsize);
>>>>+	region.end   = _ALIGN_UP(region.end, segsize);
>>>>+	index = region.start / segsize;
>>>>+	while (index < phb->ioda.total_pe &&
>>>>+	       region.start < region.end) {
>>>>+		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
>>>>+				pe->pe_number, win, 0, index);
>>>>+		if (rc != OPAL_SUCCESS) {
>>>>+			pr_warn("%s: Error %lld mapping (%d) seg#%d to PHB#%d-PE#%d\n",
>>>>+				__func__, rc, win, index,
>>>>+				pe->phb->hose->global_number,
>>>>+				pe->pe_number);
>>>>+			return -EIO;
>>>>  		}
>>>>
>>>>-		index = region.start / phb->ioda.io_segsize;
>>>>-		while (index < phb->ioda.total_pe &&
>>>>-		       region.start <= region.end) {
>>>>-			set_bit(index, segmap);
>>>>-			set_bit(index, pe_segmap);
>>>>-			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
>>>>-					pe->pe_number, win, 0, index);
>>>>-			if (rc != OPAL_SUCCESS) {
>>>>-				pr_warn("%s: Error %lld mapping (%d) seg#%d to PHB#%d-PE#%d\n",
>>>>-					__func__, rc, win, index,
>>>>-					pe->phb->hose->global_number,
>>>>-					pe->pe_number);
>>>>-				break;
>>>>-			}
>>>>+		set_bit(index, segmap);
>>>>+		set_bit(index, pe_segmap);
>>>>+		region.start += segsize;
>>>>+		index++;
>>>>+	}
>>>>+
>>>>+	return 0;
>>>>+}
>>>>+
>>>>+static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
>>>>+				  struct pnv_ioda_pe *pe)
>>>>+{
>>>>+	struct pci_dev *pdev;
>>>>+	struct resource *res;
>>>>+	int i;
>>>>+
>>>>+	/* This function only works for bus dependent PE */
>>>>+	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
>>>>+
>>>>+	list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
>>>>+		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
>>>>+			res = &pdev->resource[i];
>>>>+			if (pnv_ioda_setup_one_res(hose, pe, res))
>>>>+				return;
>>>>+		}
>>>>+
>>>>+		/* If the PE contains all subordinate PCI buses, the
>>>>+		 * resources of the child bridges should be mapped
>>>>+		 * to the PE as well.
>>>>+		 */
>>>>+		if (!(pe->flags & PNV_IODA_PE_BUS_ALL) ||
>>>>+		    (pdev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
>>>>+			continue;
>>>>
>>>>-			region.start += segsize;
>>>>-			index++;
>>>>+		for (i = 0; i <= PCI_BRIDGE_RESOURCE_NUM; i++) {
>>>>+			res = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
>>>>+			if (pnv_ioda_setup_one_res(hose, pe, res))
>>>>+				return;
>>>>  		}
>>>>  	}
>>>>  }
>>>>
>>

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 488a53e..713f4b4 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2844,75 +2844,97 @@  static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 }
 #endif /* CONFIG_PCI_IOV */
 
-/*
- * This function is supposed to be called on basis of PE from top
- * to bottom style. So the the I/O or MMIO segment assigned to
- * parent PE could be overrided by its child PEs if necessary.
- */
-static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
-				  struct pnv_ioda_pe *pe)
+static int pnv_ioda_setup_one_res(struct pci_controller *hose,
+				  struct pnv_ioda_pe *pe,
+				  struct resource *res)
 {
 	struct pnv_phb *phb = hose->private_data;
 	struct pci_bus_region region;
-	struct resource *res;
-	int i, index;
-	unsigned int segsize;
+	unsigned int index, segsize;
 	unsigned long *segmap, *pe_segmap;
 	uint16_t win;
 	int64_t rc;
 
-	/*
-	 * NOTE: We only care PCI bus based PE for now. For PCI
-	 * device based PE, for example SRIOV sensitive VF should
-	 * be figured out later.
-	 */
-	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
+	/* Check if we need map the resource */
+	if (!res->parent || !res->flags || res->start > res->end)
+		return 0;
 
-	pci_bus_for_each_resource(pe->pbus, res, i) {
-		if (!res || !res->flags ||
-		    res->start > res->end)
-			continue;
+	if (res->flags & IORESOURCE_IO) {
+		region.start = res->start - phb->ioda.io_pci_base;
+		region.end   = res->end - phb->ioda.io_pci_base;
+		segsize      = phb->ioda.io_segsize;
+		segmap       = phb->ioda.io_segmap;
+		pe_segmap    = pe->io_segmap;
+		win          = OPAL_IO_WINDOW_TYPE;
+	} else if ((res->flags & IORESOURCE_MEM) &&
+		   !pnv_pci_is_mem_pref_64(res->flags)) {
+		region.start = res->start -
+			       hose->mem_offset[0] -
+			       phb->ioda.m32_pci_base;
+		region.end   = res->end -
+			       hose->mem_offset[0] -
+			       phb->ioda.m32_pci_base;
+		segsize      = phb->ioda.m32_segsize;
+		segmap       = phb->ioda.m32_segmap;
+		pe_segmap    = pe->m32_segmap;
+		win          = OPAL_M32_WINDOW_TYPE;
+	} else {
+		return 0;
+	}
 
-		if (res->flags & IORESOURCE_IO) {
-			region.start = res->start - phb->ioda.io_pci_base;
-			region.end   = res->end - phb->ioda.io_pci_base;
-			segsize      = phb->ioda.io_segsize;
-			segmap       = phb->ioda.io_segmap;
-			pe_segmap    = pe->io_segmap;
-			win          = OPAL_IO_WINDOW_TYPE;
-		} else if ((res->flags & IORESOURCE_MEM) &&
-			   !pnv_pci_is_mem_pref_64(res->flags)) {
-			region.start = res->start -
-				       hose->mem_offset[0] -
-				       phb->ioda.m32_pci_base;
-			region.end   = res->end -
-				       hose->mem_offset[0] -
-				       phb->ioda.m32_pci_base;
-			segsize      = phb->ioda.m32_segsize;
-			segmap       = phb->ioda.m32_segmap;
-			pe_segmap    = pe->m32_segmap;
-			win          = OPAL_M32_WINDOW_TYPE;
-		} else {
-			continue;
+	region.start = _ALIGN_DOWN(region.start, segsize);
+	region.end   = _ALIGN_UP(region.end, segsize);
+	index = region.start / segsize;
+	while (index < phb->ioda.total_pe &&
+	       region.start < region.end) {
+		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				pe->pe_number, win, 0, index);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Error %lld mapping (%d) seg#%d to PHB#%d-PE#%d\n",
+				__func__, rc, win, index,
+				pe->phb->hose->global_number,
+				pe->pe_number);
+			return -EIO;
 		}
 
-		index = region.start / phb->ioda.io_segsize;
-		while (index < phb->ioda.total_pe &&
-		       region.start <= region.end) {
-			set_bit(index, segmap);
-			set_bit(index, pe_segmap);
-			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-					pe->pe_number, win, 0, index);
-			if (rc != OPAL_SUCCESS) {
-				pr_warn("%s: Error %lld mapping (%d) seg#%d to PHB#%d-PE#%d\n",
-					__func__, rc, win, index,
-					pe->phb->hose->global_number,
-					pe->pe_number);
-				break;
-			}
+		set_bit(index, segmap);
+		set_bit(index, pe_segmap);
+		region.start += segsize;
+		index++;
+	}
+
+	return 0;
+}
+
+static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
+				  struct pnv_ioda_pe *pe)
+{
+	struct pci_dev *pdev;
+	struct resource *res;
+	int i;
+
+	/* This function only works for bus dependent PE */
+	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
+
+	list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
+		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+			res = &pdev->resource[i];
+			if (pnv_ioda_setup_one_res(hose, pe, res))
+				return;
+		}
+
+		/* If the PE contains all subordinate PCI buses, the
+		 * resources of the child bridges should be mapped
+		 * to the PE as well.
+		 */
+		if (!(pe->flags & PNV_IODA_PE_BUS_ALL) ||
+		    (pdev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+			continue;
 
-			region.start += segsize;
-			index++;
+		for (i = 0; i <= PCI_BRIDGE_RESOURCE_NUM; i++) {
+			res = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
+			if (pnv_ioda_setup_one_res(hose, pe, res))
+				return;
 		}
 	}
 }