diff mbox

[V2,4/6] powerpc/powernv: replace the hard coded boundary with gate

Message ID 1438737903-10399-5-git-send-email-weiyang@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

Wei Yang Aug. 5, 2015, 1:25 a.m. UTC
Based on the limitation of M64 Window size, when VF BAR size is bigger than
64MB, IOV BAR just round up power of 2 of the total_vfs. While the 64MB is
a magic boundary in code, which is hard to maintain.

This patch replaces the hard coded boundary with gate, which is calculated
from m64_segsize and adds comment to explain the reason for it.

Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

Comments

Gavin Shan Aug. 6, 2015, 5:26 a.m. UTC | #1
On Wed, Aug 05, 2015 at 09:25:01AM +0800, Wei Yang wrote:
>Based on the limitation of M64 Window size, when VF BAR size is bigger than
>64MB, IOV BAR just round up power of 2 of the total_vfs. While the 64MB is
>a magic boundary in code, which is hard to maintain.
>
>This patch replaces the hard coded boundary with gate, which is calculated
>from m64_segsize and adds comment to explain the reason for it.
>
>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>---
> arch/powerpc/platforms/powernv/pci-ioda.c |   22 +++++++++++++++++-----
> 1 file changed, 17 insertions(+), 5 deletions(-)
>
>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>index f5d110c..31dcedc 100644
>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>@@ -2702,7 +2702,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
> 	struct pnv_phb *phb;
> 	struct resource *res;
> 	int i;
>-	resource_size_t size;
>+	resource_size_t size, gate;
> 	struct pci_dn *pdn;
> 	int mul, total_vfs;
>
>@@ -2718,6 +2718,17 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>
> 	total_vfs = pci_sriov_get_totalvfs(pdev);
> 	mul = phb->ioda.total_pe;
>+	/*
>+	 * If bigger than or equal to half of m64_segsize, just round up power
>+	 * of two.
>+	 *
>+	 * Generally, one M64 BAR maps one IOV BAR. To avoid conflict with
>+	 * other devices, IOV BAR size is expanded to be (total_pe * VF size).
>+	 * When VF size is half of m64_segsize , the expanded size would equal
>+	 * to half of the whole M64 Window size, which will exhaust the M64
>+	 * Window and limit the system flexibility.
>+	 */

s/VF size/VF BAR size
s/m64_segsize/M64 segment size
s/M64 Window/M64 space

>+	gate = phb->ioda.m64_segsize >> 1;
>
> 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>@@ -2732,10 +2743,11 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>
> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>
>-		/* bigger than 64M */
>-		if (size > (1 << 26)) {
>-			dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n",
>-				 i, res);
>+		/* bigger than or equal to gate */
>+		if (size >= gate) {
>+			dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size "
>+				"is bigger than %lld, roundup power2\n",
>+				 i, res, gate);

If I understand the changes correctly, single VF BAR size is still checked against
the "gate" (128MB), not the total VF BAR size. Recap the comments I gave last time:

I mean to check the sum of all VF BARs. For example, the VFs attached to its PF has two
VF BARs and each of them is 64MB. For this case, the MMIO resource can't be allocated
once extending them to 256 VFs. So we have to try "single-pe-mode" for this situation.
So the check becomes as below:

        struct pci_controller *hose = pci_bus_to_host(pdev->bus);
        struct pnv_phb *phb = hose->private_data;
        resource_size_t total_vf_bar_sz = 0;
        resource_size_t gate;

        /* Some comments to explain the "gate" */
        gate = phb->m64_segsize / 2;
        for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
                total_vf_bar_sz += pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i);

        if (total_vf_bar_sz >= gate)
                /* single-pe-mode */
        else
                /* shared-mode */

> 			mul = roundup_pow_of_two(total_vfs);
> 			pdn->m64_single_mode = true;
> 			break;
>-- 
>1.7.9.5
>
Alexey Kardashevskiy Aug. 7, 2015, 9:11 a.m. UTC | #2
On 08/06/2015 03:26 PM, Gavin Shan wrote:
> On Wed, Aug 05, 2015 at 09:25:01AM +0800, Wei Yang wrote:
>> Based on the limitation of M64 Window size, when VF BAR size is bigger than
>> 64MB, IOV BAR just round up power of 2 of the total_vfs. While the 64MB is
>> a magic boundary in code, which is hard to maintain.
>>
>> This patch replaces the hard coded boundary with gate, which is calculated
>>from m64_segsize and adds comment to explain the reason for it.
>>
>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>> ---
>> arch/powerpc/platforms/powernv/pci-ioda.c |   22 +++++++++++++++++-----
>> 1 file changed, 17 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>> index f5d110c..31dcedc 100644
>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>> @@ -2702,7 +2702,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>> 	struct pnv_phb *phb;
>> 	struct resource *res;
>> 	int i;
>> -	resource_size_t size;
>> +	resource_size_t size, gate;
>> 	struct pci_dn *pdn;
>> 	int mul, total_vfs;
>>
>> @@ -2718,6 +2718,17 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>
>> 	total_vfs = pci_sriov_get_totalvfs(pdev);
>> 	mul = phb->ioda.total_pe;
>> +	/*
>> +	 * If bigger than or equal to half of m64_segsize, just round up power
>> +	 * of two.
>> +	 *
>> +	 * Generally, one M64 BAR maps one IOV BAR. To avoid conflict with
>> +	 * other devices, IOV BAR size is expanded to be (total_pe * VF size).
>> +	 * When VF size is half of m64_segsize , the expanded size would equal
>> +	 * to half of the whole M64 Window size, which will exhaust the M64
>> +	 * Window and limit the system flexibility.
>> +	 */
>
> s/VF size/VF BAR size
> s/m64_segsize/M64 segment size
> s/M64 Window/M64 space

I thought I started understanding the stuff and you just introduces new 
term - "M64 space". Not "64bit MMIO space" but "M64 space" - what is this? 
Is that 64GB 64bit MMIO window which we get from the hostboot?


>
>> +	gate = phb->ioda.m64_segsize >> 1;
>>
>> 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
>> 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>> @@ -2732,10 +2743,11 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>
>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>
>> -		/* bigger than 64M */
>> -		if (size > (1 << 26)) {
>> -			dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n",
>> -				 i, res);
>> +		/* bigger than or equal to gate */
>> +		if (size >= gate) {
>> +			dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size "
>> +				"is bigger than %lld, roundup power2\n",
>> +				 i, res, gate);
>
> If I understand the changes correctly, single VF BAR size is still checked against
> the "gate" (128MB), not the total VF BAR size. Recap the comments I gave last time:
>
> I mean to check the sum of all VF BARs. For example, the VFs attached to its PF has two
> VF BARs and each of them is 64MB. For this case, the MMIO resource can't be allocated
> once extending them to 256 VFs. So we have to try "single-pe-mode" for this situation.
> So the check becomes as below:
>
>          struct pci_controller *hose = pci_bus_to_host(pdev->bus);
>          struct pnv_phb *phb = hose->private_data;
>          resource_size_t total_vf_bar_sz = 0;
>          resource_size_t gate;
>
>          /* Some comments to explain the "gate" */
>          gate = phb->m64_segsize / 2;
>          for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
>                  total_vf_bar_sz += pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i);
>
>          if (total_vf_bar_sz >= gate)


Why would be compare to the total size of the BARs? If VFs have 3 64MB BARs 
each (these are 64bit BARs so up to 3 per VF, right?), which is 192MB in 
total per VF, we can use 3 M64's, each in segmented mode (1 segment == 
64MB) and cover many VFs.



>                  /* single-pe-mode */
>          else
>                  /* shared-mode */
>
>> 			mul = roundup_pow_of_two(total_vfs);
>> 			pdn->m64_single_mode = true;
>> 			break;
>> --
>> 1.7.9.5
>>
>
diff mbox

Patch

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index f5d110c..31dcedc 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2702,7 +2702,7 @@  static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 	struct pnv_phb *phb;
 	struct resource *res;
 	int i;
-	resource_size_t size;
+	resource_size_t size, gate;
 	struct pci_dn *pdn;
 	int mul, total_vfs;
 
@@ -2718,6 +2718,17 @@  static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 
 	total_vfs = pci_sriov_get_totalvfs(pdev);
 	mul = phb->ioda.total_pe;
+	/*
+	 * If bigger than or equal to half of m64_segsize, just round up power
+	 * of two.
+	 *
+	 * Generally, one M64 BAR maps one IOV BAR. To avoid conflict with
+	 * other devices, IOV BAR size is expanded to be (total_pe * VF size).
+	 * When VF size is half of m64_segsize , the expanded size would equal
+	 * to half of the whole M64 Window size, which will exhaust the M64
+	 * Window and limit the system flexibility.
+	 */
+	gate = phb->ioda.m64_segsize >> 1;
 
 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
@@ -2732,10 +2743,11 @@  static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 
 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
 
-		/* bigger than 64M */
-		if (size > (1 << 26)) {
-			dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n",
-				 i, res);
+		/* bigger than or equal to gate */
+		if (size >= gate) {
+			dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size "
+				"is bigger than %lld, roundup power2\n",
+				 i, res, gate);
 			mul = roundup_pow_of_two(total_vfs);
 			pdn->m64_single_mode = true;
 			break;