diff mbox

[V2,1/6] powerpc/powernv: don't enable SRIOV when VF BAR contains non M64 BAR

Message ID 1438737903-10399-2-git-send-email-weiyang@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

Wei Yang Aug. 5, 2015, 1:24 a.m. UTC
On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
M64 windwo, which means M64 BAR can't work on it.

This patch makes this explicit.

Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

Comments

Gavin Shan Aug. 6, 2015, 4:35 a.m. UTC | #1
On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>M64 windwo, which means M64 BAR can't work on it.
>

s/PHB_IODA2/PHB3
s/windwo/window

>This patch makes this explicit.
>
>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>

The idea sounds right, but there is one question as below.

>---
> arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
> 1 file changed, 9 insertions(+), 16 deletions(-)
>
>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>index 5738d31..9b41dba 100644
>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>@@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
> 		if (!res->flags || !res->parent)
> 			continue;
>
>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>-			continue;
>-
> 		/*
> 		 * The actual IOV BAR range is determined by the start address
> 		 * and the actual size for num_vfs VFs BAR.  This check is to
>@@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
> 		if (!res->flags || !res->parent)
> 			continue;
>
>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>-			continue;
>-
> 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
> 		res2 = *res;
> 		res->start += size * offset;
>@@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
> 		if (!res->flags || !res->parent)
> 			continue;
>
>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>-			continue;
>-
> 		for (j = 0; j < vf_groups; j++) {
> 			do {
> 				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>@@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
> 	pdn = pci_get_pdn(pdev);
>
> 	if (phb->type == PNV_PHB_IODA2) {
>+		if (!pdn->vfs_expanded) {
>+			dev_info(&pdev->dev, "don't support this SRIOV device"
>+				" with non M64 VF BAR\n");
>+			return -EBUSY;
>+		}
>+

It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
unavailable. For this case, the VFs are permanently unavailable because of
running out of space to accomodate M64 and non-M64 VF BARs.

The error message could be printed with dev_warn() and it would be precise
as below or something else you prefer:

	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");


> 		/* Calculate available PE for required VFs */
> 		mutex_lock(&phb->ioda.pe_alloc_mutex);
> 		pdn->offset = bitmap_find_next_zero_area(
>@@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
> 		if (!res->flags || res->parent)
> 			continue;
> 		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>-			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>+			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>+					" non M64 VF BAR%d: %pR. \n",
> 				 i, res);
>-			continue;
>+			return;
> 		}
>
> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>@@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
> 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
> 		if (!res->flags || res->parent)
> 			continue;
>-		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>-			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>-				 i, res);
>-			continue;
>-		}

When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
I think it can be avoided.

>
> 		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>-- 
>1.7.9.5
>
Alexey Kardashevskiy Aug. 6, 2015, 6:10 a.m. UTC | #2
On 08/06/2015 02:35 PM, Gavin Shan wrote:
> On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>> On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>> a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>> M64 windwo, which means M64 BAR can't work on it.
>>
>
> s/PHB_IODA2/PHB3


No, it is IODA2. OPEL does PHB3-specific bits, the host kernel just uses OPAL.


> s/windwo/window
>
>> This patch makes this explicit.
>>
>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>
> The idea sounds right, but there is one question as below.
>
>> ---
>> arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
>> 1 file changed, 9 insertions(+), 16 deletions(-)
>>
>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>> index 5738d31..9b41dba 100644
>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>> @@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>> 		if (!res->flags || !res->parent)
>> 			continue;
>>
>> -		if (!pnv_pci_is_mem_pref_64(res->flags))
>> -			continue;
>> -
>> 		/*
>> 		 * The actual IOV BAR range is determined by the start address
>> 		 * and the actual size for num_vfs VFs BAR.  This check is to
>> @@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>> 		if (!res->flags || !res->parent)
>> 			continue;
>>
>> -		if (!pnv_pci_is_mem_pref_64(res->flags))
>> -			continue;
>> -
>> 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>> 		res2 = *res;
>> 		res->start += size * offset;
>> @@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>> 		if (!res->flags || !res->parent)
>> 			continue;
>>
>> -		if (!pnv_pci_is_mem_pref_64(res->flags))
>> -			continue;
>> -
>> 		for (j = 0; j < vf_groups; j++) {
>> 			do {
>> 				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>> @@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>> 	pdn = pci_get_pdn(pdev);
>>
>> 	if (phb->type == PNV_PHB_IODA2) {
>> +		if (!pdn->vfs_expanded) {
>> +			dev_info(&pdev->dev, "don't support this SRIOV device"
>> +				" with non M64 VF BAR\n");
>> +			return -EBUSY;
>> +		}
>> +
>
> It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
> unavailable. For this case, the VFs are permanently unavailable because of
> running out of space to accomodate M64 and non-M64 VF BARs.
>
> The error message could be printed with dev_warn() and it would be precise
> as below or something else you prefer:
>
> 	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");


Both messages are cryptic.

If it is not M64 BAR, then what is it? It is always in one of M64 BARs (in 
the worst case - BAR#15?), the difference is if it is segmented or not, no?



>
>
>> 		/* Calculate available PE for required VFs */
>> 		mutex_lock(&phb->ioda.pe_alloc_mutex);
>> 		pdn->offset = bitmap_find_next_zero_area(
>> @@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>> 		if (!res->flags || res->parent)
>> 			continue;
>> 		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>> -			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>> +			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>> +					" non M64 VF BAR%d: %pR. \n",
>> 				 i, res);
>> -			continue;
>> +			return;
>> 		}
>>
>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>> @@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>> 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>> 		if (!res->flags || res->parent)
>> 			continue;
>> -		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>> -			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>> -				 i, res);
>> -			continue;
>> -		}
>
> When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
> Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
> I think it can be avoided.
>
>>
>> 		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>> --
>> 1.7.9.5
>>
>
Gavin Shan Aug. 6, 2015, 6:57 a.m. UTC | #3
On Thu, Aug 06, 2015 at 04:10:21PM +1000, Alexey Kardashevskiy wrote:
>On 08/06/2015 02:35 PM, Gavin Shan wrote:
>>On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>>>On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>>>a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>>>M64 windwo, which means M64 BAR can't work on it.
>>>
>>
>>s/PHB_IODA2/PHB3
>
>
>No, it is IODA2. OPEL does PHB3-specific bits, the host kernel just uses OPAL.
>

Ok.

>
>>s/windwo/window
>>
>>>This patch makes this explicit.
>>>
>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>
>>The idea sounds right, but there is one question as below.
>>
>>>---
>>>arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
>>>1 file changed, 9 insertions(+), 16 deletions(-)
>>>
>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>index 5738d31..9b41dba 100644
>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>@@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>		if (!res->flags || !res->parent)
>>>			continue;
>>>
>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>-			continue;
>>>-
>>>		/*
>>>		 * The actual IOV BAR range is determined by the start address
>>>		 * and the actual size for num_vfs VFs BAR.  This check is to
>>>@@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>		if (!res->flags || !res->parent)
>>>			continue;
>>>
>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>-			continue;
>>>-
>>>		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>>>		res2 = *res;
>>>		res->start += size * offset;
>>>@@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>>>		if (!res->flags || !res->parent)
>>>			continue;
>>>
>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>-			continue;
>>>-
>>>		for (j = 0; j < vf_groups; j++) {
>>>			do {
>>>				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>>>@@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>>	pdn = pci_get_pdn(pdev);
>>>
>>>	if (phb->type == PNV_PHB_IODA2) {
>>>+		if (!pdn->vfs_expanded) {
>>>+			dev_info(&pdev->dev, "don't support this SRIOV device"
>>>+				" with non M64 VF BAR\n");
>>>+			return -EBUSY;
>>>+		}
>>>+
>>
>>It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
>>unavailable. For this case, the VFs are permanently unavailable because of
>>running out of space to accomodate M64 and non-M64 VF BARs.
>>
>>The error message could be printed with dev_warn() and it would be precise
>>as below or something else you prefer:
>>
>>	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");
>
>
>Both messages are cryptic.
>
>If it is not M64 BAR, then what is it? It is always in one of M64 BARs (in
>the worst case - BAR#15?), the difference is if it is segmented or not, no?
>

The VF BAR could be one of IO, M32, M64. If it's not M64, the VFs are supposed
to be disabled and the (IO and M32) resources won't be allocted, but for sure,
the IO and M32 resources can't be put into any one of the 16 PHB's M64 BARs.
would you recommend one better message then?

>>
>>
>>>		/* Calculate available PE for required VFs */
>>>		mutex_lock(&phb->ioda.pe_alloc_mutex);
>>>		pdn->offset = bitmap_find_next_zero_area(
>>>@@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>		if (!res->flags || res->parent)
>>>			continue;
>>>		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>-			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>>>+			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>>>+					" non M64 VF BAR%d: %pR. \n",
>>>				 i, res);
>>>-			continue;
>>>+			return;
>>>		}
>>>
>>>		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>@@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>>>		if (!res->flags || res->parent)
>>>			continue;
>>>-		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>-			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>>>-				 i, res);
>>>-			continue;
>>>-		}
>>
>>When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
>>Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
>>I think it can be avoided.
>>
>>>
>>>		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>>>		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>--
>>>1.7.9.5
>>>
>>
>
>
>-- 
>Alexey
>
Alexey Kardashevskiy Aug. 6, 2015, 7:47 a.m. UTC | #4
On 08/06/2015 04:57 PM, Gavin Shan wrote:
> On Thu, Aug 06, 2015 at 04:10:21PM +1000, Alexey Kardashevskiy wrote:
>> On 08/06/2015 02:35 PM, Gavin Shan wrote:
>>> On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>>>> On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>>>> a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>>>> M64 windwo, which means M64 BAR can't work on it.


The proper text would be something like this:

===
SRIOV only supports 64bit MMIO. So if we fail to assign 64bit BAR, we 
cannot enable the device.
===


>>>>
>>>
>>> s/PHB_IODA2/PHB3
>>
>>
>> No, it is IODA2. OPEL does PHB3-specific bits, the host kernel just uses OPAL.
>>
>
> Ok.
>
>>
>>> s/windwo/window
>>>
>>>> This patch makes this explicit.
>>>>
>>>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>
>>> The idea sounds right, but there is one question as below.
>>>
>>>> ---
>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
>>>> 1 file changed, 9 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>> index 5738d31..9b41dba 100644
>>>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>> @@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>> 		if (!res->flags || !res->parent)
>>>> 			continue;
>>>>
>>>> -		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>> -			continue;
>>>> -
>>>> 		/*
>>>> 		 * The actual IOV BAR range is determined by the start address
>>>> 		 * and the actual size for num_vfs VFs BAR.  This check is to
>>>> @@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>> 		if (!res->flags || !res->parent)
>>>> 			continue;
>>>>
>>>> -		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>> -			continue;
>>>> -
>>>> 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>>>> 		res2 = *res;
>>>> 		res->start += size * offset;
>>>> @@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>>>> 		if (!res->flags || !res->parent)
>>>> 			continue;
>>>>
>>>> -		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>> -			continue;
>>>> -
>>>> 		for (j = 0; j < vf_groups; j++) {
>>>> 			do {
>>>> 				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>>>> @@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>>> 	pdn = pci_get_pdn(pdev);
>>>>
>>>> 	if (phb->type == PNV_PHB_IODA2) {
>>>> +		if (!pdn->vfs_expanded) {
>>>> +			dev_info(&pdev->dev, "don't support this SRIOV device"
>>>> +				" with non M64 VF BAR\n");
>>>> +			return -EBUSY;
>>>> +		}
>>>> +
>>>
>>> It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
>>> unavailable. For this case, the VFs are permanently unavailable because of
>>> running out of space to accomodate M64 and non-M64 VF BARs.
>>>
>>> The error message could be printed with dev_warn() and it would be precise
>>> as below or something else you prefer:
>>>
>>> 	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");
>>
>>
>> Both messages are cryptic.
>>
>> If it is not M64 BAR, then what is it? It is always in one of M64 BARs (in
>> the worst case - BAR#15?), the difference is if it is segmented or not, no?
>>
>
> The VF BAR could be one of IO, M32, M64. If it's not M64, the VFs are supposed
> to be disabled and the (IO and M32) resources won't be allocted, but for sure,
> the IO and M32 resources can't be put into any one of the 16 PHB's M64 BARs.
> would you recommend one better message then?



dev_warn(&pdev->dev, "SRIOV is disabled as no space is left in 64bit MMIO 
window\n");

Or it is not "MMIO window"?



>
>>>
>>>
>>>> 		/* Calculate available PE for required VFs */
>>>> 		mutex_lock(&phb->ioda.pe_alloc_mutex);
>>>> 		pdn->offset = bitmap_find_next_zero_area(
>>>> @@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>> 		if (!res->flags || res->parent)
>>>> 			continue;
>>>> 		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>> -			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>>>> +			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>>>> +					" non M64 VF BAR%d: %pR. \n",
>>>> 				 i, res);
>>>> -			continue;
>>>> +			return;
>>>> 		}
>>>>
>>>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>> @@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>> 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>>>> 		if (!res->flags || res->parent)
>>>> 			continue;
>>>> -		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>> -			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>>>> -				 i, res);
>>>> -			continue;
>>>> -		}
>>>
>>> When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
>>> Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
>>> I think it can be avoided.
>>>
>>>>
>>>> 		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>>>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
Gavin Shan Aug. 6, 2015, 11:07 a.m. UTC | #5
On Thu, Aug 06, 2015 at 05:47:42PM +1000, Alexey Kardashevskiy wrote:
>On 08/06/2015 04:57 PM, Gavin Shan wrote:
>>On Thu, Aug 06, 2015 at 04:10:21PM +1000, Alexey Kardashevskiy wrote:
>>>On 08/06/2015 02:35 PM, Gavin Shan wrote:
>>>>On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>>>>>On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>>>>>a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>>>>>M64 windwo, which means M64 BAR can't work on it.
>
>
>The proper text would be something like this:
>
>===
>SRIOV only supports 64bit MMIO. So if we fail to assign 64bit BAR, we cannot
>enable the device.
>===
>
>
>>>>>
>>>>
>>>>s/PHB_IODA2/PHB3
>>>
>>>
>>>No, it is IODA2. OPEL does PHB3-specific bits, the host kernel just uses OPAL.
>>>
>>
>>Ok.
>>
>>>
>>>>s/windwo/window
>>>>
>>>>>This patch makes this explicit.
>>>>>
>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>
>>>>The idea sounds right, but there is one question as below.
>>>>
>>>>>---
>>>>>arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
>>>>>1 file changed, 9 insertions(+), 16 deletions(-)
>>>>>
>>>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>index 5738d31..9b41dba 100644
>>>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>@@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>>>		if (!res->flags || !res->parent)
>>>>>			continue;
>>>>>
>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>-			continue;
>>>>>-
>>>>>		/*
>>>>>		 * The actual IOV BAR range is determined by the start address
>>>>>		 * and the actual size for num_vfs VFs BAR.  This check is to
>>>>>@@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>>>		if (!res->flags || !res->parent)
>>>>>			continue;
>>>>>
>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>-			continue;
>>>>>-
>>>>>		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>>>>>		res2 = *res;
>>>>>		res->start += size * offset;
>>>>>@@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>>>>>		if (!res->flags || !res->parent)
>>>>>			continue;
>>>>>
>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>-			continue;
>>>>>-
>>>>>		for (j = 0; j < vf_groups; j++) {
>>>>>			do {
>>>>>				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>>>>>@@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>>>>	pdn = pci_get_pdn(pdev);
>>>>>
>>>>>	if (phb->type == PNV_PHB_IODA2) {
>>>>>+		if (!pdn->vfs_expanded) {
>>>>>+			dev_info(&pdev->dev, "don't support this SRIOV device"
>>>>>+				" with non M64 VF BAR\n");
>>>>>+			return -EBUSY;
>>>>>+		}
>>>>>+
>>>>
>>>>It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
>>>>unavailable. For this case, the VFs are permanently unavailable because of
>>>>running out of space to accomodate M64 and non-M64 VF BARs.
>>>>
>>>>The error message could be printed with dev_warn() and it would be precise
>>>>as below or something else you prefer:
>>>>
>>>>	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");
>>>
>>>
>>>Both messages are cryptic.
>>>
>>>If it is not M64 BAR, then what is it? It is always in one of M64 BARs (in
>>>the worst case - BAR#15?), the difference is if it is segmented or not, no?
>>>
>>
>>The VF BAR could be one of IO, M32, M64. If it's not M64, the VFs are supposed
>>to be disabled and the (IO and M32) resources won't be allocted, but for sure,
>>the IO and M32 resources can't be put into any one of the 16 PHB's M64 BARs.
>>would you recommend one better message then?
>
>
>
>dev_warn(&pdev->dev, "SRIOV is disabled as no space is left in 64bit MMIO
>window\n");
>
>Or it is not "MMIO window"?
>

It's a confusing message: It's not related to space and M64 window.
When any VF BAR is IO or M32, we just give up attempting to allocate
resources for it. I still think my original message is enough or
similarly below one:

	dev_warn(&pdev->dev, "Disabled SRIOV because of non-M64 BAR"

>>
>>>>
>>>>
>>>>>		/* Calculate available PE for required VFs */
>>>>>		mutex_lock(&phb->ioda.pe_alloc_mutex);
>>>>>		pdn->offset = bitmap_find_next_zero_area(
>>>>>@@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>>		if (!res->flags || res->parent)
>>>>>			continue;
>>>>>		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>>-			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>>>>>+			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>>>>>+					" non M64 VF BAR%d: %pR. \n",
>>>>>				 i, res);
>>>>>-			continue;
>>>>>+			return;
>>>>>		}
>>>>>
>>>>>		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>>>@@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>>		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>>>>>		if (!res->flags || res->parent)
>>>>>			continue;
>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>>-			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>>>>>-				 i, res);
>>>>>-			continue;
>>>>>-		}
>>>>
>>>>When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
>>>>Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
>>>>I think it can be avoided.
>>>>
>>>>>
>>>>>		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>>>>>		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>
>
>-- 
>Alexey
>
Wei Yang Aug. 6, 2015, 2:10 p.m. UTC | #6
On Thu, Aug 06, 2015 at 02:35:57PM +1000, Gavin Shan wrote:
>On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>>On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>>a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>>M64 windwo, which means M64 BAR can't work on it.
>>
>
>s/PHB_IODA2/PHB3
>s/windwo/window
>
>>This patch makes this explicit.
>>
>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>
>The idea sounds right, but there is one question as below.
>
>>---
>> arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
>> 1 file changed, 9 insertions(+), 16 deletions(-)
>>
>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>index 5738d31..9b41dba 100644
>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>@@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>> 		if (!res->flags || !res->parent)
>> 			continue;
>>
>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>-			continue;
>>-
>> 		/*
>> 		 * The actual IOV BAR range is determined by the start address
>> 		 * and the actual size for num_vfs VFs BAR.  This check is to
>>@@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>> 		if (!res->flags || !res->parent)
>> 			continue;
>>
>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>-			continue;
>>-
>> 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>> 		res2 = *res;
>> 		res->start += size * offset;
>>@@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>> 		if (!res->flags || !res->parent)
>> 			continue;
>>
>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>-			continue;
>>-
>> 		for (j = 0; j < vf_groups; j++) {
>> 			do {
>> 				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>>@@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>> 	pdn = pci_get_pdn(pdev);
>>
>> 	if (phb->type == PNV_PHB_IODA2) {
>>+		if (!pdn->vfs_expanded) {
>>+			dev_info(&pdev->dev, "don't support this SRIOV device"
>>+				" with non M64 VF BAR\n");
>>+			return -EBUSY;
>>+		}
>>+
>
>It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
>unavailable. For this case, the VFs are permanently unavailable because of
>running out of space to accomodate M64 and non-M64 VF BARs.
>
>The error message could be printed with dev_warn() and it would be precise
>as below or something else you prefer:
>
>	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");
>

Thanks for the comment, will change accordingly.

>
>> 		/* Calculate available PE for required VFs */
>> 		mutex_lock(&phb->ioda.pe_alloc_mutex);
>> 		pdn->offset = bitmap_find_next_zero_area(
>>@@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>> 		if (!res->flags || res->parent)
>> 			continue;
>> 		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>-			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>>+			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>>+					" non M64 VF BAR%d: %pR. \n",
>> 				 i, res);
>>-			continue;
>>+			return;
>> 		}
>>
>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>@@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>> 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>> 		if (!res->flags || res->parent)
>> 			continue;
>>-		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>-			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>>-				 i, res);
>>-			continue;
>>-		}
>
>When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
>Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
>I think it can be avoided.
>

Don't get your point. You mean to avoid this function?

Or clear the IOV BAR when we found one of it is non-M64?

>>
>> 		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>-- 
>>1.7.9.5
>>
Wei Yang Aug. 6, 2015, 2:13 p.m. UTC | #7
On Thu, Aug 06, 2015 at 05:47:42PM +1000, Alexey Kardashevskiy wrote:
>On 08/06/2015 04:57 PM, Gavin Shan wrote:
>>On Thu, Aug 06, 2015 at 04:10:21PM +1000, Alexey Kardashevskiy wrote:
>>>On 08/06/2015 02:35 PM, Gavin Shan wrote:
>>>>On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>>>>>On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>>>>>a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>>>>>M64 windwo, which means M64 BAR can't work on it.
>
>
>The proper text would be something like this:
>
>===
>SRIOV only supports 64bit MMIO. So if we fail to assign 64bit BAR, we
>cannot enable the device.
>===
>
>
>>>>>
>>>>
>>>>s/PHB_IODA2/PHB3
>>>
>>>
>>>No, it is IODA2. OPEL does PHB3-specific bits, the host kernel just uses OPAL.
>>>
>>
>>Ok.
>>
>>>
>>>>s/windwo/window
>>>>
>>>>>This patch makes this explicit.
>>>>>
>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>
>>>>The idea sounds right, but there is one question as below.
>>>>
>>>>>---
>>>>>arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
>>>>>1 file changed, 9 insertions(+), 16 deletions(-)
>>>>>
>>>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>index 5738d31..9b41dba 100644
>>>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>@@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>>>		if (!res->flags || !res->parent)
>>>>>			continue;
>>>>>
>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>-			continue;
>>>>>-
>>>>>		/*
>>>>>		 * The actual IOV BAR range is determined by the start address
>>>>>		 * and the actual size for num_vfs VFs BAR.  This check is to
>>>>>@@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>>>		if (!res->flags || !res->parent)
>>>>>			continue;
>>>>>
>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>-			continue;
>>>>>-
>>>>>		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>>>>>		res2 = *res;
>>>>>		res->start += size * offset;
>>>>>@@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>>>>>		if (!res->flags || !res->parent)
>>>>>			continue;
>>>>>
>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>-			continue;
>>>>>-
>>>>>		for (j = 0; j < vf_groups; j++) {
>>>>>			do {
>>>>>				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>>>>>@@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>>>>	pdn = pci_get_pdn(pdev);
>>>>>
>>>>>	if (phb->type == PNV_PHB_IODA2) {
>>>>>+		if (!pdn->vfs_expanded) {
>>>>>+			dev_info(&pdev->dev, "don't support this SRIOV device"
>>>>>+				" with non M64 VF BAR\n");
>>>>>+			return -EBUSY;
>>>>>+		}
>>>>>+
>>>>
>>>>It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
>>>>unavailable. For this case, the VFs are permanently unavailable because of
>>>>running out of space to accomodate M64 and non-M64 VF BARs.
>>>>
>>>>The error message could be printed with dev_warn() and it would be precise
>>>>as below or something else you prefer:
>>>>
>>>>	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");
>>>
>>>
>>>Both messages are cryptic.
>>>
>>>If it is not M64 BAR, then what is it? It is always in one of M64 BARs (in
>>>the worst case - BAR#15?), the difference is if it is segmented or not, no?
>>>
>>
>>The VF BAR could be one of IO, M32, M64. If it's not M64, the VFs are supposed
>>to be disabled and the (IO and M32) resources won't be allocted, but for sure,
>>the IO and M32 resources can't be put into any one of the 16 PHB's M64 BARs.
>>would you recommend one better message then?
>
>
>
>dev_warn(&pdev->dev, "SRIOV is disabled as no space is left in 64bit
>MMIO window\n");
>
>Or it is not "MMIO window"?
>

The reason is not "no space left in 64bit MMIO window".

The reason is the IOV BAR is not 64bit prefetchable, then in linux kernel this
can't be allocated from M64 Space, then we can't use M64 BAR to cover it.

>
>
>>
>>>>
>>>>
>>>>>		/* Calculate available PE for required VFs */
>>>>>		mutex_lock(&phb->ioda.pe_alloc_mutex);
>>>>>		pdn->offset = bitmap_find_next_zero_area(
>>>>>@@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>>		if (!res->flags || res->parent)
>>>>>			continue;
>>>>>		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>>-			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>>>>>+			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>>>>>+					" non M64 VF BAR%d: %pR. \n",
>>>>>				 i, res);
>>>>>-			continue;
>>>>>+			return;
>>>>>		}
>>>>>
>>>>>		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>>>@@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>>		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>>>>>		if (!res->flags || res->parent)
>>>>>			continue;
>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>>-			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>>>>>-				 i, res);
>>>>>-			continue;
>>>>>-		}
>>>>
>>>>When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
>>>>Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
>>>>I think it can be avoided.
>>>>
>>>>>
>>>>>		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>>>>>		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>
>
>-- 
>Alexey
Gavin Shan Aug. 7, 2015, 1:20 a.m. UTC | #8
On Thu, Aug 06, 2015 at 10:10:10PM +0800, Wei Yang wrote:
>On Thu, Aug 06, 2015 at 02:35:57PM +1000, Gavin Shan wrote:
>>On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>>>On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>>>a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>>>M64 windwo, which means M64 BAR can't work on it.
>>>
>>
>>s/PHB_IODA2/PHB3
>>s/windwo/window
>>
>>>This patch makes this explicit.
>>>
>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>
>>The idea sounds right, but there is one question as below.
>>
>>>---
>>> arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
>>> 1 file changed, 9 insertions(+), 16 deletions(-)
>>>
>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>index 5738d31..9b41dba 100644
>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>@@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>> 		if (!res->flags || !res->parent)
>>> 			continue;
>>>
>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>-			continue;
>>>-
>>> 		/*
>>> 		 * The actual IOV BAR range is determined by the start address
>>> 		 * and the actual size for num_vfs VFs BAR.  This check is to
>>>@@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>> 		if (!res->flags || !res->parent)
>>> 			continue;
>>>
>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>-			continue;
>>>-
>>> 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>>> 		res2 = *res;
>>> 		res->start += size * offset;
>>>@@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>>> 		if (!res->flags || !res->parent)
>>> 			continue;
>>>
>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>-			continue;
>>>-
>>> 		for (j = 0; j < vf_groups; j++) {
>>> 			do {
>>> 				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>>>@@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>> 	pdn = pci_get_pdn(pdev);
>>>
>>> 	if (phb->type == PNV_PHB_IODA2) {
>>>+		if (!pdn->vfs_expanded) {
>>>+			dev_info(&pdev->dev, "don't support this SRIOV device"
>>>+				" with non M64 VF BAR\n");
>>>+			return -EBUSY;
>>>+		}
>>>+
>>
>>It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
>>unavailable. For this case, the VFs are permanently unavailable because of
>>running out of space to accomodate M64 and non-M64 VF BARs.
>>
>>The error message could be printed with dev_warn() and it would be precise
>>as below or something else you prefer:
>>
>>	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");
>>
>
>Thanks for the comment, will change accordingly.
>
>>
>>> 		/* Calculate available PE for required VFs */
>>> 		mutex_lock(&phb->ioda.pe_alloc_mutex);
>>> 		pdn->offset = bitmap_find_next_zero_area(
>>>@@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>> 		if (!res->flags || res->parent)
>>> 			continue;
>>> 		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>-			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>>>+			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>>>+					" non M64 VF BAR%d: %pR. \n",
>>> 				 i, res);
>>>-			continue;
>>>+			return;
>>> 		}
>>>
>>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>@@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>> 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>>> 		if (!res->flags || res->parent)
>>> 			continue;
>>>-		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>-			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>>>-				 i, res);
>>>-			continue;
>>>-		}
>>
>>When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
>>Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
>>I think it can be avoided.
>>
>
>Don't get your point. You mean to avoid this function?
>
>Or clear the IOV BAR when we found one of it is non-M64?
>

I mean to clear all IOV BARs in case any more of them are IO or M32. In this
case, the SRIOV capability won't be enabled. Otherwise, the resources for
all IOV BARs are assigned and allocated by PCI subsystem, but they won't
be used. Does it make sense to you?

>>>
>>> 		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>-- 
>>>1.7.9.5
>>>
>
>-- 
>Richard Yang
>Help you, Help me
Alexey Kardashevskiy Aug. 7, 2015, 1:24 a.m. UTC | #9
On 08/07/2015 12:13 AM, Wei Yang wrote:
> On Thu, Aug 06, 2015 at 05:47:42PM +1000, Alexey Kardashevskiy wrote:
>> On 08/06/2015 04:57 PM, Gavin Shan wrote:
>>> On Thu, Aug 06, 2015 at 04:10:21PM +1000, Alexey Kardashevskiy wrote:
>>>> On 08/06/2015 02:35 PM, Gavin Shan wrote:
>>>>> On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>>>>>> On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>>>>>> a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>>>>>> M64 windwo, which means M64 BAR can't work on it.
>>
>>
>> The proper text would be something like this:
>>
>> ===
>> SRIOV only supports 64bit MMIO. So if we fail to assign 64bit BAR, we
>> cannot enable the device.
>> ===
>>
>>
>>>>>>
>>>>>
>>>>> s/PHB_IODA2/PHB3
>>>>
>>>>
>>>> No, it is IODA2. OPEL does PHB3-specific bits, the host kernel just uses OPAL.
>>>>
>>>
>>> Ok.
>>>
>>>>
>>>>> s/windwo/window
>>>>>
>>>>>> This patch makes this explicit.
>>>>>>
>>>>>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>
>>>>> The idea sounds right, but there is one question as below.
>>>>>
>>>>>> ---
>>>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
>>>>>> 1 file changed, 9 insertions(+), 16 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>> index 5738d31..9b41dba 100644
>>>>>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>> @@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>>>> 		if (!res->flags || !res->parent)
>>>>>> 			continue;
>>>>>>
>>>>>> -		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>> -			continue;
>>>>>> -
>>>>>> 		/*
>>>>>> 		 * The actual IOV BAR range is determined by the start address
>>>>>> 		 * and the actual size for num_vfs VFs BAR.  This check is to
>>>>>> @@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>>>> 		if (!res->flags || !res->parent)
>>>>>> 			continue;
>>>>>>
>>>>>> -		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>> -			continue;
>>>>>> -
>>>>>> 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>>>>>> 		res2 = *res;
>>>>>> 		res->start += size * offset;
>>>>>> @@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>>>>>> 		if (!res->flags || !res->parent)
>>>>>> 			continue;
>>>>>>
>>>>>> -		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>> -			continue;
>>>>>> -
>>>>>> 		for (j = 0; j < vf_groups; j++) {
>>>>>> 			do {
>>>>>> 				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>>>>>> @@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>>>>> 	pdn = pci_get_pdn(pdev);
>>>>>>
>>>>>> 	if (phb->type == PNV_PHB_IODA2) {
>>>>>> +		if (!pdn->vfs_expanded) {
>>>>>> +			dev_info(&pdev->dev, "don't support this SRIOV device"
>>>>>> +				" with non M64 VF BAR\n");
>>>>>> +			return -EBUSY;
>>>>>> +		}
>>>>>> +
>>>>>
>>>>> It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
>>>>> unavailable. For this case, the VFs are permanently unavailable because of
>>>>> running out of space to accomodate M64 and non-M64 VF BARs.
>>>>>
>>>>> The error message could be printed with dev_warn() and it would be precise
>>>>> as below or something else you prefer:
>>>>>
>>>>> 	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");
>>>>
>>>>
>>>> Both messages are cryptic.
>>>>
>>>> If it is not M64 BAR, then what is it? It is always in one of M64 BARs (in
>>>> the worst case - BAR#15?), the difference is if it is segmented or not, no?
>>>>
>>>
>>> The VF BAR could be one of IO, M32, M64. If it's not M64, the VFs are supposed
>>> to be disabled and the (IO and M32) resources won't be allocted, but for sure,
>>> the IO and M32 resources can't be put into any one of the 16 PHB's M64 BARs.
>>> would you recommend one better message then?
>>
>>
>>
>> dev_warn(&pdev->dev, "SRIOV is disabled as no space is left in 64bit
>> MMIO window\n");
>>
>> Or it is not "MMIO window"?
>>
>
> The reason is not "no space left in 64bit MMIO window".
>
> The reason is the IOV BAR is not 64bit prefetchable, then in linux kernel this
> can't be allocated from M64 Space, then we can't use M64 BAR to cover it.

Oh. So now it is not M64 vs. M32 and IO, it is about prefetchable vs. 
non-prefetchable. Please choose one.

Should it be this then?
dev_warn(&pdev->dev, "Non-prefetchable BARs are not supported for SRIOV")


But Gavin keeps insisting on mentioning "non-M64 BAR" - this part I do not 
understand.

And where does this limit come from? Is it POWER8, IODA2, PHB3, SRIOV or 
something else? Is it all about isolation or the host _without_ KVM but 
with SRIOV also cannot use VFs if they have non-prefetchable BARs? Is this 
because of POWER8 or IODA2 or PHB3 or SRIOV or something else?




>>
>>
>>>
>>>>>
>>>>>
>>>>>> 		/* Calculate available PE for required VFs */
>>>>>> 		mutex_lock(&phb->ioda.pe_alloc_mutex);
>>>>>> 		pdn->offset = bitmap_find_next_zero_area(
>>>>>> @@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>>> 		if (!res->flags || res->parent)
>>>>>> 			continue;
>>>>>> 		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>>> -			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>>>>>> +			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>>>>>> +					" non M64 VF BAR%d: %pR. \n",
>>>>>> 				 i, res);
>>>>>> -			continue;
>>>>>> +			return;
>>>>>> 		}
>>>>>>
>>>>>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>>>> @@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>>> 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>>>>>> 		if (!res->flags || res->parent)
>>>>>> 			continue;
>>>>>> -		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>>> -			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>>>>>> -				 i, res);
>>>>>> -			continue;
>>>>>> -		}
>>>>>
>>>>> When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
>>>>> Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
>>>>> I think it can be avoided.
>>>>>
>>>>>>
>>>>>> 		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>>>>>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>
>>
>> --
>> Alexey
>
Wei Yang Aug. 7, 2015, 2:24 a.m. UTC | #10
On Fri, Aug 07, 2015 at 11:20:10AM +1000, Gavin Shan wrote:
>On Thu, Aug 06, 2015 at 10:10:10PM +0800, Wei Yang wrote:
>>On Thu, Aug 06, 2015 at 02:35:57PM +1000, Gavin Shan wrote:
>>>On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>>>>On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>>>>a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>>>>M64 windwo, which means M64 BAR can't work on it.
>>>>
>>>
>>>s/PHB_IODA2/PHB3
>>>s/windwo/window
>>>
>>>>This patch makes this explicit.
>>>>
>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>
>>>The idea sounds right, but there is one question as below.
>>>
>>>>---
>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
>>>> 1 file changed, 9 insertions(+), 16 deletions(-)
>>>>
>>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>index 5738d31..9b41dba 100644
>>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>@@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>> 		if (!res->flags || !res->parent)
>>>> 			continue;
>>>>
>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>-			continue;
>>>>-
>>>> 		/*
>>>> 		 * The actual IOV BAR range is determined by the start address
>>>> 		 * and the actual size for num_vfs VFs BAR.  This check is to
>>>>@@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>> 		if (!res->flags || !res->parent)
>>>> 			continue;
>>>>
>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>-			continue;
>>>>-
>>>> 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>>>> 		res2 = *res;
>>>> 		res->start += size * offset;
>>>>@@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>>>> 		if (!res->flags || !res->parent)
>>>> 			continue;
>>>>
>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>-			continue;
>>>>-
>>>> 		for (j = 0; j < vf_groups; j++) {
>>>> 			do {
>>>> 				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>>>>@@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>>> 	pdn = pci_get_pdn(pdev);
>>>>
>>>> 	if (phb->type == PNV_PHB_IODA2) {
>>>>+		if (!pdn->vfs_expanded) {
>>>>+			dev_info(&pdev->dev, "don't support this SRIOV device"
>>>>+				" with non M64 VF BAR\n");
>>>>+			return -EBUSY;
>>>>+		}
>>>>+
>>>
>>>It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
>>>unavailable. For this case, the VFs are permanently unavailable because of
>>>running out of space to accomodate M64 and non-M64 VF BARs.
>>>
>>>The error message could be printed with dev_warn() and it would be precise
>>>as below or something else you prefer:
>>>
>>>	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");
>>>
>>
>>Thanks for the comment, will change accordingly.
>>
>>>
>>>> 		/* Calculate available PE for required VFs */
>>>> 		mutex_lock(&phb->ioda.pe_alloc_mutex);
>>>> 		pdn->offset = bitmap_find_next_zero_area(
>>>>@@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>> 		if (!res->flags || res->parent)
>>>> 			continue;
>>>> 		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>-			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>>>>+			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>>>>+					" non M64 VF BAR%d: %pR. \n",
>>>> 				 i, res);
>>>>-			continue;
>>>>+			return;
>>>> 		}
>>>>
>>>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>>@@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>> 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>>>> 		if (!res->flags || res->parent)
>>>> 			continue;
>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>-			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>>>>-				 i, res);
>>>>-			continue;
>>>>-		}
>>>
>>>When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
>>>Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
>>>I think it can be avoided.
>>>
>>
>>Don't get your point. You mean to avoid this function?
>>
>>Or clear the IOV BAR when we found one of it is non-M64?
>>
>
>I mean to clear all IOV BARs in case any more of them are IO or M32. In this
>case, the SRIOV capability won't be enabled. Otherwise, the resources for
>all IOV BARs are assigned and allocated by PCI subsystem, but they won't
>be used. Does it make sense to you?
>

If we want to save MMIO space, this is not necessary.

The IOV BAR will be put into the optional list in assignment stage. So when
there is not enough MMIO space, they will not be assigned.

For the long term, maybe P9/P10, we will finally adjust the solution to
support SRIOV devices with M32 MMIO. So I suggest to leave as it is.

>>>>
>>>> 		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>>>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>>-- 
>>>>1.7.9.5
>>>>
>>
>>-- 
>>Richard Yang
>>Help you, Help me
Gavin Shan Aug. 7, 2015, 3:50 a.m. UTC | #11
On Fri, Aug 07, 2015 at 10:24:05AM +0800, Wei Yang wrote:
>On Fri, Aug 07, 2015 at 11:20:10AM +1000, Gavin Shan wrote:
>>On Thu, Aug 06, 2015 at 10:10:10PM +0800, Wei Yang wrote:
>>>On Thu, Aug 06, 2015 at 02:35:57PM +1000, Gavin Shan wrote:
>>>>On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>>>>>On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>>>>>a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>>>>>M64 windwo, which means M64 BAR can't work on it.
>>>>>
>>>>
>>>>s/PHB_IODA2/PHB3
>>>>s/windwo/window
>>>>
>>>>>This patch makes this explicit.
>>>>>
>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>
>>>>The idea sounds right, but there is one question as below.
>>>>
>>>>>---
>>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
>>>>> 1 file changed, 9 insertions(+), 16 deletions(-)
>>>>>
>>>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>index 5738d31..9b41dba 100644
>>>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>@@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>>> 		if (!res->flags || !res->parent)
>>>>> 			continue;
>>>>>
>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>-			continue;
>>>>>-
>>>>> 		/*
>>>>> 		 * The actual IOV BAR range is determined by the start address
>>>>> 		 * and the actual size for num_vfs VFs BAR.  This check is to
>>>>>@@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>>> 		if (!res->flags || !res->parent)
>>>>> 			continue;
>>>>>
>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>-			continue;
>>>>>-
>>>>> 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>>>>> 		res2 = *res;
>>>>> 		res->start += size * offset;
>>>>>@@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>>>>> 		if (!res->flags || !res->parent)
>>>>> 			continue;
>>>>>
>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>-			continue;
>>>>>-
>>>>> 		for (j = 0; j < vf_groups; j++) {
>>>>> 			do {
>>>>> 				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>>>>>@@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>>>> 	pdn = pci_get_pdn(pdev);
>>>>>
>>>>> 	if (phb->type == PNV_PHB_IODA2) {
>>>>>+		if (!pdn->vfs_expanded) {
>>>>>+			dev_info(&pdev->dev, "don't support this SRIOV device"
>>>>>+				" with non M64 VF BAR\n");
>>>>>+			return -EBUSY;
>>>>>+		}
>>>>>+
>>>>
>>>>It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
>>>>unavailable. For this case, the VFs are permanently unavailable because of
>>>>running out of space to accomodate M64 and non-M64 VF BARs.
>>>>
>>>>The error message could be printed with dev_warn() and it would be precise
>>>>as below or something else you prefer:
>>>>
>>>>	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");
>>>>
>>>
>>>Thanks for the comment, will change accordingly.
>>>
>>>>
>>>>> 		/* Calculate available PE for required VFs */
>>>>> 		mutex_lock(&phb->ioda.pe_alloc_mutex);
>>>>> 		pdn->offset = bitmap_find_next_zero_area(
>>>>>@@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>> 		if (!res->flags || res->parent)
>>>>> 			continue;
>>>>> 		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>>-			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>>>>>+			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>>>>>+					" non M64 VF BAR%d: %pR. \n",
>>>>> 				 i, res);
>>>>>-			continue;
>>>>>+			return;
>>>>> 		}
>>>>>
>>>>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>>>@@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>> 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>>>>> 		if (!res->flags || res->parent)
>>>>> 			continue;
>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>>-			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>>>>>-				 i, res);
>>>>>-			continue;
>>>>>-		}
>>>>
>>>>When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
>>>>Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
>>>>I think it can be avoided.
>>>>
>>>
>>>Don't get your point. You mean to avoid this function?
>>>
>>>Or clear the IOV BAR when we found one of it is non-M64?
>>>
>>
>>I mean to clear all IOV BARs in case any more of them are IO or M32. In this
>>case, the SRIOV capability won't be enabled. Otherwise, the resources for
>>all IOV BARs are assigned and allocated by PCI subsystem, but they won't
>>be used. Does it make sense to you?
>>
>
>If we want to save MMIO space, this is not necessary.
>
>The IOV BAR will be put into the optional list in assignment stage. So when
>there is not enough MMIO space, they will not be assigned.
>

Why it's not necessary? The problem isn't related to MMIO space - enough, or
not enough as I explained above. Lets have an example here: PF has two
IOV BARs. One of the them is M32 BAR and the other is M64 BAR. For this case,
the SRIOV won't be enabled on the PF. Will the code still assign M32 resources
and M64 resources for those VF BARs (or IOV BARs)? If so, those VF BARs are
never used. Aren't we wasting resources? And how much MMIO resources are wasted
in this case?

>For the long term, maybe P9/P10, we will finally adjust the solution to
>support SRIOV devices with M32 MMIO. So I suggest to leave as it is.
>

I don't see how this problem is specific to P8. However, if you don't want
to improve this case currently. I'm fine. somebody can improve it in future.

>>>>>
>>>>> 		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>>>>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>>>-- 
>>>>>1.7.9.5
>>>>>
>>>
>>>-- 
>>>Richard Yang
>>>Help you, Help me
>
>-- 
>Richard Yang
>Help you, Help me
Alexey Kardashevskiy Aug. 7, 2015, 7:14 a.m. UTC | #12
On 08/07/2015 12:24 PM, Wei Yang wrote:
> On Fri, Aug 07, 2015 at 11:20:10AM +1000, Gavin Shan wrote:
>> On Thu, Aug 06, 2015 at 10:10:10PM +0800, Wei Yang wrote:
>>> On Thu, Aug 06, 2015 at 02:35:57PM +1000, Gavin Shan wrote:
>>>> On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>>>>> On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>>>>> a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>>>>> M64 windwo, which means M64 BAR can't work on it.
>>>>>
>>>>
>>>> s/PHB_IODA2/PHB3
>>>> s/windwo/window
>>>>
>>>>> This patch makes this explicit.
>>>>>
>>>>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>
>>>> The idea sounds right, but there is one question as below.
>>>>
>>>>> ---
>>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
>>>>> 1 file changed, 9 insertions(+), 16 deletions(-)
>>>>>
>>>>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>> index 5738d31..9b41dba 100644
>>>>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>> @@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>>> 		if (!res->flags || !res->parent)
>>>>> 			continue;
>>>>>
>>>>> -		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>> -			continue;
>>>>> -
>>>>> 		/*
>>>>> 		 * The actual IOV BAR range is determined by the start address
>>>>> 		 * and the actual size for num_vfs VFs BAR.  This check is to
>>>>> @@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>>> 		if (!res->flags || !res->parent)
>>>>> 			continue;
>>>>>
>>>>> -		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>> -			continue;
>>>>> -
>>>>> 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>>>>> 		res2 = *res;
>>>>> 		res->start += size * offset;
>>>>> @@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>>>>> 		if (!res->flags || !res->parent)
>>>>> 			continue;
>>>>>
>>>>> -		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>> -			continue;
>>>>> -
>>>>> 		for (j = 0; j < vf_groups; j++) {
>>>>> 			do {
>>>>> 				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>>>>> @@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>>>> 	pdn = pci_get_pdn(pdev);
>>>>>
>>>>> 	if (phb->type == PNV_PHB_IODA2) {
>>>>> +		if (!pdn->vfs_expanded) {
>>>>> +			dev_info(&pdev->dev, "don't support this SRIOV device"
>>>>> +				" with non M64 VF BAR\n");
>>>>> +			return -EBUSY;
>>>>> +		}
>>>>> +
>>>>
>>>> It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
>>>> unavailable. For this case, the VFs are permanently unavailable because of
>>>> running out of space to accomodate M64 and non-M64 VF BARs.
>>>>
>>>> The error message could be printed with dev_warn() and it would be precise
>>>> as below or something else you prefer:
>>>>
>>>> 	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");
>>>>
>>>
>>> Thanks for the comment, will change accordingly.
>>>
>>>>
>>>>> 		/* Calculate available PE for required VFs */
>>>>> 		mutex_lock(&phb->ioda.pe_alloc_mutex);
>>>>> 		pdn->offset = bitmap_find_next_zero_area(
>>>>> @@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>> 		if (!res->flags || res->parent)
>>>>> 			continue;
>>>>> 		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>> -			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>>>>> +			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>>>>> +					" non M64 VF BAR%d: %pR. \n",
>>>>> 				 i, res);
>>>>> -			continue;
>>>>> +			return;
>>>>> 		}
>>>>>
>>>>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>>> @@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>> 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>>>>> 		if (!res->flags || res->parent)
>>>>> 			continue;
>>>>> -		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>> -			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>>>>> -				 i, res);
>>>>> -			continue;
>>>>> -		}
>>>>
>>>> When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
>>>> Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
>>>> I think it can be avoided.
>>>>
>>>
>>> Don't get your point. You mean to avoid this function?
>>>
>>> Or clear the IOV BAR when we found one of it is non-M64?
>>>
>>
>> I mean to clear all IOV BARs in case any more of them are IO or M32. In this
>> case, the SRIOV capability won't be enabled. Otherwise, the resources for
>> all IOV BARs are assigned and allocated by PCI subsystem, but they won't
>> be used. Does it make sense to you?
>>
>
> If we want to save MMIO space, this is not necessary.
>
> The IOV BAR will be put into the optional list in assignment stage. So when
> there is not enough MMIO space, they will not be assigned.


If we are not going to use non-64bit IOV BAR, why would we assign anything 
to it at the first place? Or it is a common PCI code which does it?




> For the long term, maybe P9/P10, we will finally adjust the solution to
> support SRIOV devices with M32 MMIO. So I suggest to leave as it is.
>
>>>>>
>>>>> 		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>>>>> 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
Wei Yang Aug. 10, 2015, 1:40 a.m. UTC | #13
On Fri, Aug 07, 2015 at 05:14:41PM +1000, Alexey Kardashevskiy wrote:
>On 08/07/2015 12:24 PM, Wei Yang wrote:
>>On Fri, Aug 07, 2015 at 11:20:10AM +1000, Gavin Shan wrote:
>>>On Thu, Aug 06, 2015 at 10:10:10PM +0800, Wei Yang wrote:
>>>>On Thu, Aug 06, 2015 at 02:35:57PM +1000, Gavin Shan wrote:
>>>>>On Wed, Aug 05, 2015 at 09:24:58AM +0800, Wei Yang wrote:
>>>>>>On PHB_IODA2, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If
>>>>>>a SRIOV device's BAR is not 64-bit prefetchable, this is not assigned from
>>>>>>M64 windwo, which means M64 BAR can't work on it.
>>>>>>
>>>>>
>>>>>s/PHB_IODA2/PHB3
>>>>>s/windwo/window
>>>>>
>>>>>>This patch makes this explicit.
>>>>>>
>>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>
>>>>>The idea sounds right, but there is one question as below.
>>>>>
>>>>>>---
>>>>>>arch/powerpc/platforms/powernv/pci-ioda.c |   25 +++++++++----------------
>>>>>>1 file changed, 9 insertions(+), 16 deletions(-)
>>>>>>
>>>>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>>index 5738d31..9b41dba 100644
>>>>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>>@@ -908,9 +908,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>>>>		if (!res->flags || !res->parent)
>>>>>>			continue;
>>>>>>
>>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>>-			continue;
>>>>>>-
>>>>>>		/*
>>>>>>		 * The actual IOV BAR range is determined by the start address
>>>>>>		 * and the actual size for num_vfs VFs BAR.  This check is to
>>>>>>@@ -939,9 +936,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>>>>>>		if (!res->flags || !res->parent)
>>>>>>			continue;
>>>>>>
>>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>>-			continue;
>>>>>>-
>>>>>>		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
>>>>>>		res2 = *res;
>>>>>>		res->start += size * offset;
>>>>>>@@ -1221,9 +1215,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>>>>>>		if (!res->flags || !res->parent)
>>>>>>			continue;
>>>>>>
>>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags))
>>>>>>-			continue;
>>>>>>-
>>>>>>		for (j = 0; j < vf_groups; j++) {
>>>>>>			do {
>>>>>>				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>>>>>>@@ -1510,6 +1501,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>>>>>	pdn = pci_get_pdn(pdev);
>>>>>>
>>>>>>	if (phb->type == PNV_PHB_IODA2) {
>>>>>>+		if (!pdn->vfs_expanded) {
>>>>>>+			dev_info(&pdev->dev, "don't support this SRIOV device"
>>>>>>+				" with non M64 VF BAR\n");
>>>>>>+			return -EBUSY;
>>>>>>+		}
>>>>>>+
>>>>>
>>>>>It would be -ENOSPC since -EBUSY indicates the devices (VFs) are temparily
>>>>>unavailable. For this case, the VFs are permanently unavailable because of
>>>>>running out of space to accomodate M64 and non-M64 VF BARs.
>>>>>
>>>>>The error message could be printed with dev_warn() and it would be precise
>>>>>as below or something else you prefer:
>>>>>
>>>>>	dev_warn(&pdev->dev, "SRIOV not supported because of non-M64 VF BAR\n");
>>>>>
>>>>
>>>>Thanks for the comment, will change accordingly.
>>>>
>>>>>
>>>>>>		/* Calculate available PE for required VFs */
>>>>>>		mutex_lock(&phb->ioda.pe_alloc_mutex);
>>>>>>		pdn->offset = bitmap_find_next_zero_area(
>>>>>>@@ -2774,9 +2771,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>>>		if (!res->flags || res->parent)
>>>>>>			continue;
>>>>>>		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>>>-			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
>>>>>>+			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>>>>>>+					" non M64 VF BAR%d: %pR. \n",
>>>>>>				 i, res);
>>>>>>-			continue;
>>>>>>+			return;
>>>>>>		}
>>>>>>
>>>>>>		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>>>>>>@@ -2795,11 +2793,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>>>>>>		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>>>>>>		if (!res->flags || res->parent)
>>>>>>			continue;
>>>>>>-		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>>>>>>-			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>>>>>>-				 i, res);
>>>>>>-			continue;
>>>>>>-		}
>>>>>
>>>>>When any one IOV BAR on the PF is non-M64, none of the VFs can be enabled.
>>>>>Will we still allocate/assign M64 or M32 resources for the IOV BARs? If so,
>>>>>I think it can be avoided.
>>>>>
>>>>
>>>>Don't get your point. You mean to avoid this function?
>>>>
>>>>Or clear the IOV BAR when we found one of it is non-M64?
>>>>
>>>
>>>I mean to clear all IOV BARs in case any more of them are IO or M32. In this
>>>case, the SRIOV capability won't be enabled. Otherwise, the resources for
>>>all IOV BARs are assigned and allocated by PCI subsystem, but they won't
>>>be used. Does it make sense to you?
>>>
>>
>>If we want to save MMIO space, this is not necessary.
>>
>>The IOV BAR will be put into the optional list in assignment stage. So when
>>there is not enough MMIO space, they will not be assigned.
>
>
>If we are not going to use non-64bit IOV BAR, why would we assign
>anything to it at the first place? Or it is a common PCI code which
>does it?
>

Yes.

First skiboot will allocate the range, then kernel read it. Kernel has two
choice, use the address firmware allocated or re-assigned them as we did on
powernv platform

>
>
>
>>For the long term, maybe P9/P10, we will finally adjust the solution to
>>support SRIOV devices with M32 MMIO. So I suggest to leave as it is.
>>
>>>>>>
>>>>>>		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>>>>>>		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>
>
>
>-- 
>Alexey
diff mbox

Patch

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 5738d31..9b41dba 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -908,9 +908,6 @@  static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 		if (!res->flags || !res->parent)
 			continue;
 
-		if (!pnv_pci_is_mem_pref_64(res->flags))
-			continue;
-
 		/*
 		 * The actual IOV BAR range is determined by the start address
 		 * and the actual size for num_vfs VFs BAR.  This check is to
@@ -939,9 +936,6 @@  static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 		if (!res->flags || !res->parent)
 			continue;
 
-		if (!pnv_pci_is_mem_pref_64(res->flags))
-			continue;
-
 		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
 		res2 = *res;
 		res->start += size * offset;
@@ -1221,9 +1215,6 @@  static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 		if (!res->flags || !res->parent)
 			continue;
 
-		if (!pnv_pci_is_mem_pref_64(res->flags))
-			continue;
-
 		for (j = 0; j < vf_groups; j++) {
 			do {
 				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
@@ -1510,6 +1501,12 @@  int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 	pdn = pci_get_pdn(pdev);
 
 	if (phb->type == PNV_PHB_IODA2) {
+		if (!pdn->vfs_expanded) {
+			dev_info(&pdev->dev, "don't support this SRIOV device"
+				" with non M64 VF BAR\n");
+			return -EBUSY;
+		}
+
 		/* Calculate available PE for required VFs */
 		mutex_lock(&phb->ioda.pe_alloc_mutex);
 		pdn->offset = bitmap_find_next_zero_area(
@@ -2774,9 +2771,10 @@  static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 		if (!res->flags || res->parent)
 			continue;
 		if (!pnv_pci_is_mem_pref_64(res->flags)) {
-			dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
+			dev_warn(&pdev->dev, "Don't support SR-IOV with"
+					" non M64 VF BAR%d: %pR. \n",
 				 i, res);
-			continue;
+			return;
 		}
 
 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
@@ -2795,11 +2793,6 @@  static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
 		if (!res->flags || res->parent)
 			continue;
-		if (!pnv_pci_is_mem_pref_64(res->flags)) {
-			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
-				 i, res);
-			continue;
-		}
 
 		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
 		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);