
[V2,6/6] powerpc/powernv: allocate discrete PE# when using M64 BAR in Single PE mode

Message ID 1438737903-10399-7-git-send-email-weiyang@linux.vnet.ibm.com (mailing list archive)
State Superseded

Commit Message

Wei Yang Aug. 5, 2015, 1:25 a.m. UTC
When the M64 BAR is set to Single PE mode, the PE#s assigned to VFs can be
discrete.

This patch restructures the code to allocate discrete PE#s for VFs when the
M64 BAR is set to Single PE mode.

Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/pci-bridge.h     |    2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |   69 +++++++++++++++++++++--------
 2 files changed, 51 insertions(+), 20 deletions(-)

Comments

Gavin Shan Aug. 6, 2015, 5:36 a.m. UTC | #1
On Wed, Aug 05, 2015 at 09:25:03AM +0800, Wei Yang wrote:
>When M64 BAR is set to Single PE mode, the PE# assigned to VF could be
>discrete.
>
>This patch restructures the patch to allocate discrete PE# for VFs when M64
>BAR is set to Single PE mode.
>
>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>---
> arch/powerpc/include/asm/pci-bridge.h     |    2 +-
> arch/powerpc/platforms/powernv/pci-ioda.c |   69 +++++++++++++++++++++--------
> 2 files changed, 51 insertions(+), 20 deletions(-)
>
>diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
>index 8aeba4c..72415c7 100644
>--- a/arch/powerpc/include/asm/pci-bridge.h
>+++ b/arch/powerpc/include/asm/pci-bridge.h
>@@ -213,7 +213,7 @@ struct pci_dn {
> #ifdef CONFIG_PCI_IOV
> 	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
> 	u16     num_vfs;		/* number of VFs enabled*/
>-	int     offset;			/* PE# for the first VF PE */
>+	int     *offset;		/* PE# for the first VF PE or array */
> 	bool    m64_single_mode;	/* Use M64 BAR in Single Mode */
> #define IODA_INVALID_M64        (-1)
> 	int     (*m64_map)[PCI_SRIOV_NUM_BARS];

How about renaming "offset" to "pe_num_map" or "pe_map"? Similar to the comments
I gave on "m64_bar_map", num_of_max_vfs entries can be allocated. Though not
all of them will be used, not too much memory will be wasted.
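
For illustration, a minimal sketch of how the rename would look in struct
pci_dn (the fields are from the patch; the name "pe_num_map" is only the
reviewer's proposal at this point, not the final form):

	#ifdef CONFIG_PCI_IOV
		u16     vfs_expanded;	/* number of VFs IOV BAR expanded */
		u16     num_vfs;	/* number of VFs enabled */
		int     *pe_num_map;	/* single mode: array of discrete PE#s,
					 * indexed by VF; shared mode: entry 0
					 * holds the starting PE# */
	#endif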

>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>index 4042303..9953829 100644
>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>@@ -1243,7 +1243,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>
> 			/* Map the M64 here */
> 			if (pdn->m64_single_mode) {
>-				pe_num = pdn->offset + j;
>+				pe_num = pdn->offset[j];
> 				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> 						pe_num, OPAL_M64_WINDOW_TYPE,
> 						pdn->m64_map[j][i], 0);
>@@ -1347,7 +1347,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
> 	struct pnv_phb        *phb;
> 	struct pci_dn         *pdn;
> 	struct pci_sriov      *iov;
>-	u16 num_vfs;
>+	u16                    num_vfs, i;
>
> 	bus = pdev->bus;
> 	hose = pci_bus_to_host(bus);
>@@ -1361,14 +1361,18 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
>
> 	if (phb->type == PNV_PHB_IODA2) {
> 		if (!pdn->m64_single_mode)
>-			pnv_pci_vf_resource_shift(pdev, -pdn->offset);
>+			pnv_pci_vf_resource_shift(pdev, -*pdn->offset);
>
> 		/* Release M64 windows */
> 		pnv_pci_vf_release_m64(pdev, num_vfs);
>
> 		/* Release PE numbers */
>-		bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
>-		pdn->offset = 0;
>+		if (pdn->m64_single_mode) {
>+			for (i = 0; i < num_vfs; i++)
>+				pnv_ioda_free_pe(phb, pdn->offset[i]);
>+		} else
>+			bitmap_clear(phb->ioda.pe_alloc, *pdn->offset, num_vfs);
>+		kfree(pdn->offset);

Can pnv_ioda_free_pe() be reused to release the PEs?

> 	}
> }
>
>@@ -1394,7 +1398,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>
> 	/* Reserve PE for each VF */
> 	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
>-		pe_num = pdn->offset + vf_index;
>+		if (pdn->m64_single_mode)
>+			pe_num = pdn->offset[vf_index];
>+		else
>+			pe_num = *pdn->offset + vf_index;
>
> 		pe = &phb->ioda.pe_array[pe_num];
> 		pe->pe_number = pe_num;
>@@ -1436,6 +1443,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
> 	struct pnv_phb        *phb;
> 	struct pci_dn         *pdn;
> 	int                    ret;
>+	u16                    i;
>
> 	bus = pdev->bus;
> 	hose = pci_bus_to_host(bus);
>@@ -1462,19 +1470,38 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
> 		}
>
> 		/* Calculate available PE for required VFs */
>-		mutex_lock(&phb->ioda.pe_alloc_mutex);
>-		pdn->offset = bitmap_find_next_zero_area(
>-			phb->ioda.pe_alloc, phb->ioda.total_pe,
>-			0, num_vfs, 0);
>-		if (pdn->offset >= phb->ioda.total_pe) {
>+		if (pdn->m64_single_mode) {
>+			pdn->offset = kmalloc(sizeof(*pdn->offset) * num_vfs,
>+					GFP_KERNEL);
>+			if (!pdn->offset)
>+				return -ENOMEM;
>+			for (i = 0; i < num_vfs; i++)
>+				pdn->offset[i] = IODA_INVALID_PE;
>+			for (i = 0; i < num_vfs; i++) {
>+				pdn->offset[i] = pnv_ioda_alloc_pe(phb);
>+				if (pdn->offset[i] == IODA_INVALID_PE) {
>+					ret = -EBUSY;
>+					goto m64_failed;
>+				}
>+			}
>+		} else {
>+			pdn->offset = kmalloc(sizeof(*pdn->offset), GFP_KERNEL);
>+			if (!pdn->offset)
>+				return -ENOMEM;
>+			mutex_lock(&phb->ioda.pe_alloc_mutex);
>+			*pdn->offset = bitmap_find_next_zero_area(
>+				phb->ioda.pe_alloc, phb->ioda.total_pe,
>+				0, num_vfs, 0);
>+			if (*pdn->offset >= phb->ioda.total_pe) {
>+				mutex_unlock(&phb->ioda.pe_alloc_mutex);
>+				dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
>+				kfree(pdn->offset);
>+				return -EBUSY;
>+			}
>+			bitmap_set(phb->ioda.pe_alloc, *pdn->offset, num_vfs);
> 			mutex_unlock(&phb->ioda.pe_alloc_mutex);
>-			dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
>-			pdn->offset = 0;
>-			return -EBUSY;
> 		}
>-		bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs);
> 		pdn->num_vfs = num_vfs;
>-		mutex_unlock(&phb->ioda.pe_alloc_mutex);
>
> 		/* Assign M64 window accordingly */
> 		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
>@@ -1489,7 +1516,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
> 		 * Otherwise, the PE# for the VF will conflict with others.
> 		 */
> 		if (!pdn->m64_single_mode) {
>-			ret = pnv_pci_vf_resource_shift(pdev, pdn->offset);
>+			ret = pnv_pci_vf_resource_shift(pdev, *pdn->offset);
> 			if (ret)
> 				goto m64_failed;
> 		}
>@@ -1501,8 +1528,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
> 	return 0;
>
> m64_failed:
>-	bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
>-	pdn->offset = 0;
>+	if (pdn->m64_single_mode) {
>+		for (i = 0; i < num_vfs; i++)
>+			pnv_ioda_free_pe(phb, pdn->offset[i]);
>+	} else
>+		bitmap_clear(phb->ioda.pe_alloc, *pdn->offset, num_vfs);
>+	kfree(pdn->offset);
>
> 	return ret;
> }
>-- 
>1.7.9.5
>
Wei Yang Aug. 6, 2015, 1:41 p.m. UTC | #2
On Thu, Aug 06, 2015 at 03:36:01PM +1000, Gavin Shan wrote:
>On Wed, Aug 05, 2015 at 09:25:03AM +0800, Wei Yang wrote:
>>When M64 BAR is set to Single PE mode, the PE# assigned to VF could be
>>discrete.
>>
>>This patch restructures the patch to allocate discrete PE# for VFs when M64
>>BAR is set to Single PE mode.
>>
>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>---
>> arch/powerpc/include/asm/pci-bridge.h     |    2 +-
>> arch/powerpc/platforms/powernv/pci-ioda.c |   69 +++++++++++++++++++++--------
>> 2 files changed, 51 insertions(+), 20 deletions(-)
>>
>>diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
>>index 8aeba4c..72415c7 100644
>>--- a/arch/powerpc/include/asm/pci-bridge.h
>>+++ b/arch/powerpc/include/asm/pci-bridge.h
>>@@ -213,7 +213,7 @@ struct pci_dn {
>> #ifdef CONFIG_PCI_IOV
>> 	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
>> 	u16     num_vfs;		/* number of VFs enabled*/
>>-	int     offset;			/* PE# for the first VF PE */
>>+	int     *offset;		/* PE# for the first VF PE or array */
>> 	bool    m64_single_mode;	/* Use M64 BAR in Single Mode */
>> #define IODA_INVALID_M64        (-1)
>> 	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
>
>how about renaming "offset" to "pe_num_map", or "pe_map" ? Similar to the comments
>I gave to the "m64_bar_map", num_of_max_vfs entries can be allocated. Though not
>all of them will be used, not too much memory will be wasted.
>

Thanks for your comment.

I have thought about changing the name to make it more self-explanatory. But
another fact I want to capture is that this field also reflects the shift
offset when the M64 BAR is used in Shared Mode. So I kept the name.

How about using an "enum": one keeps the name "offset" and the other is
renamed to "pe_num_map", so the meaningful name is used in the proper place?

>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>index 4042303..9953829 100644
>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>@@ -1243,7 +1243,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>>
>> 			/* Map the M64 here */
>> 			if (pdn->m64_single_mode) {
>>-				pe_num = pdn->offset + j;
>>+				pe_num = pdn->offset[j];
>> 				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
>> 						pe_num, OPAL_M64_WINDOW_TYPE,
>> 						pdn->m64_map[j][i], 0);
>>@@ -1347,7 +1347,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
>> 	struct pnv_phb        *phb;
>> 	struct pci_dn         *pdn;
>> 	struct pci_sriov      *iov;
>>-	u16 num_vfs;
>>+	u16                    num_vfs, i;
>>
>> 	bus = pdev->bus;
>> 	hose = pci_bus_to_host(bus);
>>@@ -1361,14 +1361,18 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
>>
>> 	if (phb->type == PNV_PHB_IODA2) {
>> 		if (!pdn->m64_single_mode)
>>-			pnv_pci_vf_resource_shift(pdev, -pdn->offset);
>>+			pnv_pci_vf_resource_shift(pdev, -*pdn->offset);
>>
>> 		/* Release M64 windows */
>> 		pnv_pci_vf_release_m64(pdev, num_vfs);
>>
>> 		/* Release PE numbers */
>>-		bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
>>-		pdn->offset = 0;
>>+		if (pdn->m64_single_mode) {
>>+			for (i = 0; i < num_vfs; i++)
>>+				pnv_ioda_free_pe(phb, pdn->offset[i]);
>>+		} else
>>+			bitmap_clear(phb->ioda.pe_alloc, *pdn->offset, num_vfs);
>>+		kfree(pdn->offset);
>
>Can pnv_ioda_free_pe() be reused to release PE ?

You mean using it to do the similar thing in pnv_ioda_deconfigure_pe()?

>
>> 	}
>> }
>>
>>@@ -1394,7 +1398,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>
>> 	/* Reserve PE for each VF */
>> 	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
>>-		pe_num = pdn->offset + vf_index;
>>+		if (pdn->m64_single_mode)
>>+			pe_num = pdn->offset[vf_index];
>>+		else
>>+			pe_num = *pdn->offset + vf_index;
>>
>> 		pe = &phb->ioda.pe_array[pe_num];
>> 		pe->pe_number = pe_num;
>>@@ -1436,6 +1443,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>> 	struct pnv_phb        *phb;
>> 	struct pci_dn         *pdn;
>> 	int                    ret;
>>+	u16                    i;
>>
>> 	bus = pdev->bus;
>> 	hose = pci_bus_to_host(bus);
>>@@ -1462,19 +1470,38 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>> 		}
>>
>> 		/* Calculate available PE for required VFs */
>>-		mutex_lock(&phb->ioda.pe_alloc_mutex);
>>-		pdn->offset = bitmap_find_next_zero_area(
>>-			phb->ioda.pe_alloc, phb->ioda.total_pe,
>>-			0, num_vfs, 0);
>>-		if (pdn->offset >= phb->ioda.total_pe) {
>>+		if (pdn->m64_single_mode) {
>>+			pdn->offset = kmalloc(sizeof(*pdn->offset) * num_vfs,
>>+					GFP_KERNEL);
>>+			if (!pdn->offset)
>>+				return -ENOMEM;
>>+			for (i = 0; i < num_vfs; i++)
>>+				pdn->offset[i] = IODA_INVALID_PE;
>>+			for (i = 0; i < num_vfs; i++) {
>>+				pdn->offset[i] = pnv_ioda_alloc_pe(phb);
>>+				if (pdn->offset[i] == IODA_INVALID_PE) {
>>+					ret = -EBUSY;
>>+					goto m64_failed;
>>+				}
>>+			}
>>+		} else {
>>+			pdn->offset = kmalloc(sizeof(*pdn->offset), GFP_KERNEL);
>>+			if (!pdn->offset)
>>+				return -ENOMEM;
>>+			mutex_lock(&phb->ioda.pe_alloc_mutex);
>>+			*pdn->offset = bitmap_find_next_zero_area(
>>+				phb->ioda.pe_alloc, phb->ioda.total_pe,
>>+				0, num_vfs, 0);
>>+			if (*pdn->offset >= phb->ioda.total_pe) {
>>+				mutex_unlock(&phb->ioda.pe_alloc_mutex);
>>+				dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
>>+				kfree(pdn->offset);
>>+				return -EBUSY;
>>+			}
>>+			bitmap_set(phb->ioda.pe_alloc, *pdn->offset, num_vfs);
>> 			mutex_unlock(&phb->ioda.pe_alloc_mutex);
>>-			dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
>>-			pdn->offset = 0;
>>-			return -EBUSY;
>> 		}
>>-		bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs);
>> 		pdn->num_vfs = num_vfs;
>>-		mutex_unlock(&phb->ioda.pe_alloc_mutex);
>>
>> 		/* Assign M64 window accordingly */
>> 		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
>>@@ -1489,7 +1516,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>> 		 * Otherwise, the PE# for the VF will conflict with others.
>> 		 */
>> 		if (!pdn->m64_single_mode) {
>>-			ret = pnv_pci_vf_resource_shift(pdev, pdn->offset);
>>+			ret = pnv_pci_vf_resource_shift(pdev, *pdn->offset);
>> 			if (ret)
>> 				goto m64_failed;
>> 		}
>>@@ -1501,8 +1528,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>> 	return 0;
>>
>> m64_failed:
>>-	bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
>>-	pdn->offset = 0;
>>+	if (pdn->m64_single_mode) {
>>+		for (i = 0; i < num_vfs; i++)
>>+			pnv_ioda_free_pe(phb, pdn->offset[i]);
>>+	} else
>>+		bitmap_clear(phb->ioda.pe_alloc, *pdn->offset, num_vfs);
>>+	kfree(pdn->offset);
>>
>> 	return ret;
>> }
>>-- 
>>1.7.9.5
>>
Gavin Shan Aug. 7, 2015, 1:36 a.m. UTC | #3
On Thu, Aug 06, 2015 at 09:41:41PM +0800, Wei Yang wrote:
>On Thu, Aug 06, 2015 at 03:36:01PM +1000, Gavin Shan wrote:
>>On Wed, Aug 05, 2015 at 09:25:03AM +0800, Wei Yang wrote:
>>>When M64 BAR is set to Single PE mode, the PE# assigned to VF could be
>>>discrete.
>>>
>>>This patch restructures the patch to allocate discrete PE# for VFs when M64
>>>BAR is set to Single PE mode.
>>>
>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>---
>>> arch/powerpc/include/asm/pci-bridge.h     |    2 +-
>>> arch/powerpc/platforms/powernv/pci-ioda.c |   69 +++++++++++++++++++++--------
>>> 2 files changed, 51 insertions(+), 20 deletions(-)
>>>
>>>diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
>>>index 8aeba4c..72415c7 100644
>>>--- a/arch/powerpc/include/asm/pci-bridge.h
>>>+++ b/arch/powerpc/include/asm/pci-bridge.h
>>>@@ -213,7 +213,7 @@ struct pci_dn {
>>> #ifdef CONFIG_PCI_IOV
>>> 	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
>>> 	u16     num_vfs;		/* number of VFs enabled*/
>>>-	int     offset;			/* PE# for the first VF PE */
>>>+	int     *offset;		/* PE# for the first VF PE or array */
>>> 	bool    m64_single_mode;	/* Use M64 BAR in Single Mode */
>>> #define IODA_INVALID_M64        (-1)
>>> 	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
>>
>>how about renaming "offset" to "pe_num_map", or "pe_map" ? Similar to the comments
>>I gave to the "m64_bar_map", num_of_max_vfs entries can be allocated. Though not
>>all of them will be used, not too much memory will be wasted.
>>
>
>Thanks for your comment.
>
>I have thought about change the name to make it more self explain. While
>another fact I want to take in is this field is also used to be reflect the
>shift offset when M64 BAR is used in the Shared Mode. So I maintain the name.
>
>How about use "enum", one maintain the name "offset", and another one rename to
>"pe_num_map". And use the meaningful name at proper place?
>

Ok, I'm explaining it in more detail. There are two cases: single vs. shared
mode. When the PHB M64 BARs run in single mode, you need an array to track the
allocated discrete PE#s, and the VF index is the index into that array. When
the PHB M64 BARs run in shared mode, you need contiguous PE#s; no array is
required for this case. Instead, the starting PE# should be stored somewhere,
which can simply be pdn->offset[0].

So when allocating memory for this array, you simply allocate
(sizeof(*pdn->offset) * max_vf_num) no matter what mode the PHB's M64 BARs
will run in. The point is that nobody can enable (max_vf_num + 1) VFs.

With the above approach, the arrays for PE# and M64 BAR remapping needn't be
allocated when enabling the SRIOV capability and released when disabling it.
Instead, those two arrays can be allocated at resource fixup time and freed
when destroying the pdn.
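
A minimal sketch of that scheme (pnv_ioda_alloc_pe(), bitmap_find_next_zero_area()
and bitmap_set() are from the existing code; max_vf_num and the unified
"pe_num_map" name are assumptions from this discussion):

	u16 i;

	/* One allocation serves both modes; max_vf_num entries suffice
	 * because no more than max_vf_num VFs can ever be enabled. */
	pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * max_vf_num,
				  GFP_KERNEL);
	if (!pdn->pe_num_map)
		return -ENOMEM;

	if (pdn->m64_single_mode) {
		/* Single mode: one discrete PE# per VF, indexed by VF
		 * (error handling for IODA_INVALID_PE elided here). */
		for (i = 0; i < num_vfs; i++)
			pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb);
	} else {
		/* Shared mode: record only the starting PE# of the
		 * contiguous range, in entry 0. */
		pdn->pe_num_map[0] = bitmap_find_next_zero_area(
			phb->ioda.pe_alloc, phb->ioda.total_pe,
			0, num_vfs, 0);
		bitmap_set(phb->ioda.pe_alloc, pdn->pe_num_map[0], num_vfs);
	}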

>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>index 4042303..9953829 100644
>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>@@ -1243,7 +1243,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>>>
>>> 			/* Map the M64 here */
>>> 			if (pdn->m64_single_mode) {
>>>-				pe_num = pdn->offset + j;
>>>+				pe_num = pdn->offset[j];
>>> 				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
>>> 						pe_num, OPAL_M64_WINDOW_TYPE,
>>> 						pdn->m64_map[j][i], 0);
>>>@@ -1347,7 +1347,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
>>> 	struct pnv_phb        *phb;
>>> 	struct pci_dn         *pdn;
>>> 	struct pci_sriov      *iov;
>>>-	u16 num_vfs;
>>>+	u16                    num_vfs, i;
>>>
>>> 	bus = pdev->bus;
>>> 	hose = pci_bus_to_host(bus);
>>>@@ -1361,14 +1361,18 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
>>>
>>> 	if (phb->type == PNV_PHB_IODA2) {
>>> 		if (!pdn->m64_single_mode)
>>>-			pnv_pci_vf_resource_shift(pdev, -pdn->offset);
>>>+			pnv_pci_vf_resource_shift(pdev, -*pdn->offset);
>>>
>>> 		/* Release M64 windows */
>>> 		pnv_pci_vf_release_m64(pdev, num_vfs);
>>>
>>> 		/* Release PE numbers */
>>>-		bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
>>>-		pdn->offset = 0;
>>>+		if (pdn->m64_single_mode) {
>>>+			for (i = 0; i < num_vfs; i++)
>>>+				pnv_ioda_free_pe(phb, pdn->offset[i]);
>>>+		} else
>>>+			bitmap_clear(phb->ioda.pe_alloc, *pdn->offset, num_vfs);
>>>+		kfree(pdn->offset);
>>
>>Can pnv_ioda_free_pe() be reused to release PE ?
>
>You mean use it to similar thing in pnv_ioda_deconfigure_pe()?
>

Forget it please. You can clean it up later, not in this patchset...

>>
>>> 	}
>>> }
>>>
>>>@@ -1394,7 +1398,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>>
>>> 	/* Reserve PE for each VF */
>>> 	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
>>>-		pe_num = pdn->offset + vf_index;
>>>+		if (pdn->m64_single_mode)
>>>+			pe_num = pdn->offset[vf_index];
>>>+		else
>>>+			pe_num = *pdn->offset + vf_index;
>>>
>>> 		pe = &phb->ioda.pe_array[pe_num];
>>> 		pe->pe_number = pe_num;
>>>@@ -1436,6 +1443,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>> 	struct pnv_phb        *phb;
>>> 	struct pci_dn         *pdn;
>>> 	int                    ret;
>>>+	u16                    i;
>>>
>>> 	bus = pdev->bus;
>>> 	hose = pci_bus_to_host(bus);
>>>@@ -1462,19 +1470,38 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>> 		}
>>>
>>> 		/* Calculate available PE for required VFs */
>>>-		mutex_lock(&phb->ioda.pe_alloc_mutex);
>>>-		pdn->offset = bitmap_find_next_zero_area(
>>>-			phb->ioda.pe_alloc, phb->ioda.total_pe,
>>>-			0, num_vfs, 0);
>>>-		if (pdn->offset >= phb->ioda.total_pe) {
>>>+		if (pdn->m64_single_mode) {
>>>+			pdn->offset = kmalloc(sizeof(*pdn->offset) * num_vfs,
>>>+					GFP_KERNEL);
>>>+			if (!pdn->offset)
>>>+				return -ENOMEM;
>>>+			for (i = 0; i < num_vfs; i++)
>>>+				pdn->offset[i] = IODA_INVALID_PE;
>>>+			for (i = 0; i < num_vfs; i++) {
>>>+				pdn->offset[i] = pnv_ioda_alloc_pe(phb);
>>>+				if (pdn->offset[i] == IODA_INVALID_PE) {
>>>+					ret = -EBUSY;
>>>+					goto m64_failed;
>>>+				}
>>>+			}
>>>+		} else {
>>>+			pdn->offset = kmalloc(sizeof(*pdn->offset), GFP_KERNEL);
>>>+			if (!pdn->offset)
>>>+				return -ENOMEM;
>>>+			mutex_lock(&phb->ioda.pe_alloc_mutex);
>>>+			*pdn->offset = bitmap_find_next_zero_area(
>>>+				phb->ioda.pe_alloc, phb->ioda.total_pe,
>>>+				0, num_vfs, 0);
>>>+			if (*pdn->offset >= phb->ioda.total_pe) {
>>>+				mutex_unlock(&phb->ioda.pe_alloc_mutex);
>>>+				dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
>>>+				kfree(pdn->offset);
>>>+				return -EBUSY;
>>>+			}
>>>+			bitmap_set(phb->ioda.pe_alloc, *pdn->offset, num_vfs);
>>> 			mutex_unlock(&phb->ioda.pe_alloc_mutex);
>>>-			dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
>>>-			pdn->offset = 0;
>>>-			return -EBUSY;
>>> 		}
>>>-		bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs);
>>> 		pdn->num_vfs = num_vfs;
>>>-		mutex_unlock(&phb->ioda.pe_alloc_mutex);
>>>
>>> 		/* Assign M64 window accordingly */
>>> 		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
>>>@@ -1489,7 +1516,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>> 		 * Otherwise, the PE# for the VF will conflict with others.
>>> 		 */
>>> 		if (!pdn->m64_single_mode) {
>>>-			ret = pnv_pci_vf_resource_shift(pdev, pdn->offset);
>>>+			ret = pnv_pci_vf_resource_shift(pdev, *pdn->offset);
>>> 			if (ret)
>>> 				goto m64_failed;
>>> 		}
>>>@@ -1501,8 +1528,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>>> 	return 0;
>>>
>>> m64_failed:
>>>-	bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
>>>-	pdn->offset = 0;
>>>+	if (pdn->m64_single_mode) {
>>>+		for (i = 0; i < num_vfs; i++)
>>>+			pnv_ioda_free_pe(phb, pdn->offset[i]);
>>>+	} else
>>>+		bitmap_clear(phb->ioda.pe_alloc, *pdn->offset, num_vfs);
>>>+	kfree(pdn->offset);
>>>
>>> 	return ret;
>>> }
>>>-- 
>>>1.7.9.5
>>>
>
>-- 
>Richard Yang
>Help you, Help me
Wei Yang Aug. 7, 2015, 2:33 a.m. UTC | #4
On Fri, Aug 07, 2015 at 11:36:56AM +1000, Gavin Shan wrote:
>On Thu, Aug 06, 2015 at 09:41:41PM +0800, Wei Yang wrote:
>>On Thu, Aug 06, 2015 at 03:36:01PM +1000, Gavin Shan wrote:
>>>On Wed, Aug 05, 2015 at 09:25:03AM +0800, Wei Yang wrote:
>>>>When M64 BAR is set to Single PE mode, the PE# assigned to VF could be
>>>>discrete.
>>>>
>>>>This patch restructures the patch to allocate discrete PE# for VFs when M64
>>>>BAR is set to Single PE mode.
>>>>
>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>---
>>>> arch/powerpc/include/asm/pci-bridge.h     |    2 +-
>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   69 +++++++++++++++++++++--------
>>>> 2 files changed, 51 insertions(+), 20 deletions(-)
>>>>
>>>>diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
>>>>index 8aeba4c..72415c7 100644
>>>>--- a/arch/powerpc/include/asm/pci-bridge.h
>>>>+++ b/arch/powerpc/include/asm/pci-bridge.h
>>>>@@ -213,7 +213,7 @@ struct pci_dn {
>>>> #ifdef CONFIG_PCI_IOV
>>>> 	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
>>>> 	u16     num_vfs;		/* number of VFs enabled*/
>>>>-	int     offset;			/* PE# for the first VF PE */
>>>>+	int     *offset;		/* PE# for the first VF PE or array */
>>>> 	bool    m64_single_mode;	/* Use M64 BAR in Single Mode */
>>>> #define IODA_INVALID_M64        (-1)
>>>> 	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
>>>
>>>how about renaming "offset" to "pe_num_map", or "pe_map" ? Similar to the comments
>>>I gave to the "m64_bar_map", num_of_max_vfs entries can be allocated. Though not
>>>all of them will be used, not too much memory will be wasted.
>>>
>>
>>Thanks for your comment.
>>
>>I have thought about change the name to make it more self explain. While
>>another fact I want to take in is this field is also used to be reflect the
>>shift offset when M64 BAR is used in the Shared Mode. So I maintain the name.
>>
>>How about use "enum", one maintain the name "offset", and another one rename to
>>"pe_num_map". And use the meaningful name at proper place?
>>

So I suppose you agree with my naming proposal.

>
>Ok. I'm explaining it with more details. There are two cases: single vs shared
>mode. When PHB M64 BARs run in single mode, you need an array to track the
>allocated discrete PE#. The VF_index is the index to the array. When PHB M64
>BARs run in shared mode, you need continuous PE#. No array required for this
>case. Instead, the starting PE# should be stored to somewhere, which can
>be pdn->offset[0] simply.
>
>So when allocating memory for this array, you just simply allocate (sizeof(*pdn->offset)
>*max_vf_num) no matter what mode PHB's M64 BARs will run in. The point is nobody
>can enable (max_vf_num + 1) VFs.

The max_vf_num is 15?

>
>With above way, the arrays for PE# and M64 BAR remapping needn't be allocated
>when enabling SRIOV capability and releasing on disabling SRIOV capability.
>Instead, those two arrays can be allocated during resource fixup time and free'ed
>when destroying the pdn.
>

My point of view is the same as before: if memory is not a concern, how about
defining them statically?

And in the long term we may support more VFs; at that point we would need to
restructure the code to accommodate it.

So I suggest that if we want to allocate it dynamically, we allocate exactly
the amount of space needed.
Gavin Shan Aug. 7, 2015, 3:43 a.m. UTC | #5
On Fri, Aug 07, 2015 at 10:33:33AM +0800, Wei Yang wrote:
>On Fri, Aug 07, 2015 at 11:36:56AM +1000, Gavin Shan wrote:
>>On Thu, Aug 06, 2015 at 09:41:41PM +0800, Wei Yang wrote:
>>>On Thu, Aug 06, 2015 at 03:36:01PM +1000, Gavin Shan wrote:
>>>>On Wed, Aug 05, 2015 at 09:25:03AM +0800, Wei Yang wrote:
>>>>>When M64 BAR is set to Single PE mode, the PE# assigned to VF could be
>>>>>discrete.
>>>>>
>>>>>This patch restructures the patch to allocate discrete PE# for VFs when M64
>>>>>BAR is set to Single PE mode.
>>>>>
>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>---
>>>>> arch/powerpc/include/asm/pci-bridge.h     |    2 +-
>>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   69 +++++++++++++++++++++--------
>>>>> 2 files changed, 51 insertions(+), 20 deletions(-)
>>>>>
>>>>>diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
>>>>>index 8aeba4c..72415c7 100644
>>>>>--- a/arch/powerpc/include/asm/pci-bridge.h
>>>>>+++ b/arch/powerpc/include/asm/pci-bridge.h
>>>>>@@ -213,7 +213,7 @@ struct pci_dn {
>>>>> #ifdef CONFIG_PCI_IOV
>>>>> 	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
>>>>> 	u16     num_vfs;		/* number of VFs enabled*/
>>>>>-	int     offset;			/* PE# for the first VF PE */
>>>>>+	int     *offset;		/* PE# for the first VF PE or array */
>>>>> 	bool    m64_single_mode;	/* Use M64 BAR in Single Mode */
>>>>> #define IODA_INVALID_M64        (-1)
>>>>> 	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
>>>>
>>>>how about renaming "offset" to "pe_num_map", or "pe_map" ? Similar to the comments
>>>>I gave to the "m64_bar_map", num_of_max_vfs entries can be allocated. Though not
>>>>all of them will be used, not too much memory will be wasted.
>>>>
>>>
>>>Thanks for your comment.
>>>
>>>I have thought about change the name to make it more self explain. While
>>>another fact I want to take in is this field is also used to be reflect the
>>>shift offset when M64 BAR is used in the Shared Mode. So I maintain the name.
>>>
>>>How about use "enum", one maintain the name "offset", and another one rename to
>>>"pe_num_map". And use the meaningful name at proper place?
>>>
>
>So I suppose you agree with my naming proposal.
>

No, I dislike the "enum" things.

>>
>>Ok. I'm explaining it with more details. There are two cases: single vs shared
>>mode. When PHB M64 BARs run in single mode, you need an array to track the
>>allocated discrete PE#. The VF_index is the index to the array. When PHB M64
>>BARs run in shared mode, you need continuous PE#. No array required for this
>>case. Instead, the starting PE# should be stored to somewhere, which can
>>be pdn->offset[0] simply.
>>
>>So when allocating memory for this array, you just simply allocate (sizeof(*pdn->offset)
>>*max_vf_num) no matter what mode PHB's M64 BARs will run in. The point is nobody
>>can enable (max_vf_num + 1) VFs.
>
>The max_vf_num is 15?
>

I don't understand why you said the max_vf_num is 15. Since max_vf_num varies
across PFs, how can it be a fixed value of 15?

>>
>>With above way, the arrays for PE# and M64 BAR remapping needn't be allocated
>>when enabling SRIOV capability and releasing on disabling SRIOV capability.
>>Instead, those two arrays can be allocated during resource fixup time and free'ed
>>when destroying the pdn.
>>
>
>My same point of view like previous, if the memory is not in the concern, how
>about define them static?
>

From my review, it's a bad idea. How many entries is this array going to have?
256 * NUM_OF_MAX_VF_BARS?

>And for the long term, we may support more VFs. Then at that moment, we need
>to restructure the code to meet it.
>
>So I suggest if we want to allocate it dynamically, we allocate the exact
>number of space.
>

Fine... it can be improved when it has to be, as you said.
Wei Yang Aug. 7, 2015, 5:44 a.m. UTC | #6
On Fri, Aug 07, 2015 at 01:43:01PM +1000, Gavin Shan wrote:
>On Fri, Aug 07, 2015 at 10:33:33AM +0800, Wei Yang wrote:
>>On Fri, Aug 07, 2015 at 11:36:56AM +1000, Gavin Shan wrote:
>>>On Thu, Aug 06, 2015 at 09:41:41PM +0800, Wei Yang wrote:
>>>>On Thu, Aug 06, 2015 at 03:36:01PM +1000, Gavin Shan wrote:
>>>>>On Wed, Aug 05, 2015 at 09:25:03AM +0800, Wei Yang wrote:
>>>>>>When M64 BAR is set to Single PE mode, the PE# assigned to VF could be
>>>>>>discrete.
>>>>>>
>>>>>>This patch restructures the patch to allocate discrete PE# for VFs when M64
>>>>>>BAR is set to Single PE mode.
>>>>>>
>>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>>---
>>>>>> arch/powerpc/include/asm/pci-bridge.h     |    2 +-
>>>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   69 +++++++++++++++++++++--------
>>>>>> 2 files changed, 51 insertions(+), 20 deletions(-)
>>>>>>
>>>>>>diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
>>>>>>index 8aeba4c..72415c7 100644
>>>>>>--- a/arch/powerpc/include/asm/pci-bridge.h
>>>>>>+++ b/arch/powerpc/include/asm/pci-bridge.h
>>>>>>@@ -213,7 +213,7 @@ struct pci_dn {
>>>>>> #ifdef CONFIG_PCI_IOV
>>>>>> 	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
>>>>>> 	u16     num_vfs;		/* number of VFs enabled*/
>>>>>>-	int     offset;			/* PE# for the first VF PE */
>>>>>>+	int     *offset;		/* PE# for the first VF PE or array */
>>>>>> 	bool    m64_single_mode;	/* Use M64 BAR in Single Mode */
>>>>>> #define IODA_INVALID_M64        (-1)
>>>>>> 	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
>>>>>
>>>>>how about renaming "offset" to "pe_num_map", or "pe_map" ? Similar to the comments
>>>>>I gave to the "m64_bar_map", num_of_max_vfs entries can be allocated. Though not
>>>>>all of them will be used, not too much memory will be wasted.
>>>>>
>>>>
>>>>Thanks for your comment.
>>>>
>>>>I have thought about change the name to make it more self explain. While
>>>>another fact I want to take in is this field is also used to be reflect the
>>>>shift offset when M64 BAR is used in the Shared Mode. So I maintain the name.
>>>>
>>>>How about use "enum", one maintain the name "offset", and another one rename to
>>>>"pe_num_map". And use the meaningful name at proper place?
>>>>
>>
>>So I suppose you agree with my naming proposal.
>>
>
>No, I dislike the "enum" things.
>

OK, then do you suggest renaming it to pe_num_map, or keeping it as offset?

>>>
>>>Ok. I'm explaining it with more details. There are two cases: single vs shared
>>>mode. When PHB M64 BARs run in single mode, you need an array to track the
>>>allocated discrete PE#. The VF_index is the index to the array. When PHB M64
>>>BARs run in shared mode, you need continuous PE#. No array required for this
>>>case. Instead, the starting PE# should be stored to somewhere, which can
>>>be pdn->offset[0] simply.
>>>
>>>So when allocating memory for this array, you just simply allocate (sizeof(*pdn->offset)
>>>*max_vf_num) no matter what mode PHB's M64 BARs will run in. The point is nobody
>>>can enable (max_vf_num + 1) VFs.
>>
>>The max_vf_num is 15?
>>
>
>I don't understand why you said: the max_vf_num is 15. Since max_vf_num is variable
>on different PFs, how can it be fixed value - 15 ?
>

In the Shared PE case, a single int to indicate the starting PE# is fine.
In Single PE mode, we could enable 15 VFs in total, with the same number of
PEs (one per VF), which is limited by the number of M64 BARs we have in the
system.

If not, is the number you expected total_vfs?

>>>
>>>With above way, the arrays for PE# and M64 BAR remapping needn't be allocated
>>>when enabling SRIOV capability and releasing on disabling SRIOV capability.
>>>Instead, those two arrays can be allocated during resource fixup time and free'ed
>>>when destroying the pdn.
>>>
>>
>>My same point of view like previous, if the memory is not in the concern, how
>>about define them static?
>>
>
>It's a bad idea from my review. How many entries this array is going to have?
>256 * NUM_OF_MAX_VF_BARS ?
>

No.

It has 15 * 6 entries: at most 15 VFs can be enabled, and a VF can have at
most 6 VF BARs.

>>And for the long term, we may support more VFs. Then at that moment, we need
>>to restructure the code to meet it.
>>
>>So I suggest if we want to allocate it dynamically, we allocate the exact
>>number of space.
>>
>
>Fine... it can be improved when it has to be, as you said.
>
Gavin Shan Aug. 7, 2015, 5:54 a.m. UTC | #7
On Fri, Aug 07, 2015 at 01:44:33PM +0800, Wei Yang wrote:
>On Fri, Aug 07, 2015 at 01:43:01PM +1000, Gavin Shan wrote:
>>On Fri, Aug 07, 2015 at 10:33:33AM +0800, Wei Yang wrote:
>>>On Fri, Aug 07, 2015 at 11:36:56AM +1000, Gavin Shan wrote:
>>>>On Thu, Aug 06, 2015 at 09:41:41PM +0800, Wei Yang wrote:
>>>>>On Thu, Aug 06, 2015 at 03:36:01PM +1000, Gavin Shan wrote:
>>>>>>On Wed, Aug 05, 2015 at 09:25:03AM +0800, Wei Yang wrote:
>>>>>>>When M64 BAR is set to Single PE mode, the PE# assigned to VF could be
>>>>>>>discrete.
>>>>>>>
>>>>>>>This patch restructures the patch to allocate discrete PE# for VFs when M64
>>>>>>>BAR is set to Single PE mode.
>>>>>>>
>>>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>>>---
>>>>>>> arch/powerpc/include/asm/pci-bridge.h     |    2 +-
>>>>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   69 +++++++++++++++++++++--------
>>>>>>> 2 files changed, 51 insertions(+), 20 deletions(-)
>>>>>>>
>>>>>>>diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
>>>>>>>index 8aeba4c..72415c7 100644
>>>>>>>--- a/arch/powerpc/include/asm/pci-bridge.h
>>>>>>>+++ b/arch/powerpc/include/asm/pci-bridge.h
>>>>>>>@@ -213,7 +213,7 @@ struct pci_dn {
>>>>>>> #ifdef CONFIG_PCI_IOV
>>>>>>> 	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
>>>>>>> 	u16     num_vfs;		/* number of VFs enabled*/
>>>>>>>-	int     offset;			/* PE# for the first VF PE */
>>>>>>>+	int     *offset;		/* PE# for the first VF PE or array */
>>>>>>> 	bool    m64_single_mode;	/* Use M64 BAR in Single Mode */
>>>>>>> #define IODA_INVALID_M64        (-1)
>>>>>>> 	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
>>>>>>
>>>>>>how about renaming "offset" to "pe_num_map", or "pe_map" ? Similar to the comments
>>>>>>I gave to the "m64_bar_map", num_of_max_vfs entries can be allocated. Though not
>>>>>>all of them will be used, not too much memory will be wasted.
>>>>>>
>>>>>
>>>>>Thanks for your comment.
>>>>>
>>>>>I have thought about change the name to make it more self explain. While
>>>>>another fact I want to take in is this field is also used to be reflect the
>>>>>shift offset when M64 BAR is used in the Shared Mode. So I maintain the name.
>>>>>
>>>>>How about use "enum", one maintain the name "offset", and another one rename to
>>>>>"pe_num_map". And use the meaningful name at proper place?
>>>>>
>>>
>>>So I suppose you agree with my naming proposal.
>>>
>>
>>No, I dislike the "enum" things.
>>
>
>OK, then you suggest to rename it pe_num_map or keep it as offset?
>

pe_num_map would be better.

>>>>
>>>>Ok. I'm explaining it with more details. There are two cases: single vs shared
>>>>mode. When PHB M64 BARs run in single mode, you need an array to track the
>>>>allocated discrete PE#. The VF_index is the index to the array. When PHB M64
>>>>BARs run in shared mode, you need continuous PE#. No array required for this
>>>>case. Instead, the starting PE# should be stored to somewhere, which can
>>>>be pdn->offset[0] simply.
>>>>
>>>>So when allocating memory for this array, you just simply allocate (sizeof(*pdn->offset)
>>>>*max_vf_num) no matter what mode PHB's M64 BARs will run in. The point is nobody
>>>>can enable (max_vf_num + 1) VFs.
>>>
>>>The max_vf_num is 15?
>>>
>>
>>I don't understand why you said: the max_vf_num is 15. Since max_vf_num is variable
>>on different PFs, how can it be fixed value - 15 ?
>>
>
>In Shared PE case, only one int to indicate the start PE# is fine.
>In Single PE mode, we totally could enable 15 VF, the same number of PEs for
>each VF, which is limited by the number M64 BARs we have in the system.
>
>If not, the number you expected is total_vfs?
>

Then it should be min(total_vfs, phb->ioda.m64_bar_idx), shouldn't it?

>>>>
>>>>With above way, the arrays for PE# and M64 BAR remapping needn't be allocated
>>>>when enabling SRIOV capability and releasing on disabling SRIOV capability.
>>>>Instead, those two arrays can be allocated during resource fixup time and free'ed
>>>>when destroying the pdn.
>>>>
>>>
>>>My same point of view like previous, if the memory is not in the concern, how
>>>about define them static?
>>>
>>
>>It's a bad idea from my review. How many entries this array is going to have?
>>256 * NUM_OF_MAX_VF_BARS ?
>>
>
>No.
>
>It has 15 * 6, 15 VFs we could enable at most and 6 VF BARs a VF could have at
>most.
>

It's min(total_vfs, phb->ioda.m64_bar_idx) VFs that can be enabled at most,
no?

>>>And for the long term, we may support more VFs. Then at that moment, we need
>>>to restructure the code to meet it.
>>>
>>>So I suggest if we want to allocate it dynamically, we allocate the exact
>>>number of space.
>>>
>>
>>Fine... it can be improved when it has to be, as you said.
>>
>
>-- 
>Richard Yang
>Help you, Help me
Wei Yang Aug. 7, 2015, 6:25 a.m. UTC | #8
On Fri, Aug 07, 2015 at 03:54:48PM +1000, Gavin Shan wrote:
>On Fri, Aug 07, 2015 at 01:44:33PM +0800, Wei Yang wrote:
>>On Fri, Aug 07, 2015 at 01:43:01PM +1000, Gavin Shan wrote:
>>>On Fri, Aug 07, 2015 at 10:33:33AM +0800, Wei Yang wrote:
>>>>On Fri, Aug 07, 2015 at 11:36:56AM +1000, Gavin Shan wrote:
>>>>>On Thu, Aug 06, 2015 at 09:41:41PM +0800, Wei Yang wrote:
>>>>>>On Thu, Aug 06, 2015 at 03:36:01PM +1000, Gavin Shan wrote:
>>>>>>>On Wed, Aug 05, 2015 at 09:25:03AM +0800, Wei Yang wrote:
>>>>>>>>When M64 BAR is set to Single PE mode, the PE# assigned to VF could be
>>>>>>>>discrete.
>>>>>>>>
>>>>>>>>This patch restructures the patch to allocate discrete PE# for VFs when M64
>>>>>>>>BAR is set to Single PE mode.
>>>>>>>>
>>>>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>>>>---
>>>>>>>> arch/powerpc/include/asm/pci-bridge.h     |    2 +-
>>>>>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   69 +++++++++++++++++++++--------
>>>>>>>> 2 files changed, 51 insertions(+), 20 deletions(-)
>>>>>>>>
>>>>>>>>diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
>>>>>>>>index 8aeba4c..72415c7 100644
>>>>>>>>--- a/arch/powerpc/include/asm/pci-bridge.h
>>>>>>>>+++ b/arch/powerpc/include/asm/pci-bridge.h
>>>>>>>>@@ -213,7 +213,7 @@ struct pci_dn {
>>>>>>>> #ifdef CONFIG_PCI_IOV
>>>>>>>> 	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
>>>>>>>> 	u16     num_vfs;		/* number of VFs enabled*/
>>>>>>>>-	int     offset;			/* PE# for the first VF PE */
>>>>>>>>+	int     *offset;		/* PE# for the first VF PE or array */
>>>>>>>> 	bool    m64_single_mode;	/* Use M64 BAR in Single Mode */
>>>>>>>> #define IODA_INVALID_M64        (-1)
>>>>>>>> 	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
>>>>>>>
>>>>>>>how about renaming "offset" to "pe_num_map", or "pe_map" ? Similar to the comments
>>>>>>>I gave to the "m64_bar_map", num_of_max_vfs entries can be allocated. Though not
>>>>>>>all of them will be used, not too much memory will be wasted.
>>>>>>>
>>>>>>
>>>>>>Thanks for your comment.
>>>>>>
>>>>>>I have thought about change the name to make it more self explain. While
>>>>>>another fact I want to take in is this field is also used to be reflect the
>>>>>>shift offset when M64 BAR is used in the Shared Mode. So I maintain the name.
>>>>>>
>>>>>>How about use "enum", one maintain the name "offset", and another one rename to
>>>>>>"pe_num_map". And use the meaningful name at proper place?
>>>>>>
>>>>
>>>>So I suppose you agree with my naming proposal.
>>>>
>>>
>>>No, I dislike the "enum" things.
>>>
>>
>>OK, then you suggest to rename it pe_num_map or keep it as offset?
>>
>
>pe_num_map would be better.
>
>>>>>
>>>>>Ok. I'm explaining it with more details. There are two cases: single vs shared
>>>>>mode. When PHB M64 BARs run in single mode, you need an array to track the
>>>>>allocated discrete PE#. The VF_index is the index to the array. When PHB M64
>>>>>BARs run in shared mode, you need continuous PE#. No array required for this
>>>>>case. Instead, the starting PE# should be stored to somewhere, which can
>>>>>be pdn->offset[0] simply.
>>>>>
>>>>>So when allocating memory for this array, you just simply allocate (sizeof(*pdn->offset)
>>>>>*max_vf_num) no matter what mode PHB's M64 BARs will run in. The point is nobody
>>>>>can enable (max_vf_num + 1) VFs.
>>>>
>>>>The max_vf_num is 15?
>>>>
>>>
>>>I don't understand why you said: the max_vf_num is 15. Since max_vf_num is variable
>>>on different PFs, how can it be fixed value - 15 ?
>>>
>>
>>In Shared PE case, only one int to indicate the start PE# is fine.
>>In Single PE mode, we totally could enable 15 VF, the same number of PEs for
>>each VF, which is limited by the number M64 BARs we have in the system.
>>
>>If not, the number you expected is total_vfs?
>>
>
>then it should be min(total_vfs, phb->ioda.m64_bar_idx), isn't it? 
>
>>>>>
>>>>>With above way, the arrays for PE# and M64 BAR remapping needn't be allocated
>>>>>when enabling SRIOV capability and releasing on disabling SRIOV capability.
>>>>>Instead, those two arrays can be allocated during resource fixup time and free'ed
>>>>>when destroying the pdn.
>>>>>
>>>>
>>>>My same point of view like previous, if the memory is not in the concern, how
>>>>about define them static?
>>>>
>>>
>>>It's a bad idea from my review. How many entries this array is going to have?
>>>256 * NUM_OF_MAX_VF_BARS ?
>>>
>>
>>No.
>>
>>It has 15 * 6, 15 VFs we could enable at most and 6 VF BARs a VF could have at
>>most.
>>
>
>It's min(total_vfs, phb->ioda.m64_bar_idx) VFs that can be enabled at maximal
>degree, no?
>

Yes, you are right. The number 15 is the one I used when the field was static.
If we want to allocate it dynamically, we need to choose the smaller one.

I would even suggest improving this formula to min(num_vfs, m64_bar_idx),
since num_vfs <= total_vfs always. That's why num_vfs entries are allocated in
the code.
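
For illustration, the two sizing options discussed here as C sketches
(min_t() is used since the operands may differ in type; the names total_vfs,
num_vfs and phb->ioda.m64_bar_idx follow this thread, the rest is assumed):

	/* Upper bound per Gavin: no PF can enable more VFs than this. */
	max_vf_num = min_t(int, total_vfs, phb->ioda.m64_bar_idx);

	/* Exact sizing per Wei: allocate only what this enable request
	 * needs, since num_vfs <= total_vfs always holds. */
	pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs,
				  GFP_KERNEL);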

>>>>And for the long term, we may support more VFs. Then at that moment, we need
>>>>to restructure the code to meet it.
>>>>
>>>>So I suggest if we want to allocate it dynamically, we allocate the exact
>>>>number of space.
>>>>
>>>
>>>Fine... it can be improved when it has to be, as you said.
>>>
>>
>>-- 
>>Richard Yang
>>Help you, Help me
Alexey Kardashevskiy Aug. 7, 2015, 10 a.m. UTC | #9
On 08/07/2015 03:54 PM, Gavin Shan wrote:
> On Fri, Aug 07, 2015 at 01:44:33PM +0800, Wei Yang wrote:
>> On Fri, Aug 07, 2015 at 01:43:01PM +1000, Gavin Shan wrote:
>>> On Fri, Aug 07, 2015 at 10:33:33AM +0800, Wei Yang wrote:
>>>> On Fri, Aug 07, 2015 at 11:36:56AM +1000, Gavin Shan wrote:
>>>>> On Thu, Aug 06, 2015 at 09:41:41PM +0800, Wei Yang wrote:
>>>>>> On Thu, Aug 06, 2015 at 03:36:01PM +1000, Gavin Shan wrote:
>>>>>>> On Wed, Aug 05, 2015 at 09:25:03AM +0800, Wei Yang wrote:
>>>>>>>> When M64 BAR is set to Single PE mode, the PE# assigned to VF could be
>>>>>>>> discrete.

s/discrete/sparse/, maybe?


>>>>>>>>
>>>>>>>> This patch restructures the patch to allocate discrete PE# for VFs when M64
>>>>>>>> BAR is set to Single PE mode.
>>>>>>>>
>>>>>>>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>>>> ---
>>>>>>>> arch/powerpc/include/asm/pci-bridge.h     |    2 +-
>>>>>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   69 +++++++++++++++++++++--------
>>>>>>>> 2 files changed, 51 insertions(+), 20 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
>>>>>>>> index 8aeba4c..72415c7 100644
>>>>>>>> --- a/arch/powerpc/include/asm/pci-bridge.h
>>>>>>>> +++ b/arch/powerpc/include/asm/pci-bridge.h
>>>>>>>> @@ -213,7 +213,7 @@ struct pci_dn {
>>>>>>>> #ifdef CONFIG_PCI_IOV
>>>>>>>> 	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
>>>>>>>> 	u16     num_vfs;		/* number of VFs enabled*/
>>>>>>>> -	int     offset;			/* PE# for the first VF PE */
>>>>>>>> +	int     *offset;		/* PE# for the first VF PE or array */
>>>>>>>> 	bool    m64_single_mode;	/* Use M64 BAR in Single Mode */
>>>>>>>> #define IODA_INVALID_M64        (-1)
>>>>>>>> 	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
>>>>>>>
>>>>>>> how about renaming "offset" to "pe_num_map", or "pe_map" ? Similar to the comments
>>>>>>> I gave to the "m64_bar_map", num_of_max_vfs entries can be allocated. Though not
>>>>>>> all of them will be used, not too much memory will be wasted.
>>>>>>>
>>>>>>
>>>>>> Thanks for your comment.
>>>>>>
>>>>>> I have thought about change the name to make it more self explain. While
>>>>>> another fact I want to take in is this field is also used to be reflect the
>>>>>> shift offset when M64 BAR is used in the Shared Mode. So I maintain the name.
>>>>>>
>>>>>> How about use "enum", one maintain the name "offset", and another one rename to
>>>>>> "pe_num_map". And use the meaningful name at proper place?
>>>>>>
>>>>
>>>> So I suppose you agree with my naming proposal.
>>>>
>>>
>>> No, I dislike the "enum" things.
>>>
>>
>> OK, then you suggest to rename it pe_num_map or keep it as offset?
>>
>
> pe_num_map would be better.


+1. @offset is very confusing.

It could actually be a linked list: "struct list_head pe_list" in pci_dn and
"struct list_head next" in struct pnv_ioda_pe. I could not quickly spot any
place where you would access this array outside a loop
for (i = 0; i < num_vfs; ++i).
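
A rough sketch of that alternative (the two list_head fields are as Alexey
proposes; pnv_ioda_alloc_pe(), pnv_ioda_free_pe(), pe_array and pe_number are
from the existing code, the surrounding logic is illustrative):

	struct pnv_ioda_pe *pe, *tmp;

	/* Allocation: link each VF PE to the PF's pdn instead of
	 * recording its number in an array. */
	pe = &phb->ioda.pe_array[pnv_ioda_alloc_pe(phb)];
	list_add_tail(&pe->next, &pdn->pe_list);

	/* Release: walk the list rather than indexing by VF. */
	list_for_each_entry_safe(pe, tmp, &pdn->pe_list, next) {
		list_del(&pe->next);
		pnv_ioda_free_pe(phb, pe->pe_number);
	}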

Patch

diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 8aeba4c..72415c7 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -213,7 +213,7 @@  struct pci_dn {
 #ifdef CONFIG_PCI_IOV
 	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
 	u16     num_vfs;		/* number of VFs enabled*/
-	int     offset;			/* PE# for the first VF PE */
+	int     *offset;		/* PE# for the first VF PE or array */
 	bool    m64_single_mode;	/* Use M64 BAR in Single Mode */
 #define IODA_INVALID_M64        (-1)
 	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 4042303..9953829 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1243,7 +1243,7 @@  static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
 
 			/* Map the M64 here */
 			if (pdn->m64_single_mode) {
-				pe_num = pdn->offset + j;
+				pe_num = pdn->offset[j];
 				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
 						pe_num, OPAL_M64_WINDOW_TYPE,
 						pdn->m64_map[j][i], 0);
@@ -1347,7 +1347,7 @@  void pnv_pci_sriov_disable(struct pci_dev *pdev)
 	struct pnv_phb        *phb;
 	struct pci_dn         *pdn;
 	struct pci_sriov      *iov;
-	u16 num_vfs;
+	u16                    num_vfs, i;
 
 	bus = pdev->bus;
 	hose = pci_bus_to_host(bus);
@@ -1361,14 +1361,18 @@  void pnv_pci_sriov_disable(struct pci_dev *pdev)
 
 	if (phb->type == PNV_PHB_IODA2) {
 		if (!pdn->m64_single_mode)
-			pnv_pci_vf_resource_shift(pdev, -pdn->offset);
+			pnv_pci_vf_resource_shift(pdev, -*pdn->offset);
 
 		/* Release M64 windows */
 		pnv_pci_vf_release_m64(pdev, num_vfs);
 
 		/* Release PE numbers */
-		bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
-		pdn->offset = 0;
+		if (pdn->m64_single_mode) {
+			for (i = 0; i < num_vfs; i++)
+				pnv_ioda_free_pe(phb, pdn->offset[i]);
+		} else
+			bitmap_clear(phb->ioda.pe_alloc, *pdn->offset, num_vfs);
+		kfree(pdn->offset);
 	}
 }
 
@@ -1394,7 +1398,10 @@  static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 
 	/* Reserve PE for each VF */
 	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
-		pe_num = pdn->offset + vf_index;
+		if (pdn->m64_single_mode)
+			pe_num = pdn->offset[vf_index];
+		else
+			pe_num = *pdn->offset + vf_index;
 
 		pe = &phb->ioda.pe_array[pe_num];
 		pe->pe_number = pe_num;
@@ -1436,6 +1443,7 @@  int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 	struct pnv_phb        *phb;
 	struct pci_dn         *pdn;
 	int                    ret;
+	u16                    i;
 
 	bus = pdev->bus;
 	hose = pci_bus_to_host(bus);
@@ -1462,19 +1470,38 @@  int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 		}
 
 		/* Calculate available PE for required VFs */
-		mutex_lock(&phb->ioda.pe_alloc_mutex);
-		pdn->offset = bitmap_find_next_zero_area(
-			phb->ioda.pe_alloc, phb->ioda.total_pe,
-			0, num_vfs, 0);
-		if (pdn->offset >= phb->ioda.total_pe) {
+		if (pdn->m64_single_mode) {
+			pdn->offset = kmalloc(sizeof(*pdn->offset) * num_vfs,
+					GFP_KERNEL);
+			if (!pdn->offset)
+				return -ENOMEM;
+			for (i = 0; i < num_vfs; i++)
+				pdn->offset[i] = IODA_INVALID_PE;
+			for (i = 0; i < num_vfs; i++) {
+				pdn->offset[i] = pnv_ioda_alloc_pe(phb);
+				if (pdn->offset[i] == IODA_INVALID_PE) {
+					ret = -EBUSY;
+					goto m64_failed;
+				}
+			}
+		} else {
+			pdn->offset = kmalloc(sizeof(*pdn->offset), GFP_KERNEL);
+			if (!pdn->offset)
+				return -ENOMEM;
+			mutex_lock(&phb->ioda.pe_alloc_mutex);
+			*pdn->offset = bitmap_find_next_zero_area(
+				phb->ioda.pe_alloc, phb->ioda.total_pe,
+				0, num_vfs, 0);
+			if (*pdn->offset >= phb->ioda.total_pe) {
+				mutex_unlock(&phb->ioda.pe_alloc_mutex);
+				dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
+				kfree(pdn->offset);
+				return -EBUSY;
+			}
+			bitmap_set(phb->ioda.pe_alloc, *pdn->offset, num_vfs);
 			mutex_unlock(&phb->ioda.pe_alloc_mutex);
-			dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
-			pdn->offset = 0;
-			return -EBUSY;
 		}
-		bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs);
 		pdn->num_vfs = num_vfs;
-		mutex_unlock(&phb->ioda.pe_alloc_mutex);
 
 		/* Assign M64 window accordingly */
 		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
@@ -1489,7 +1516,7 @@  int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 		 * Otherwise, the PE# for the VF will conflict with others.
 		 */
 		if (!pdn->m64_single_mode) {
-			ret = pnv_pci_vf_resource_shift(pdev, pdn->offset);
+			ret = pnv_pci_vf_resource_shift(pdev, *pdn->offset);
 			if (ret)
 				goto m64_failed;
 		}
@@ -1501,8 +1528,12 @@  int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 	return 0;
 
 m64_failed:
-	bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
-	pdn->offset = 0;
+	if (pdn->m64_single_mode) {
+		for (i = 0; i < num_vfs; i++)
+			pnv_ioda_free_pe(phb, pdn->offset[i]);
+	} else
+		bitmap_clear(phb->ioda.pe_alloc, *pdn->offset, num_vfs);
+	kfree(pdn->offset);
 
 	return ret;
 }