diff mbox

[V13,15/21] powerpc/powernv: Reserve additional space for IOV BAR according to the number of total_pe

Message ID 1425446353-30513-16-git-send-email-weiyang@linux.vnet.ibm.com (mailing list archive)
State Superseded
Delegated to: Benjamin Herrenschmidt
Headers show

Commit Message

Wei Yang March 4, 2015, 5:19 a.m. UTC
On PHB3, PF IOV BAR will be covered by M64 window to have better PE
isolation.  The total_pe number is usually different from total_VFs, which
can lead to a conflict between MMIO space and the PE number.

For example, if total_VFs is 128 and total_pe is 256, the second half of
M64 window will be part of other PCI device, which may already belong
to other PEs.

Prevent the conflict by reserving additional space for the PF IOV BAR,
which is total_pe number of VF's BAR size.

[bhelgaas: make dev_printk() output more consistent, index resource[]
conventionally]
Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/machdep.h        |    4 ++
 arch/powerpc/include/asm/pci-bridge.h     |    3 ++
 arch/powerpc/kernel/pci-common.c          |    5 +++
 arch/powerpc/kernel/pci-hotplug.c         |    4 ++
 arch/powerpc/platforms/powernv/pci-ioda.c |   61 +++++++++++++++++++++++++++++
 5 files changed, 77 insertions(+)

Comments

Bjorn Helgaas March 11, 2015, 1:55 p.m. UTC | #1
On Wed, Mar 04, 2015 at 01:19:07PM +0800, Wei Yang wrote:
> On PHB3, PF IOV BAR will be covered by M64 window to have better PE
> isolation.  The total_pe number is usually different from total_VFs, which
> can lead to a conflict between MMIO space and the PE number.
> 
> For example, if total_VFs is 128 and total_pe is 256, the second half of
> M64 window will be part of other PCI device, which may already belong
> to other PEs.
> 
> Prevent the conflict by reserving additional space for the PF IOV BAR,
> which is total_pe number of VF's BAR size.
> 
> [bhelgaas: make dev_printk() output more consistent, index resource[]
> conventionally]
> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/machdep.h        |    4 ++
>  arch/powerpc/include/asm/pci-bridge.h     |    3 ++
>  arch/powerpc/kernel/pci-common.c          |    5 +++
>  arch/powerpc/kernel/pci-hotplug.c         |    4 ++
>  arch/powerpc/platforms/powernv/pci-ioda.c |   61 +++++++++++++++++++++++++++++
>  5 files changed, 77 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
> index c8175a3..965547c 100644
> --- a/arch/powerpc/include/asm/machdep.h
> +++ b/arch/powerpc/include/asm/machdep.h
> @@ -250,6 +250,10 @@ struct machdep_calls {
>  	/* Reset the secondary bus of bridge */
>  	void  (*pcibios_reset_secondary_bus)(struct pci_dev *dev);
>  
> +#ifdef CONFIG_PCI_IOV
> +	void (*pcibios_fixup_sriov)(struct pci_bus *bus);
> +#endif /* CONFIG_PCI_IOV */
> +
>  	/* Called to shutdown machine specific hardware not already controlled
>  	 * by other drivers.
>  	 */
> diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
> index 513f8f2..de11de7 100644
> --- a/arch/powerpc/include/asm/pci-bridge.h
> +++ b/arch/powerpc/include/asm/pci-bridge.h
> @@ -175,6 +175,9 @@ struct pci_dn {
>  #define IODA_INVALID_PE		(-1)
>  #ifdef CONFIG_PPC_POWERNV
>  	int	pe_number;
> +#ifdef CONFIG_PCI_IOV
> +	u16     max_vfs;		/* number of VFs IOV BAR expended */
> +#endif /* CONFIG_PCI_IOV */
>  #endif
>  	struct list_head child_list;
>  	struct list_head list;
> diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
> index 8203101..022e9fe 100644
> --- a/arch/powerpc/kernel/pci-common.c
> +++ b/arch/powerpc/kernel/pci-common.c
> @@ -1646,6 +1646,11 @@ void pcibios_scan_phb(struct pci_controller *hose)
>  	if (ppc_md.pcibios_fixup_phb)
>  		ppc_md.pcibios_fixup_phb(hose);
>  
> +#ifdef CONFIG_PCI_IOV
> +	if (ppc_md.pcibios_fixup_sriov)
> +		ppc_md.pcibios_fixup_sriov(bus);
> +#endif /* CONFIG_PCI_IOV */

Here, and ...

> +
>  	/* Configure PCI Express settings */
>  	if (bus && !pci_has_flag(PCI_PROBE_ONLY)) {
>  		struct pci_bus *child;
> diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
> index 5b78917..7d238ae 100644
> --- a/arch/powerpc/kernel/pci-hotplug.c
> +++ b/arch/powerpc/kernel/pci-hotplug.c
> @@ -94,6 +94,10 @@ void pcibios_add_pci_devices(struct pci_bus * bus)
>  		 */
>  		slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
>  		pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
> +#ifdef CONFIG_PCI_IOV
> +		if (ppc_md.pcibios_fixup_sriov)
> +			ppc_md.pcibios_fixup_sriov(bus);
> +#endif /* CONFIG_PCI_IOV */

here, you have the same code.  It's good that we now do it for hot-added
devices as well as those present at boot.  But it's bad that it happens in
two different paths.

Isn't there some way we can unify this so the same path is used for the
initial pcibios_scan_phb() and also the hot-add case?  Maybe call
pcibios_fixup_sriov() from pcibios_add_device()?

>  		pcibios_setup_bus_devices(bus);
>  		max = bus->busn_res.start;
>  		for (pass = 0; pass < 2; pass++) {
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 1b37066..958c7a3 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1749,6 +1749,64 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
>  static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
>  #endif /* CONFIG_PCI_MSI */
>  
> +#ifdef CONFIG_PCI_IOV
> +static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
> +{
> +	struct pci_controller *hose;
> +	struct pnv_phb *phb;
> +	struct resource *res;
> +	int i;
> +	resource_size_t size;
> +	struct pci_dn *pdn;
> +
> +	if (!pdev->is_physfn || pdev->is_added)
> +		return;
> +
> +	hose = pci_bus_to_host(pdev->bus);
> +	phb = hose->private_data;
> +
> +	pdn = pci_get_pdn(pdev);
> +	pdn->max_vfs = 0;
> +
> +	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> +		res = &pdev->resource[i + PCI_IOV_RESOURCES];
> +		if (!res->flags || res->parent)
> +			continue;
> +		if (!pnv_pci_is_mem_pref_64(res->flags)) {
> +			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
> +				 i, res);
> +			continue;
> +		}
> +
> +		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
> +		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
> +		res->end = res->start + size * phb->ioda.total_pe - 1;
> +		dev_dbg(&pdev->dev, "                       %pR\n", res);
> +		dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
> +				i, res, phb->ioda.total_pe);
> +	}
> +	pdn->max_vfs = phb->ioda.total_pe;
> +}
> +
> +static void pnv_pci_ioda_fixup_sriov(struct pci_bus *bus)
> +{
> +	struct pci_dev *pdev;
> +	struct pci_bus *b;
> +
> +	list_for_each_entry(pdev, &bus->devices, bus_list) {
> +		b = pdev->subordinate;
> +
> +		if (b)
> +			pnv_pci_ioda_fixup_sriov(b);
> +
> +		if (pdev->is_added)
> +			continue;
> +
> +		pnv_pci_ioda_fixup_iov_resources(pdev);
> +	}
> +}
> +#endif /* CONFIG_PCI_IOV */
> +
>  /*
>   * This function is supposed to be called on basis of PE from top
>   * to bottom style. So the the I/O or MMIO segment assigned to
> @@ -2125,6 +2183,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
>  	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
>  	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
>  	ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus;
> +#ifdef CONFIG_PCI_IOV
> +	ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_sriov;
> +#endif /* CONFIG_PCI_IOV */
>  	pci_add_flags(PCI_REASSIGN_ALL_RSRC);
>  
>  	/* Reset IODA tables to a clean state */
> -- 
> 1.7.9.5
>
Wei Yang March 12, 2015, 1:15 a.m. UTC | #2
On Wed, Mar 11, 2015 at 08:55:07AM -0500, Bjorn Helgaas wrote:
>On Wed, Mar 04, 2015 at 01:19:07PM +0800, Wei Yang wrote:
>> On PHB3, PF IOV BAR will be covered by M64 window to have better PE
>> isolation.  The total_pe number is usually different from total_VFs, which
>> can lead to a conflict between MMIO space and the PE number.
>> 
>> For example, if total_VFs is 128 and total_pe is 256, the second half of
>> M64 window will be part of other PCI device, which may already belong
>> to other PEs.
>> 
>> Prevent the conflict by reserving additional space for the PF IOV BAR,
>> which is total_pe number of VF's BAR size.
>> 
>> [bhelgaas: make dev_printk() output more consistent, index resource[]
>> conventionally]
>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/machdep.h        |    4 ++
>>  arch/powerpc/include/asm/pci-bridge.h     |    3 ++
>>  arch/powerpc/kernel/pci-common.c          |    5 +++
>>  arch/powerpc/kernel/pci-hotplug.c         |    4 ++
>>  arch/powerpc/platforms/powernv/pci-ioda.c |   61 +++++++++++++++++++++++++++++
>>  5 files changed, 77 insertions(+)
>> 
>> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
>> index c8175a3..965547c 100644
>> --- a/arch/powerpc/include/asm/machdep.h
>> +++ b/arch/powerpc/include/asm/machdep.h
>> @@ -250,6 +250,10 @@ struct machdep_calls {
>>  	/* Reset the secondary bus of bridge */
>>  	void  (*pcibios_reset_secondary_bus)(struct pci_dev *dev);
>>  
>> +#ifdef CONFIG_PCI_IOV
>> +	void (*pcibios_fixup_sriov)(struct pci_bus *bus);
>> +#endif /* CONFIG_PCI_IOV */
>> +
>>  	/* Called to shutdown machine specific hardware not already controlled
>>  	 * by other drivers.
>>  	 */
>> diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
>> index 513f8f2..de11de7 100644
>> --- a/arch/powerpc/include/asm/pci-bridge.h
>> +++ b/arch/powerpc/include/asm/pci-bridge.h
>> @@ -175,6 +175,9 @@ struct pci_dn {
>>  #define IODA_INVALID_PE		(-1)
>>  #ifdef CONFIG_PPC_POWERNV
>>  	int	pe_number;
>> +#ifdef CONFIG_PCI_IOV
>> +	u16     max_vfs;		/* number of VFs IOV BAR expended */
>> +#endif /* CONFIG_PCI_IOV */
>>  #endif
>>  	struct list_head child_list;
>>  	struct list_head list;
>> diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
>> index 8203101..022e9fe 100644
>> --- a/arch/powerpc/kernel/pci-common.c
>> +++ b/arch/powerpc/kernel/pci-common.c
>> @@ -1646,6 +1646,11 @@ void pcibios_scan_phb(struct pci_controller *hose)
>>  	if (ppc_md.pcibios_fixup_phb)
>>  		ppc_md.pcibios_fixup_phb(hose);
>>  
>> +#ifdef CONFIG_PCI_IOV
>> +	if (ppc_md.pcibios_fixup_sriov)
>> +		ppc_md.pcibios_fixup_sriov(bus);
>> +#endif /* CONFIG_PCI_IOV */
>
>Here, and ...
>
>> +
>>  	/* Configure PCI Express settings */
>>  	if (bus && !pci_has_flag(PCI_PROBE_ONLY)) {
>>  		struct pci_bus *child;
>> diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
>> index 5b78917..7d238ae 100644
>> --- a/arch/powerpc/kernel/pci-hotplug.c
>> +++ b/arch/powerpc/kernel/pci-hotplug.c
>> @@ -94,6 +94,10 @@ void pcibios_add_pci_devices(struct pci_bus * bus)
>>  		 */
>>  		slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
>>  		pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
>> +#ifdef CONFIG_PCI_IOV
>> +		if (ppc_md.pcibios_fixup_sriov)
>> +			ppc_md.pcibios_fixup_sriov(bus);
>> +#endif /* CONFIG_PCI_IOV */
>
>here, you have the same code.  It's good that we now do it for hot-added
>devices as well as those present at boot.  But it's bad that it happens in
>two different paths.
>
>Isn't there some way we can unify this so the same path is used for the
>initial pcibios_scan_phb() and also the hot-add case?  Maybe call
>pcibios_fixup_sriov() from pcibios_add_device()?
>


This is a very good suggestion. I have changed this and works fine.

Thanks :-)

>>  		pcibios_setup_bus_devices(bus);
>>  		max = bus->busn_res.start;
>>  		for (pass = 0; pass < 2; pass++) {
>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>> index 1b37066..958c7a3 100644
>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>> @@ -1749,6 +1749,64 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
>>  static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
>>  #endif /* CONFIG_PCI_MSI */
>>  
>> +#ifdef CONFIG_PCI_IOV
>> +static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>> +{
>> +	struct pci_controller *hose;
>> +	struct pnv_phb *phb;
>> +	struct resource *res;
>> +	int i;
>> +	resource_size_t size;
>> +	struct pci_dn *pdn;
>> +
>> +	if (!pdev->is_physfn || pdev->is_added)
>> +		return;
>> +
>> +	hose = pci_bus_to_host(pdev->bus);
>> +	phb = hose->private_data;
>> +
>> +	pdn = pci_get_pdn(pdev);
>> +	pdn->max_vfs = 0;
>> +
>> +	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
>> +		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>> +		if (!res->flags || res->parent)
>> +			continue;
>> +		if (!pnv_pci_is_mem_pref_64(res->flags)) {
>> +			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
>> +				 i, res);
>> +			continue;
>> +		}
>> +
>> +		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>> +		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
>> +		res->end = res->start + size * phb->ioda.total_pe - 1;
>> +		dev_dbg(&pdev->dev, "                       %pR\n", res);
>> +		dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
>> +				i, res, phb->ioda.total_pe);
>> +	}
>> +	pdn->max_vfs = phb->ioda.total_pe;
>> +}
>> +
>> +static void pnv_pci_ioda_fixup_sriov(struct pci_bus *bus)
>> +{
>> +	struct pci_dev *pdev;
>> +	struct pci_bus *b;
>> +
>> +	list_for_each_entry(pdev, &bus->devices, bus_list) {
>> +		b = pdev->subordinate;
>> +
>> +		if (b)
>> +			pnv_pci_ioda_fixup_sriov(b);
>> +
>> +		if (pdev->is_added)
>> +			continue;
>> +
>> +		pnv_pci_ioda_fixup_iov_resources(pdev);
>> +	}
>> +}
>> +#endif /* CONFIG_PCI_IOV */
>> +
>>  /*
>>   * This function is supposed to be called on basis of PE from top
>>   * to bottom style. So the the I/O or MMIO segment assigned to
>> @@ -2125,6 +2183,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
>>  	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
>>  	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
>>  	ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus;
>> +#ifdef CONFIG_PCI_IOV
>> +	ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_sriov;
>> +#endif /* CONFIG_PCI_IOV */
>>  	pci_add_flags(PCI_REASSIGN_ALL_RSRC);
>>  
>>  	/* Reset IODA tables to a clean state */
>> -- 
>> 1.7.9.5
>>
Bjorn Helgaas March 19, 2015, 3:08 p.m. UTC | #3
On Thu, Mar 12, 2015 at 09:15:17AM +0800, Wei Yang wrote:
> On Wed, Mar 11, 2015 at 08:55:07AM -0500, Bjorn Helgaas wrote:
> >On Wed, Mar 04, 2015 at 01:19:07PM +0800, Wei Yang wrote:
> >> On PHB3, PF IOV BAR will be covered by M64 window to have better PE
> >> isolation.  The total_pe number is usually different from total_VFs, which
> >> can lead to a conflict between MMIO space and the PE number.
> >> 
> >> For example, if total_VFs is 128 and total_pe is 256, the second half of
> >> M64 window will be part of other PCI device, which may already belong
> >> to other PEs.
> >> 
> >> Prevent the conflict by reserving additional space for the PF IOV BAR,
> >> which is total_pe number of VF's BAR size.
> >> 
> >> [bhelgaas: make dev_printk() output more consistent, index resource[]
> >> conventionally]
> >> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> >> ---
> >>  arch/powerpc/include/asm/machdep.h        |    4 ++
> >>  arch/powerpc/include/asm/pci-bridge.h     |    3 ++
> >>  arch/powerpc/kernel/pci-common.c          |    5 +++
> >>  arch/powerpc/kernel/pci-hotplug.c         |    4 ++
> >>  arch/powerpc/platforms/powernv/pci-ioda.c |   61 +++++++++++++++++++++++++++++
> >>  5 files changed, 77 insertions(+)
> >> 
> >> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
> >> index c8175a3..965547c 100644
> >> --- a/arch/powerpc/include/asm/machdep.h
> >> +++ b/arch/powerpc/include/asm/machdep.h
> >> @@ -250,6 +250,10 @@ struct machdep_calls {
> >>  	/* Reset the secondary bus of bridge */
> >>  	void  (*pcibios_reset_secondary_bus)(struct pci_dev *dev);
> >>  
> >> +#ifdef CONFIG_PCI_IOV
> >> +	void (*pcibios_fixup_sriov)(struct pci_bus *bus);
> >> +#endif /* CONFIG_PCI_IOV */
> >> +
> >>  	/* Called to shutdown machine specific hardware not already controlled
> >>  	 * by other drivers.
> >>  	 */
> >> diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
> >> index 513f8f2..de11de7 100644
> >> --- a/arch/powerpc/include/asm/pci-bridge.h
> >> +++ b/arch/powerpc/include/asm/pci-bridge.h
> >> @@ -175,6 +175,9 @@ struct pci_dn {
> >>  #define IODA_INVALID_PE		(-1)
> >>  #ifdef CONFIG_PPC_POWERNV
> >>  	int	pe_number;
> >> +#ifdef CONFIG_PCI_IOV
> >> +	u16     max_vfs;		/* number of VFs IOV BAR expended */
> >> +#endif /* CONFIG_PCI_IOV */
> >>  #endif
> >>  	struct list_head child_list;
> >>  	struct list_head list;
> >> diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
> >> index 8203101..022e9fe 100644
> >> --- a/arch/powerpc/kernel/pci-common.c
> >> +++ b/arch/powerpc/kernel/pci-common.c
> >> @@ -1646,6 +1646,11 @@ void pcibios_scan_phb(struct pci_controller *hose)
> >>  	if (ppc_md.pcibios_fixup_phb)
> >>  		ppc_md.pcibios_fixup_phb(hose);
> >>  
> >> +#ifdef CONFIG_PCI_IOV
> >> +	if (ppc_md.pcibios_fixup_sriov)
> >> +		ppc_md.pcibios_fixup_sriov(bus);
> >> +#endif /* CONFIG_PCI_IOV */
> >
> >Here, and ...
> >
> >> +
> >>  	/* Configure PCI Express settings */
> >>  	if (bus && !pci_has_flag(PCI_PROBE_ONLY)) {
> >>  		struct pci_bus *child;
> >> diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
> >> index 5b78917..7d238ae 100644
> >> --- a/arch/powerpc/kernel/pci-hotplug.c
> >> +++ b/arch/powerpc/kernel/pci-hotplug.c
> >> @@ -94,6 +94,10 @@ void pcibios_add_pci_devices(struct pci_bus * bus)
> >>  		 */
> >>  		slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
> >>  		pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
> >> +#ifdef CONFIG_PCI_IOV
> >> +		if (ppc_md.pcibios_fixup_sriov)
> >> +			ppc_md.pcibios_fixup_sriov(bus);
> >> +#endif /* CONFIG_PCI_IOV */
> >
> >here, you have the same code.  It's good that we now do it for hot-added
> >devices as well as those present at boot.  But it's bad that it happens in
> >two different paths.
> >
> >Isn't there some way we can unify this so the same path is used for the
> >initial pcibios_scan_phb() and also the hot-add case?  Maybe call
> >pcibios_fixup_sriov() from pcibios_add_device()?
> >
> 
> 
> This is a very good suggestion. I have changed this and works fine.

I was expecting a v14 series with this change.  Is it coming, or are you
waiting for something else from me?
Wei Yang March 19, 2015, 4:18 p.m. UTC | #4
Oh, I thought you are not comfortable with the "Patch v12 10/21 PCI:
Consider additional PF's IOV BAR alignment ..."

V14 is ready to send which is based on v4.0-rc1.

2015-03-19 23:08 GMT+08:00 Bjorn Helgaas <bhelgaas@google.com>:

> On Thu, Mar 12, 2015 at 09:15:17AM +0800, Wei Yang wrote:
> > On Wed, Mar 11, 2015 at 08:55:07AM -0500, Bjorn Helgaas wrote:
> > >On Wed, Mar 04, 2015 at 01:19:07PM +0800, Wei Yang wrote:
> > >> On PHB3, PF IOV BAR will be covered by M64 window to have better PE
> > >> isolation.  The total_pe number is usually different from total_VFs,
> which
> > >> can lead to a conflict between MMIO space and the PE number.
> > >>
> > >> For example, if total_VFs is 128 and total_pe is 256, the second half
> of
> > >> M64 window will be part of other PCI device, which may already belong
> > >> to other PEs.
> > >>
> > >> Prevent the conflict by reserving additional space for the PF IOV BAR,
> > >> which is total_pe number of VF's BAR size.
> > >>
> > >> [bhelgaas: make dev_printk() output more consistent, index resource[]
> > >> conventionally]
> > >> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> > >> ---
> > >>  arch/powerpc/include/asm/machdep.h        |    4 ++
> > >>  arch/powerpc/include/asm/pci-bridge.h     |    3 ++
> > >>  arch/powerpc/kernel/pci-common.c          |    5 +++
> > >>  arch/powerpc/kernel/pci-hotplug.c         |    4 ++
> > >>  arch/powerpc/platforms/powernv/pci-ioda.c |   61
> +++++++++++++++++++++++++++++
> > >>  5 files changed, 77 insertions(+)
> > >>
> > >> diff --git a/arch/powerpc/include/asm/machdep.h
> b/arch/powerpc/include/asm/machdep.h
> > >> index c8175a3..965547c 100644
> > >> --- a/arch/powerpc/include/asm/machdep.h
> > >> +++ b/arch/powerpc/include/asm/machdep.h
> > >> @@ -250,6 +250,10 @@ struct machdep_calls {
> > >>    /* Reset the secondary bus of bridge */
> > >>    void  (*pcibios_reset_secondary_bus)(struct pci_dev *dev);
> > >>
> > >> +#ifdef CONFIG_PCI_IOV
> > >> +  void (*pcibios_fixup_sriov)(struct pci_bus *bus);
> > >> +#endif /* CONFIG_PCI_IOV */
> > >> +
> > >>    /* Called to shutdown machine specific hardware not already
> controlled
> > >>     * by other drivers.
> > >>     */
> > >> diff --git a/arch/powerpc/include/asm/pci-bridge.h
> b/arch/powerpc/include/asm/pci-bridge.h
> > >> index 513f8f2..de11de7 100644
> > >> --- a/arch/powerpc/include/asm/pci-bridge.h
> > >> +++ b/arch/powerpc/include/asm/pci-bridge.h
> > >> @@ -175,6 +175,9 @@ struct pci_dn {
> > >>  #define IODA_INVALID_PE           (-1)
> > >>  #ifdef CONFIG_PPC_POWERNV
> > >>    int     pe_number;
> > >> +#ifdef CONFIG_PCI_IOV
> > >> +  u16     max_vfs;                /* number of VFs IOV BAR expended
> */
> > >> +#endif /* CONFIG_PCI_IOV */
> > >>  #endif
> > >>    struct list_head child_list;
> > >>    struct list_head list;
> > >> diff --git a/arch/powerpc/kernel/pci-common.c
> b/arch/powerpc/kernel/pci-common.c
> > >> index 8203101..022e9fe 100644
> > >> --- a/arch/powerpc/kernel/pci-common.c
> > >> +++ b/arch/powerpc/kernel/pci-common.c
> > >> @@ -1646,6 +1646,11 @@ void pcibios_scan_phb(struct pci_controller
> *hose)
> > >>    if (ppc_md.pcibios_fixup_phb)
> > >>            ppc_md.pcibios_fixup_phb(hose);
> > >>
> > >> +#ifdef CONFIG_PCI_IOV
> > >> +  if (ppc_md.pcibios_fixup_sriov)
> > >> +          ppc_md.pcibios_fixup_sriov(bus);
> > >> +#endif /* CONFIG_PCI_IOV */
> > >
> > >Here, and ...
> > >
> > >> +
> > >>    /* Configure PCI Express settings */
> > >>    if (bus && !pci_has_flag(PCI_PROBE_ONLY)) {
> > >>            struct pci_bus *child;
> > >> diff --git a/arch/powerpc/kernel/pci-hotplug.c
> b/arch/powerpc/kernel/pci-hotplug.c
> > >> index 5b78917..7d238ae 100644
> > >> --- a/arch/powerpc/kernel/pci-hotplug.c
> > >> +++ b/arch/powerpc/kernel/pci-hotplug.c
> > >> @@ -94,6 +94,10 @@ void pcibios_add_pci_devices(struct pci_bus * bus)
> > >>             */
> > >>            slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
> > >>            pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
> > >> +#ifdef CONFIG_PCI_IOV
> > >> +          if (ppc_md.pcibios_fixup_sriov)
> > >> +                  ppc_md.pcibios_fixup_sriov(bus);
> > >> +#endif /* CONFIG_PCI_IOV */
> > >
> > >here, you have the same code.  It's good that we now do it for hot-added
> > >devices as well as those present at boot.  But it's bad that it happens
> in
> > >two different paths.
> > >
> > >Isn't there some way we can unify this so the same path is used for the
> > >initial pcibios_scan_phb() and also the hot-add case?  Maybe call
> > >pcibios_fixup_sriov() from pcibios_add_device()?
> > >
> >
> >
> > This is a very good suggestion. I have changed this and works fine.
>
> I was expecting a v14 series with this change.  Is it coming, or are you
> waiting for something else from me?
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
Bjorn Helgaas March 19, 2015, 5:54 p.m. UTC | #5
On Thu, Mar 19, 2015 at 11:18 AM, Wei Yang <weiyang.kernel@gmail.com> wrote:
> Oh, I thought you are not comfortable with the "Patch v12 10/21 PCI:
> Consider additional PF's IOV BAR alignment ..."
>
> V14 is ready to send which is based on v4.0-rc1.

Unless I missed something, the last email in that thread [1] is from
you, so I think we're ready for the next iteration.

[1] http://lkml.kernel.org/r/20150224083406.32124.65957.stgit@bhelgaas-glaptop2.roam.corp.google.com
Wei Yang March 19, 2015, 11:49 p.m. UTC | #6
2015-03-20 1:54 GMT+08:00 Bjorn Helgaas <bhelgaas@google.com>:
> On Thu, Mar 19, 2015 at 11:18 AM, Wei Yang <weiyang.kernel@gmail.com> wrote:
>> Oh, I thought you are not comfortable with the "Patch v12 10/21 PCI:
>> Consider additional PF's IOV BAR alignment ..."
>>
>> V14 is ready to send which is based on v4.0-rc1.
>
> Unless I missed something, the last email in that thread [1] is from
> you, so I think we're ready for the next iteration.
>
> [1] http://lkml.kernel.org/r/20150224083406.32124.65957.stgit@bhelgaas-glaptop2.roam.corp.google.com


Great~~~ I thought you didn't get a chance to read it.

Will send out a v14 ASAP.
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index c8175a3..965547c 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -250,6 +250,10 @@  struct machdep_calls {
 	/* Reset the secondary bus of bridge */
 	void  (*pcibios_reset_secondary_bus)(struct pci_dev *dev);
 
+#ifdef CONFIG_PCI_IOV
+	void (*pcibios_fixup_sriov)(struct pci_bus *bus);
+#endif /* CONFIG_PCI_IOV */
+
 	/* Called to shutdown machine specific hardware not already controlled
 	 * by other drivers.
 	 */
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 513f8f2..de11de7 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -175,6 +175,9 @@  struct pci_dn {
 #define IODA_INVALID_PE		(-1)
 #ifdef CONFIG_PPC_POWERNV
 	int	pe_number;
+#ifdef CONFIG_PCI_IOV
+	u16     max_vfs;		/* number of VFs IOV BAR expended */
+#endif /* CONFIG_PCI_IOV */
 #endif
 	struct list_head child_list;
 	struct list_head list;
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 8203101..022e9fe 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1646,6 +1646,11 @@  void pcibios_scan_phb(struct pci_controller *hose)
 	if (ppc_md.pcibios_fixup_phb)
 		ppc_md.pcibios_fixup_phb(hose);
 
+#ifdef CONFIG_PCI_IOV
+	if (ppc_md.pcibios_fixup_sriov)
+		ppc_md.pcibios_fixup_sriov(bus);
+#endif /* CONFIG_PCI_IOV */
+
 	/* Configure PCI Express settings */
 	if (bus && !pci_has_flag(PCI_PROBE_ONLY)) {
 		struct pci_bus *child;
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 5b78917..7d238ae 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -94,6 +94,10 @@  void pcibios_add_pci_devices(struct pci_bus * bus)
 		 */
 		slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
 		pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
+#ifdef CONFIG_PCI_IOV
+		if (ppc_md.pcibios_fixup_sriov)
+			ppc_md.pcibios_fixup_sriov(bus);
+#endif /* CONFIG_PCI_IOV */
 		pcibios_setup_bus_devices(bus);
 		max = bus->busn_res.start;
 		for (pass = 0; pass < 2; pass++) {
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 1b37066..958c7a3 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1749,6 +1749,64 @@  static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
 #endif /* CONFIG_PCI_MSI */
 
+#ifdef CONFIG_PCI_IOV
+static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct resource *res;
+	int i;
+	resource_size_t size;
+	struct pci_dn *pdn;
+
+	if (!pdev->is_physfn || pdev->is_added)
+		return;
+
+	hose = pci_bus_to_host(pdev->bus);
+	phb = hose->private_data;
+
+	pdn = pci_get_pdn(pdev);
+	pdn->max_vfs = 0;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || res->parent)
+			continue;
+		if (!pnv_pci_is_mem_pref_64(res->flags)) {
+			dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
+				 i, res);
+			continue;
+		}
+
+		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
+		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+		res->end = res->start + size * phb->ioda.total_pe - 1;
+		dev_dbg(&pdev->dev, "                       %pR\n", res);
+		dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
+				i, res, phb->ioda.total_pe);
+	}
+	pdn->max_vfs = phb->ioda.total_pe;
+}
+
+static void pnv_pci_ioda_fixup_sriov(struct pci_bus *bus)
+{
+	struct pci_dev *pdev;
+	struct pci_bus *b;
+
+	list_for_each_entry(pdev, &bus->devices, bus_list) {
+		b = pdev->subordinate;
+
+		if (b)
+			pnv_pci_ioda_fixup_sriov(b);
+
+		if (pdev->is_added)
+			continue;
+
+		pnv_pci_ioda_fixup_iov_resources(pdev);
+	}
+}
+#endif /* CONFIG_PCI_IOV */
+
 /*
  * This function is supposed to be called on basis of PE from top
  * to bottom style. So the the I/O or MMIO segment assigned to
@@ -2125,6 +2183,9 @@  static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
 	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
 	ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus;
+#ifdef CONFIG_PCI_IOV
+	ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_sriov;
+#endif /* CONFIG_PCI_IOV */
 	pci_add_flags(PCI_REASSIGN_ALL_RSRC);
 
 	/* Reset IODA tables to a clean state */