diff mbox

[V11,08/17] powerpc/pci: Refactor pci_dn

Message ID 1421288887-7765-9-git-send-email-weiyang@linux.vnet.ibm.com (mailing list archive)
State Superseded
Delegated to: Benjamin Herrenschmidt
Headers show

Commit Message

Wei Yang Jan. 15, 2015, 2:27 a.m. UTC
From: Gavin Shan <gwshan@linux.vnet.ibm.com>

pci_dn is the extension of PCI device node and it's created from
device node. Unfortunately, VFs are enabled dynamically by the
PF's driver, so they don't have corresponding device nodes or
pci_dn. The patch refactors pci_dn to support VFs:

   * pci_dn is organized as a hierarchy tree. A VF's pci_dn is put
     on the child list of the pci_dn of the PF's bridge. The pci_dn
     of any other device is put on the child list of the pci_dn of
     its upstream bridge.

   * A VF's pci_dn is expected to be created dynamically when the
     PF enables VFs, and destroyed when the PF disables VFs. The
     pci_dn of any other device is still created from its device
     node as before.

   * For one particular PCI device (VF or not), its pci_dn can be
     found from pdev->dev.archdata.firmware_data, PCI_DN(devnode),
     or parent's list. The fast path (fetching pci_dn through PCI
     device instance) is populated during early fixup time.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/device.h         |    3 +
 arch/powerpc/include/asm/pci-bridge.h     |   14 +-
 arch/powerpc/kernel/pci_dn.c              |  242 ++++++++++++++++++++++++++++-
 arch/powerpc/platforms/powernv/pci-ioda.c |   16 ++
 4 files changed, 270 insertions(+), 5 deletions(-)

Comments

Bjorn Helgaas Feb. 20, 2015, 11:19 p.m. UTC | #1
On Thu, Jan 15, 2015 at 10:27:58AM +0800, Wei Yang wrote:
> From: Gavin Shan <gwshan@linux.vnet.ibm.com>
> 
> pci_dn is the extension of PCI device node and it's created from
> device node. Unfortunately, VFs that are enabled dynamically by
> PF's driver and they don't have corresponding device nodes, and
> pci_dn. The patch refactors pci_dn to support VFs:
> 
>    * pci_dn is organized as a hierarchy tree. VF's pci_dn is put
>      to the child list of pci_dn of PF's bridge. pci_dn of other
>      device put to the child list of pci_dn of its upstream bridge.
> 
>    * VF's pci_dn is expected to be created dynamically when PF
>      enabling VFs. VF's pci_dn will be destroyed when PF disabling
>      VFs. pci_dn of other device is still created from device node
>      as before.
> 
>    * For one particular PCI device (VF or not), its pci_dn can be
>      found from pdev->dev.archdata.firmware_data, PCI_DN(devnode),
>      or parent's list. The fast path (fetching pci_dn through PCI
>      device instance) is populated during early fixup time.
> 
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/device.h         |    3 +
>  arch/powerpc/include/asm/pci-bridge.h     |   14 +-
>  arch/powerpc/kernel/pci_dn.c              |  242 ++++++++++++++++++++++++++++-
>  arch/powerpc/platforms/powernv/pci-ioda.c |   16 ++
>  4 files changed, 270 insertions(+), 5 deletions(-)
> ...

> +#ifdef CONFIG_PCI_IOV
> +static struct pci_dn *add_one_dev_pci_info(struct pci_dn *parent,
> +					   struct pci_dev *pdev,
> +					   int busno, int devfn)
> +{
> +	struct pci_dn *pdn;
> +
> +	/* Except PHB, we always have parent firmware data */
> +	if (!parent)
> +		return NULL;
> +
> +	pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
> +	if (!pdn) {
> +		pr_warn("%s: Out of memory !\n", __func__);
> +		return NULL;
> +	}
> +
> +	pdn->phb = parent->phb;
> +	pdn->parent = parent;
> +	pdn->busno = busno;
> +	pdn->devfn = devfn;
> +#ifdef CONFIG_PPC_POWERNV
> +	pdn->pe_number = IODA_INVALID_PE;
> +#endif
> +	INIT_LIST_HEAD(&pdn->child_list);
> +	INIT_LIST_HEAD(&pdn->list);
> +	list_add_tail(&pdn->list, &parent->child_list);
> +
> +	/*
> +	 * If we already have PCI device instance, lets
> +	 * bind them.
> +	 */
> +	if (pdev)
> +		pdev->dev.archdata.firmware_data = pdn;
> +
> +	return pdn;

I'd like to see this done in pcibios_add_device(), as I mentioned in
response to "[PATCH V11 01/17] PCI/IOV: Export interface for retrieve VF's
BDF".  Maybe that's not feasible for some reason, but it would be a nicer
design if it's possible.

The remove_dev_pci_info() work would be done in pcibios_release_device()
then, of course.

> +}
> +#endif // CONFIG_PCI_IOV
> +
> +struct pci_dn *add_dev_pci_info(struct pci_dev *pdev, u16 vf_num)
> +{
> +#ifdef CONFIG_PCI_IOV
> +	struct pci_dn *parent, *pdn;
> +	int i;
> +
> +	/* Only support IOV for now */
> +	if (!pdev->is_physfn)
> +		return pci_get_pdn(pdev);
> +
> +	/* Check if VFs have been populated */
> +	pdn = pci_get_pdn(pdev);
> +	if (!pdn || (pdn->flags & PCI_DN_FLAG_IOV_VF))
> +		return NULL;
> +
> +	pdn->flags |= PCI_DN_FLAG_IOV_VF;
> +	parent = pci_bus_to_pdn(pdev->bus);
> +	if (!parent)
>  		return NULL;
> -	return PCI_DN(dn);
> +
> +	for (i = 0; i < vf_num; i++) {
> +		pdn = add_one_dev_pci_info(parent, NULL,
> +					   pci_iov_virtfn_bus(pdev, i),
> +					   pci_iov_virtfn_devfn(pdev, i));
> +		if (!pdn) {
> +			pr_warn("%s: Cannot create firmware data "
> +				"for VF#%d of %s\n",
> +				__func__, i, pci_name(pdev));
> +			return NULL;
> +		}
> +	}
> +#endif
> +
> +	return pci_get_pdn(pdev);
> +}
> +
> +void remove_dev_pci_info(struct pci_dev *pdev, u16 vf_num)
> +{
> +#ifdef CONFIG_PCI_IOV
> +	struct pci_dn *parent;
> +	struct pci_dn *pdn, *tmp;
> +	int i;
> +
> +	/* Only support IOV PF for now */
> +	if (!pdev->is_physfn)
> +		return;
> +
> +	/* Check if VFs have been populated */
> +	pdn = pci_get_pdn(pdev);
> +	if (!pdn || !(pdn->flags & PCI_DN_FLAG_IOV_VF))
> +		return;
> +
> +	pdn->flags &= ~PCI_DN_FLAG_IOV_VF;
> +	parent = pci_bus_to_pdn(pdev->bus);
> +	if (!parent)
> +		return;
> +
> +	/*
> +	 * We might introduce flag to pci_dn in future
> +	 * so that we can release VF's firmware data in
> +	 * a batch mode.
> +	 */
> +	for (i = 0; i < vf_num; i++) {
> +		list_for_each_entry_safe(pdn, tmp,
> +			&parent->child_list, list) {
> +			if (pdn->busno != pci_iov_virtfn_bus(pdev, i) ||
> +			    pdn->devfn != pci_iov_virtfn_devfn(pdev, i))
> +				continue;
> +
> +			if (!list_empty(&pdn->list))
> +				list_del(&pdn->list);
> +			kfree(pdn);
> +		}
> +	}
> +#endif
>  }
Gavin Shan Feb. 23, 2015, 12:13 a.m. UTC | #2
On Fri, Feb 20, 2015 at 05:19:17PM -0600, Bjorn Helgaas wrote:
>On Thu, Jan 15, 2015 at 10:27:58AM +0800, Wei Yang wrote:
>> From: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> 
>> pci_dn is the extension of PCI device node and it's created from
>> device node. Unfortunately, VFs that are enabled dynamically by
>> PF's driver and they don't have corresponding device nodes, and
>> pci_dn. The patch refactors pci_dn to support VFs:
>> 
>>    * pci_dn is organized as a hierarchy tree. VF's pci_dn is put
>>      to the child list of pci_dn of PF's bridge. pci_dn of other
>>      device put to the child list of pci_dn of its upstream bridge.
>> 
>>    * VF's pci_dn is expected to be created dynamically when PF
>>      enabling VFs. VF's pci_dn will be destroyed when PF disabling
>>      VFs. pci_dn of other device is still created from device node
>>      as before.
>> 
>>    * For one particular PCI device (VF or not), its pci_dn can be
>>      found from pdev->dev.archdata.firmware_data, PCI_DN(devnode),
>>      or parent's list. The fast path (fetching pci_dn through PCI
>>      device instance) is populated during early fixup time.
>> 
>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/device.h         |    3 +
>>  arch/powerpc/include/asm/pci-bridge.h     |   14 +-
>>  arch/powerpc/kernel/pci_dn.c              |  242 ++++++++++++++++++++++++++++-
>>  arch/powerpc/platforms/powernv/pci-ioda.c |   16 ++
>>  4 files changed, 270 insertions(+), 5 deletions(-)
>> ...
>
>> +#ifdef CONFIG_PCI_IOV
>> +static struct pci_dn *add_one_dev_pci_info(struct pci_dn *parent,
>> +					   struct pci_dev *pdev,
>> +					   int busno, int devfn)
>> +{
>> +	struct pci_dn *pdn;
>> +
>> +	/* Except PHB, we always have parent firmware data */
>> +	if (!parent)
>> +		return NULL;
>> +
>> +	pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
>> +	if (!pdn) {
>> +		pr_warn("%s: Out of memory !\n", __func__);
>> +		return NULL;
>> +	}
>> +
>> +	pdn->phb = parent->phb;
>> +	pdn->parent = parent;
>> +	pdn->busno = busno;
>> +	pdn->devfn = devfn;
>> +#ifdef CONFIG_PPC_POWERNV
>> +	pdn->pe_number = IODA_INVALID_PE;
>> +#endif
>> +	INIT_LIST_HEAD(&pdn->child_list);
>> +	INIT_LIST_HEAD(&pdn->list);
>> +	list_add_tail(&pdn->list, &parent->child_list);
>> +
>> +	/*
>> +	 * If we already have PCI device instance, lets
>> +	 * bind them.
>> +	 */
>> +	if (pdev)
>> +		pdev->dev.archdata.firmware_data = pdn;
>> +
>> +	return pdn;
>
>I'd like to see this done in pcibios_add_device(), as I mentioned in
>response to "[PATCH V11 01/17] PCI/IOV: Export interface for retrieve VF's
>BDF".  Maybe that's not feasible for some reason, but it would be a nicer
>design if it's possible.
>
>The remove_dev_pci_info() work would be done in pcibios_release_device()
>then, of course.
>

Yes, it's not feasible. PCI config accessors rely on the VF's pci_dn. Before
calling pcibios_add_device(), we need to access the VF's config space. That
means we need the VF's pci_dn before pci_setup_device(), as follows:

    sriov_enable()
        pcibios_sriov_enable();     /* Currently, VF's pci_dn is created at this point */
        virtfn_add();
            virtfn_add_bus();       /* Create virtual bus if necessary */
                                    /* ---> A */
            pci_alloc_dev();        /* ---> B */
            pci_setup_device(vf);   /* Access VF's config space */
                pci_read_config_byte(vf, PCI_HEADER_TYPE);
                pci_read_config_dword(vf, PCI_CLASS_REVISION);
                pci_fixup_device(pci_fixup_early, vf);
                pci_read_irq();
                pci_read_bases();
            pci_device_add(vf);
                device_initialize(&vf->dev);
                pci_fixup_device(pci_fixup_header, vf);
                pci_init_capabilities(vf);
                pcibios_add_device(vf);

We have a couple of options here:

1) Keep current code. VF's pci_dn is going to be destroyed in
   pcibios_sriov_disable() as we're doing currently.
2) Introduce pcibios_iov_virtfn_add() (at A) for platform to override.
   VF's pci_dn is going to be destroyed in pcibios_release_device().
3) Introduce pcibios_alloc_dev() (at B) for platform to override. The
   VF's pci_dn is going to be destroyed in pcibios_release_device().

Thanks,
Gavin

>> +}
>> +#endif // CONFIG_PCI_IOV
>> +
>> +struct pci_dn *add_dev_pci_info(struct pci_dev *pdev, u16 vf_num)
>> +{
>> +#ifdef CONFIG_PCI_IOV
>> +	struct pci_dn *parent, *pdn;
>> +	int i;
>> +
>> +	/* Only support IOV for now */
>> +	if (!pdev->is_physfn)
>> +		return pci_get_pdn(pdev);
>> +
>> +	/* Check if VFs have been populated */
>> +	pdn = pci_get_pdn(pdev);
>> +	if (!pdn || (pdn->flags & PCI_DN_FLAG_IOV_VF))
>> +		return NULL;
>> +
>> +	pdn->flags |= PCI_DN_FLAG_IOV_VF;
>> +	parent = pci_bus_to_pdn(pdev->bus);
>> +	if (!parent)
>>  		return NULL;
>> -	return PCI_DN(dn);
>> +
>> +	for (i = 0; i < vf_num; i++) {
>> +		pdn = add_one_dev_pci_info(parent, NULL,
>> +					   pci_iov_virtfn_bus(pdev, i),
>> +					   pci_iov_virtfn_devfn(pdev, i));
>> +		if (!pdn) {
>> +			pr_warn("%s: Cannot create firmware data "
>> +				"for VF#%d of %s\n",
>> +				__func__, i, pci_name(pdev));
>> +			return NULL;
>> +		}
>> +	}
>> +#endif
>> +
>> +	return pci_get_pdn(pdev);
>> +}
>> +
>> +void remove_dev_pci_info(struct pci_dev *pdev, u16 vf_num)
>> +{
>> +#ifdef CONFIG_PCI_IOV
>> +	struct pci_dn *parent;
>> +	struct pci_dn *pdn, *tmp;
>> +	int i;
>> +
>> +	/* Only support IOV PF for now */
>> +	if (!pdev->is_physfn)
>> +		return;
>> +
>> +	/* Check if VFs have been populated */
>> +	pdn = pci_get_pdn(pdev);
>> +	if (!pdn || !(pdn->flags & PCI_DN_FLAG_IOV_VF))
>> +		return;
>> +
>> +	pdn->flags &= ~PCI_DN_FLAG_IOV_VF;
>> +	parent = pci_bus_to_pdn(pdev->bus);
>> +	if (!parent)
>> +		return;
>> +
>> +	/*
>> +	 * We might introduce flag to pci_dn in future
>> +	 * so that we can release VF's firmware data in
>> +	 * a batch mode.
>> +	 */
>> +	for (i = 0; i < vf_num; i++) {
>> +		list_for_each_entry_safe(pdn, tmp,
>> +			&parent->child_list, list) {
>> +			if (pdn->busno != pci_iov_virtfn_bus(pdev, i) ||
>> +			    pdn->devfn != pci_iov_virtfn_devfn(pdev, i))
>> +				continue;
>> +
>> +			if (!list_empty(&pdn->list))
>> +				list_del(&pdn->list);
>> +			kfree(pdn);
>> +		}
>> +	}
>> +#endif
>>  }
>
Bjorn Helgaas Feb. 24, 2015, 8:13 a.m. UTC | #3
On Mon, Feb 23, 2015 at 11:13:49AM +1100, Gavin Shan wrote:
> On Fri, Feb 20, 2015 at 05:19:17PM -0600, Bjorn Helgaas wrote:
> >On Thu, Jan 15, 2015 at 10:27:58AM +0800, Wei Yang wrote:
> >> From: Gavin Shan <gwshan@linux.vnet.ibm.com>
> >> 
> >> pci_dn is the extension of PCI device node and it's created from
> >> device node. Unfortunately, VFs that are enabled dynamically by
> >> PF's driver and they don't have corresponding device nodes, and
> >> pci_dn. The patch refactors pci_dn to support VFs:
> >> 
> >>    * pci_dn is organized as a hierarchy tree. VF's pci_dn is put
> >>      to the child list of pci_dn of PF's bridge. pci_dn of other
> >>      device put to the child list of pci_dn of its upstream bridge.
> >> 
> >>    * VF's pci_dn is expected to be created dynamically when PF
> >>      enabling VFs. VF's pci_dn will be destroyed when PF disabling
> >>      VFs. pci_dn of other device is still created from device node
> >>      as before.
> >> 
> >>    * For one particular PCI device (VF or not), its pci_dn can be
> >>      found from pdev->dev.archdata.firmware_data, PCI_DN(devnode),
> >>      or parent's list. The fast path (fetching pci_dn through PCI
> >>      device instance) is populated during early fixup time.
> >> 
> >> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> >> ---
> >>  arch/powerpc/include/asm/device.h         |    3 +
> >>  arch/powerpc/include/asm/pci-bridge.h     |   14 +-
> >>  arch/powerpc/kernel/pci_dn.c              |  242 ++++++++++++++++++++++++++++-
> >>  arch/powerpc/platforms/powernv/pci-ioda.c |   16 ++
> >>  4 files changed, 270 insertions(+), 5 deletions(-)
> >> ...
> >
> >> +#ifdef CONFIG_PCI_IOV
> >> +static struct pci_dn *add_one_dev_pci_info(struct pci_dn *parent,
> >> +					   struct pci_dev *pdev,
> >> +					   int busno, int devfn)
> >> +{
> >> +	struct pci_dn *pdn;
> >> +
> >> +	/* Except PHB, we always have parent firmware data */
> >> +	if (!parent)
> >> +		return NULL;
> >> +
> >> +	pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
> >> +	if (!pdn) {
> >> +		pr_warn("%s: Out of memory !\n", __func__);
> >> +		return NULL;
> >> +	}
> >> +
> >> +	pdn->phb = parent->phb;
> >> +	pdn->parent = parent;
> >> +	pdn->busno = busno;
> >> +	pdn->devfn = devfn;
> >> +#ifdef CONFIG_PPC_POWERNV
> >> +	pdn->pe_number = IODA_INVALID_PE;
> >> +#endif
> >> +	INIT_LIST_HEAD(&pdn->child_list);
> >> +	INIT_LIST_HEAD(&pdn->list);
> >> +	list_add_tail(&pdn->list, &parent->child_list);
> >> +
> >> +	/*
> >> +	 * If we already have PCI device instance, lets
> >> +	 * bind them.
> >> +	 */
> >> +	if (pdev)
> >> +		pdev->dev.archdata.firmware_data = pdn;
> >> +
> >> +	return pdn;
> >
> >I'd like to see this done in pcibios_add_device(), as I mentioned in
> >response to "[PATCH V11 01/17] PCI/IOV: Export interface for retrieve VF's
> >BDF".  Maybe that's not feasible for some reason, but it would be a nicer
> >design if it's possible.
> >
> >The remove_dev_pci_info() work would be done in pcibios_release_device()
> >then, of course.
> >
> 
> Yes, it's not feasible. PCI config accessors rely on VF's pci_dn. Before
> calling pcibios_add_device(), we need access VF's config space.  That means
> we need VF's pci_dn before pci_setup_device() as follows:
> 
>     sriov_enable()
>         pcibios_sriov_enable();     /* Currently, VF's pci_dn is created at this point */
>         virtfn_add();
>             virtfn_add_bus();       /* Create virtual bus if necessary */
>                                     /* ---> A */
>             pci_alloc_dev();        /* ---> B */
>             pci_setup_device(vf);   /* Access VF's config space */
>                 pci_read_config_byte(vf, PCI_HEADER_TYPE);
>                 pci_read_config_dword(vf, PCI_CLASS_REVISION);
>                 pci_fixup_device(pci_fixup_early, vf);
>                 pci_read_irq();
>                 pci_read_bases();
>             pci_device_add(vf);
>                 device_initialize(&vf->dev);
>                 pci_fixup_device(pci_fixup_header, vf);
>                 pci_init_capabilities(vf);
>                 pcibios_add_device(vf);
> 
> We have couple of options here:
> 
> 1) Keep current code. VF's pci_dn is going to be destroyed in
>    pcibios_sriov_disable() as we're doing currently.
> 2) Introduce pcibios_iov_virtfn_add() (at A) for platform to override.
>    VF's pci_dn is going to be destroyed in pcibios_release_device().
> 3) Introduce pcibios_alloc_dev() (at B) for platform to override. The
>    VF's pci_dn is going to be destroyed in pcibios_release_device().

Ah, yes, now I see the problem.  I don't really like having to export
pci_iov_virtfn_bus() and pci_iov_virtfn_devfn(), but it's probably not
worth the hassle of changing it, and I think adding more pcibios interfaces
would be even worse.

So let's leave it as-is for now.

> >> +}
> >> +#endif // CONFIG_PCI_IOV
> >> +
> >> +struct pci_dn *add_dev_pci_info(struct pci_dev *pdev, u16 vf_num)
> >> +{
> >> +#ifdef CONFIG_PCI_IOV
> >> +	struct pci_dn *parent, *pdn;
> >> +	int i;
> >> +
> >> +	/* Only support IOV for now */
> >> +	if (!pdev->is_physfn)
> >> +		return pci_get_pdn(pdev);
> >> +
> >> +	/* Check if VFs have been populated */
> >> +	pdn = pci_get_pdn(pdev);
> >> +	if (!pdn || (pdn->flags & PCI_DN_FLAG_IOV_VF))
> >> +		return NULL;
> >> +
> >> +	pdn->flags |= PCI_DN_FLAG_IOV_VF;
> >> +	parent = pci_bus_to_pdn(pdev->bus);
> >> +	if (!parent)
> >>  		return NULL;
> >> -	return PCI_DN(dn);
> >> +
> >> +	for (i = 0; i < vf_num; i++) {
> >> +		pdn = add_one_dev_pci_info(parent, NULL,
> >> +					   pci_iov_virtfn_bus(pdev, i),
> >> +					   pci_iov_virtfn_devfn(pdev, i));
> >> +		if (!pdn) {
> >> +			pr_warn("%s: Cannot create firmware data "
> >> +				"for VF#%d of %s\n",
> >> +				__func__, i, pci_name(pdev));
> >> +			return NULL;
> >> +		}
> >> +	}
> >> +#endif
> >> +
> >> +	return pci_get_pdn(pdev);
> >> +}
> >> +
> >> +void remove_dev_pci_info(struct pci_dev *pdev, u16 vf_num)
> >> +{
> >> +#ifdef CONFIG_PCI_IOV
> >> +	struct pci_dn *parent;
> >> +	struct pci_dn *pdn, *tmp;
> >> +	int i;
> >> +
> >> +	/* Only support IOV PF for now */
> >> +	if (!pdev->is_physfn)
> >> +		return;
> >> +
> >> +	/* Check if VFs have been populated */
> >> +	pdn = pci_get_pdn(pdev);
> >> +	if (!pdn || !(pdn->flags & PCI_DN_FLAG_IOV_VF))
> >> +		return;
> >> +
> >> +	pdn->flags &= ~PCI_DN_FLAG_IOV_VF;
> >> +	parent = pci_bus_to_pdn(pdev->bus);
> >> +	if (!parent)
> >> +		return;
> >> +
> >> +	/*
> >> +	 * We might introduce flag to pci_dn in future
> >> +	 * so that we can release VF's firmware data in
> >> +	 * a batch mode.
> >> +	 */
> >> +	for (i = 0; i < vf_num; i++) {
> >> +		list_for_each_entry_safe(pdn, tmp,
> >> +			&parent->child_list, list) {
> >> +			if (pdn->busno != pci_iov_virtfn_bus(pdev, i) ||
> >> +			    pdn->devfn != pci_iov_virtfn_devfn(pdev, i))
> >> +				continue;
> >> +
> >> +			if (!list_empty(&pdn->list))
> >> +				list_del(&pdn->list);
> >> +			kfree(pdn);
> >> +		}
> >> +	}
> >> +#endif
> >>  }
> >
>
Benjamin Herrenschmidt Feb. 24, 2015, 8:25 a.m. UTC | #4
On Tue, 2015-02-24 at 02:13 -0600, Bjorn Helgaas wrote:
> 
> Ah, yes, now I see the problem.  I don't really like having to export
> pci_iov_virtfn_bus() and pci_iov_virtfn_devfn(), but it's probably not
> worth the hassle of changing it, and I think adding more pcibios
> interfaces
> would be even worse.

Aren't we going to eventually turn them all into host bridge ops ? :-)

Cheers,
Ben.
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index 38faede..29992cd 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -34,6 +34,9 @@  struct dev_archdata {
 #ifdef CONFIG_SWIOTLB
 	dma_addr_t		max_direct_dma_addr;
 #endif
+#ifdef CONFIG_PPC64
+	void			*firmware_data;
+#endif
 #ifdef CONFIG_EEH
 	struct eeh_dev		*edev;
 #endif
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 725247b..c1b7dd5 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -89,6 +89,7 @@  struct pci_controller {
 
 #ifdef CONFIG_PPC64
 	unsigned long buid;
+	void *firmware_data;
 #endif	/* CONFIG_PPC64 */
 
 	void *private_data;
@@ -150,9 +151,13 @@  static inline int isa_vaddr_is_ioport(void __iomem *address)
 struct iommu_table;
 
 struct pci_dn {
+	int     flags;
+#define PCI_DN_FLAG_IOV_VF     0x01
+
 	int	busno;			/* pci bus number */
 	int	devfn;			/* pci device and function number */
 
+	struct  pci_dn *parent;
 	struct  pci_controller *phb;	/* for pci devices */
 	struct	iommu_table *iommu_table;	/* for phb's or bridges */
 	struct	device_node *node;	/* back-pointer to the device_node */
@@ -167,14 +172,19 @@  struct pci_dn {
 #ifdef CONFIG_PPC_POWERNV
 	int	pe_number;
 #endif
+	struct list_head child_list;
+	struct list_head list;
 };
 
 /* Get the pointer to a device_node's pci_dn */
 #define PCI_DN(dn)	((struct pci_dn *) (dn)->data)
 
+extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
+					   int devfn);
 extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev);
-
-extern void * update_dn_pci_info(struct device_node *dn, void *data);
+extern struct pci_dn *add_dev_pci_info(struct pci_dev *pdev, u16 vf_num);
+extern void remove_dev_pci_info(struct pci_dev *pdev, u16 vf_num);
+extern void *update_dn_pci_info(struct device_node *dn, void *data);
 
 static inline int pci_device_from_OF_node(struct device_node *np,
 					  u8 *bus, u8 *devfn)
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 1f61fab..6536573 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -32,12 +32,224 @@ 
 #include <asm/ppc-pci.h>
 #include <asm/firmware.h>
 
+/*
+ * Return the firmware data (pci_dn) that backs the indicated
+ * PCI bus. A bus without an associated bridge (e.g. a virtual
+ * bus) is skipped by walking up its parents until a root bus
+ * or a bus with a bridge device is reached; the pci_dn is then
+ * taken from that bus's device node, or NULL if it has none.
+ */
+static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
+{
+	struct pci_bus *pbus;
+	struct device_node *dn;
+	struct pci_dn *pdn;
+
+	/*
+	 * Skip virtual buses, which have no associated
+	 * bridge, by climbing to the closest real bus.
+	 */
+	pbus = bus;
+	while (pbus) {
+		if (pci_is_root_bus(pbus) || pbus->self)
+			break;
+
+		pbus = pbus->parent;
+	}
+
+	/*
+	 * Apart from virtual buses, every PCI bus is
+	 * expected to have a device node.
+	 */
+	dn = pci_bus_to_OF_node(pbus);
+	pdn = dn ? PCI_DN(dn) : NULL;
+
+	return pdn;
+}
+
+struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
+				    int devfn)
+{
+	struct device_node *dn = NULL;
+	struct pci_dn *parent, *pdn;
+	struct pci_dev *pdev = NULL;
+
+	/* Fast path: pci_dn cached on the matching PCI device */
+	list_for_each_entry(pdev, &bus->devices, bus_list) {
+		if (pdev->devfn == devfn) {
+			if (pdev->dev.archdata.firmware_data)
+				return pdev->dev.archdata.firmware_data;
+
+			dn = pci_device_to_OF_node(pdev);
+			break;
+		}
+	}
+
+	/* Fast path: pci_dn attached to the device node */
+	pdn = dn ? PCI_DN(dn) : NULL;
+	if (pdn)
+		return pdn;
+
+	/* Slow path: search the child list of the bus's pci_dn */
+	parent = pci_bus_to_pdn(bus);
+	if (!parent)
+		return NULL;
+
+	list_for_each_entry(pdn, &parent->child_list, list) {
+		if (pdn->busno == bus->number &&
+		    pdn->devfn == devfn)
+			return pdn;
+	}
+
+	return NULL;
+}
+
 struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
 {
-	struct device_node *dn = pci_device_to_OF_node(pdev);
-	if (!dn)
+	struct device_node *dn;
+	struct pci_dn *parent, *pdn;
+
+	/* Fast path: pci_dn already cached on the PCI device */
+	if (pdev->dev.archdata.firmware_data)
+		return pdev->dev.archdata.firmware_data;
+
+	/* Otherwise use the pci_dn of the device node, if any */
+	dn = pci_device_to_OF_node(pdev);
+	pdn = dn ? PCI_DN(dn) : NULL;
+	if (pdn)
+		return pdn;
+
+	/*
+	 * VFs don't have device nodes. Their firmware data
+	 * is hooked to PF's bridge, so search its child list.
+	 */
+	parent = pci_bus_to_pdn(pdev->bus);
+	if (!parent)
+		return NULL;
+
+	list_for_each_entry(pdn, &parent->child_list, list) {
+		if (pdn->busno == pdev->bus->number &&
+		    pdn->devfn == pdev->devfn)
+			return pdn;
+	}
+
+	return NULL;
+}
+
+#ifdef CONFIG_PCI_IOV
+static struct pci_dn *add_one_dev_pci_info(struct pci_dn *parent,
+					   struct pci_dev *pdev,
+					   int busno, int devfn)
+{
+	struct pci_dn *pdn;
+
+	/* Except for PHB, parent firmware data must exist */
+	if (!parent)
+		return NULL;
+
+	pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
+	if (!pdn) {
+		pr_warn("%s: Out of memory !\n", __func__);
+		return NULL;
+	}
+
+	pdn->phb = parent->phb;
+	pdn->parent = parent;
+	pdn->busno = busno;
+	pdn->devfn = devfn;
+#ifdef CONFIG_PPC_POWERNV
+	pdn->pe_number = IODA_INVALID_PE;
+#endif
+	INIT_LIST_HEAD(&pdn->child_list);
+	INIT_LIST_HEAD(&pdn->list);
+	list_add_tail(&pdn->list, &parent->child_list);
+
+	/*
+	 * If the PCI device instance already exists, bind
+	 * the new pci_dn to it.
+	 */
+	if (pdev)
+		pdev->dev.archdata.firmware_data = pdn;
+
+	return pdn;
+}
+#endif /* CONFIG_PCI_IOV */
+
+struct pci_dn *add_dev_pci_info(struct pci_dev *pdev, u16 vf_num)
+{
+#ifdef CONFIG_PCI_IOV
+	struct pci_dn *parent, *pdn;
+	int i;
+
+	/* Only support IOV PF for now */
+	if (!pdev->is_physfn)
+		return pci_get_pdn(pdev);
+
+	/* Bail out if PF has no pci_dn or VFs were already populated */
+	pdn = pci_get_pdn(pdev);
+	if (!pdn || (pdn->flags & PCI_DN_FLAG_IOV_VF))
+		return NULL;
+
+	pdn->flags |= PCI_DN_FLAG_IOV_VF;
+	parent = pci_bus_to_pdn(pdev->bus);
+	if (!parent)
 		return NULL;
-	return PCI_DN(dn);
+
+	for (i = 0; i < vf_num; i++) {
+		pdn = add_one_dev_pci_info(parent, NULL,
+					   pci_iov_virtfn_bus(pdev, i),
+					   pci_iov_virtfn_devfn(pdev, i));
+		if (!pdn) {
+			pr_warn("%s: Cannot create firmware data "
+				"for VF#%d of %s\n",
+				__func__, i, pci_name(pdev));
+			return NULL;
+		}
+	}
+#endif
+
+	return pci_get_pdn(pdev);
+}
+
+void remove_dev_pci_info(struct pci_dev *pdev, u16 vf_num)
+{
+#ifdef CONFIG_PCI_IOV
+	struct pci_dn *parent;
+	struct pci_dn *pdn, *tmp;
+	int i;
+
+	/* Only support IOV PF for now */
+	if (!pdev->is_physfn)
+		return;
+
+	/* Nothing to do unless VFs have been populated for this PF */
+	pdn = pci_get_pdn(pdev);
+	if (!pdn || !(pdn->flags & PCI_DN_FLAG_IOV_VF))
+		return;
+
+	pdn->flags &= ~PCI_DN_FLAG_IOV_VF;
+	parent = pci_bus_to_pdn(pdev->bus);
+	if (!parent)
+		return;
+
+	/*
+	 * Each VF is looked up by bus/devfn in the parent's
+	 * child list. We might introduce flag to pci_dn in
+	 * future to release VF's firmware data in batch mode.
+	 */
+	for (i = 0; i < vf_num; i++) {
+		list_for_each_entry_safe(pdn, tmp,
+			&parent->child_list, list) {
+			if (pdn->busno != pci_iov_virtfn_bus(pdev, i) ||
+			    pdn->devfn != pci_iov_virtfn_devfn(pdev, i))
+				continue;
+
+			if (!list_empty(&pdn->list))
+				list_del(&pdn->list);
+			kfree(pdn);
+		}
+	}
+#endif
 }
 
 /*
@@ -49,6 +261,7 @@  void *update_dn_pci_info(struct device_node *dn, void *data)
 	struct pci_controller *phb = data;
 	const __be32 *type = of_get_property(dn, "ibm,pci-config-space-type", NULL);
 	const __be32 *regs;
+	struct device_node *parent;
 	struct pci_dn *pdn;
 
 	pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
@@ -70,6 +283,15 @@  void *update_dn_pci_info(struct device_node *dn, void *data)
 	}
 
 	pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1);
+
+	/* Attach to parent node */
+	INIT_LIST_HEAD(&pdn->child_list);
+	INIT_LIST_HEAD(&pdn->list);
+	parent = of_get_parent(dn);
+	pdn->parent = parent ? PCI_DN(parent) : NULL;
+	if (pdn->parent)
+		list_add_tail(&pdn->list, &pdn->parent->child_list);
+
 	return NULL;
 }
 
@@ -150,6 +372,7 @@  void pci_devs_phb_init_dynamic(struct pci_controller *phb)
 	if (pdn) {
 		pdn->devfn = pdn->busno = -1;
 		pdn->phb = phb;
+		phb->firmware_data = pdn;
 	}
 
 	/* Update dn->phb ptrs for new phb and children devices */
@@ -173,3 +396,16 @@  void __init pci_devs_phb_init(void)
 	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
 		pci_devs_phb_init_dynamic(phb);
 }
+
+static void pci_dev_pdn_setup(struct pci_dev *pdev)
+{
+	struct pci_dn *pdn;
+
+	if (pdev->dev.archdata.firmware_data)
+		return;
+
+	/* Cache pci_dn on the device: the fast path of pci_get_pdn() */
+	pdn = pci_get_pdn(pdev);
+	pdev->dev.archdata.firmware_data = pdn;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pci_dev_pdn_setup);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index fac88ed..5a8e6b1 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -949,6 +949,22 @@  static void pnv_pci_ioda_setup_PEs(void)
 	}
 }
 
+#ifdef CONFIG_PCI_IOV
+int pcibios_sriov_disable(struct pci_dev *pdev, u16 vf_num)
+{
+	/* Release the firmware data (pci_dn) of the PF's VFs */
+	remove_dev_pci_info(pdev, vf_num);
+	return 0;
+}
+
+int pcibios_sriov_enable(struct pci_dev *pdev, u16 vf_num)
+{
+	/* Allocate VFs' pci_dn. NOTE(review): NULL result is ignored */
+	add_dev_pci_info(pdev, vf_num);
+	return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
 static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
 {
 	struct pci_dn *pdn = pci_get_pdn(pdev);