diff mbox

[RFC,V3,14/17] ppc/pci: create/release dev-tree node for VFs

Message ID 1402365399-5121-15-git-send-email-weiyang@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

Wei Yang June 10, 2014, 1:56 a.m. UTC
Currently, the powernv platform is not aware of VFs. This means no dev-node
represents a VF. Also, a VF's PCI device is created only when the PF driver wants
to enable it. This leaves pdn->pcidev and pdn->pe_number with invalid values.

This patch creates/releases a dev-node for each VF and fixes up these fields
when a VF's pci_dev is created.

Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/Kconfig    |    1 +
 arch/powerpc/platforms/powernv/pci-ioda.c |  103 +++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/pci.c      |   20 ++++++
 3 files changed, 124 insertions(+)

Comments

Grant Likely June 18, 2014, 6:26 p.m. UTC | #1
On Tue, Jun 10, 2014 at 2:56 AM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
> Currently, powernv platform is not aware of VFs. This means no dev-node
> represents a VF. Also, VF PCI device is created when PF driver want to enable
> it. This leads to the pdn->pdev and pdn->pe_number an invalid value.
>
> This patch create/release dev-node for VF and fixs this when a VF's pci_dev
> is created.
>
> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>

I don't think this is the right way to handle this. Unless it is a
fixup to a buggy devicetree provided by firmware, I don't want to see
any code modifying the devicetree to describe stuff that is able to be
directly enumerated. Really the pci code should handle the lack of a
device_node gracefully. If it cannot then it should be fixed.

g.

> ---
>  arch/powerpc/platforms/powernv/Kconfig    |    1 +
>  arch/powerpc/platforms/powernv/pci-ioda.c |  103 +++++++++++++++++++++++++++++
>  arch/powerpc/platforms/powernv/pci.c      |   20 ++++++
>  3 files changed, 124 insertions(+)
>
> diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
> index 895e8a2..0dd331b 100644
> --- a/arch/powerpc/platforms/powernv/Kconfig
> +++ b/arch/powerpc/platforms/powernv/Kconfig
> @@ -11,6 +11,7 @@ config PPC_POWERNV
>         select PPC_UDBG_16550
>         select PPC_SCOM
>         select ARCH_RANDOM
> +       select OF_DYNAMIC
>         default y
>
>  config PPC_POWERNV_RTAS
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index e46c5bf..9ace027 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -23,6 +23,7 @@
>  #include <linux/io.h>
>  #include <linux/msi.h>
>  #include <linux/memblock.h>
> +#include <linux/of_pci.h>
>
>  #include <asm/sections.h>
>  #include <asm/io.h>
> @@ -771,6 +772,108 @@ static void pnv_pci_ioda_setup_PEs(void)
>         }
>  }
>
> +#ifdef CONFIG_PCI_IOV
> +static void pnv_pci_create_vf_node(struct pci_dev *dev, u16 vf_num)
> +{
> +       struct device_node *dn, *p_dn;
> +       struct pci_dn *pdn;
> +       struct pci_controller *hose;
> +       struct property *pp;
> +       void* value;
> +       u16 id;
> +
> +       hose = pci_bus_to_host(dev->bus);
> +
> +       /* Create dev-tree node for VFs if this is a PF */
> +       p_dn = pci_bus_to_OF_node(dev->bus);
> +       if (p_dn == NULL) {
> +               dev_err(&dev->dev, "SRIOV: VF bus NULL device node\n");
> +               return;
> +       }
> +
> +       for (id = 0; id < vf_num; id++) {
> +               dn = kzalloc(sizeof(*dn), GFP_KERNEL);
> +               pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
> +               pp  = kzalloc(sizeof(*pp), GFP_KERNEL);
> +               value = kzalloc(sizeof(u32), GFP_KERNEL);
> +
> +               if (!dn || !pdn || !pp || !value) {
> +                       kfree(dn);
> +                       kfree(pdn);
> +                       kfree(pp);
> +                       kfree(value);
> +                       dev_warn(&dev->dev, "%s: failed to create"
> +                               "dev-tree node for idx(%d)\n",
> +                               __func__, id);
> +
> +                       break;
> +               }
> +
> +               pp->value = value;
> +               pdn->node = dn;
> +               pdn->devfn = pci_iov_virtfn_devfn(dev, id);
> +               pdn->busno = dev->bus->number;
> +               pdn->pe_number = IODA_INVALID_PE;
> +               pdn->phb = hose;
> +
> +               dn->data = pdn;
> +               kref_init(&dn->kref);
> +               dn->full_name = dn->name =
> +                       kasprintf(GFP_KERNEL, "%s/vf%d",
> +                               p_dn->full_name, pdn->devfn);
> +               dn->parent = p_dn;
> +
> +               pp->name = kasprintf(GFP_KERNEL, "reg");
> +               pp->length = 5 * sizeof(__be32);
> +               *(u32*)pp->value = cpu_to_be32(pdn->devfn) << 8;
> +               dn->properties = pp;
> +
> +               of_attach_node(dn);
> +       }
> +}
> +
> +static void pnv_pci_release_vf_node(struct pci_dev *dev, u16 vf_num)
> +{
> +       struct device_node *dn;
> +       struct property *pp;
> +       u16 id;
> +
> +       for (id = 0; id < vf_num; id++) {
> +               dn = of_pci_find_child_device(dev->bus->dev.of_node,
> +                               pci_iov_virtfn_devfn(dev, id));
> +               if (!dn)
> +                       continue;
> +
> +               of_detach_node(dn);
> +               pp = dn->properties;
> +               kfree(pp->name);
> +               kfree(pp->value);
> +               kfree(pp);
> +               kfree(dn->data);
> +               kfree(dn);
> +       }
> +}
> +
> +int pcibios_sriov_disable(struct pci_dev *pdev)
> +{
> +       struct pci_sriov *iov;
> +       u16 vf_num;
> +
> +       iov = pdev->sriov;
> +       vf_num = iov->num_VFs;
> +       pnv_pci_release_vf_node(pdev, vf_num);
> +
> +       return 0;
> +}
> +
> +int pcibios_sriov_enable(struct pci_dev *pdev, u16 vf_num)
> +{
> +       pnv_pci_create_vf_node(pdev, vf_num);
> +
> +       return 0;
> +}
> +#endif /* CONFIG_PCI_IOV */
> +
>  static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
>  {
>         struct pci_dn *pdn = pci_get_pdn(pdev);
> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
> index 687a068..43fcc73 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -654,6 +654,26 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
>  {
>         struct pci_controller *hose = pci_bus_to_host(pdev->bus);
>         struct pnv_phb *phb = hose->private_data;
> +#ifdef CONFIG_PCI_IOV
> +       struct pnv_ioda_pe *pe;
> +       struct pci_dn *pdn;
> +
> +       /* Fix the VF pdn PE number */
> +       if (pdev->is_virtfn) {
> +               pdn = pci_get_pdn(pdev);
> +               if (pdn->pcidev == NULL || pdn->pe_number == IODA_INVALID_PE) {
> +                       list_for_each_entry(pe, &phb->ioda.pe_list, list) {
> +                               if (pe->rid ==
> +                                       ((pdev->bus->number << 8) | (pdev->devfn & 0xff))) {
> +                                       pdn->pcidev = pdev;
> +                                       pdn->pe_number = pe->pe_number;
> +                                       pe->pdev = pdev;
> +                                       break;
> +                               }
> +                       }
> +               }
> +       }
> +#endif /* CONFIG_PCI_IOV */
>
>         /* If we have no phb structure, try to setup a fallback based on
>          * the device-tree (RTAS PCI for example)
> --
> 1.7.9.5
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
Benjamin Herrenschmidt June 18, 2014, 8:51 p.m. UTC | #2
On Wed, 2014-06-18 at 19:26 +0100, Grant Likely wrote:
> I don't think this is the right way to handle this. Unless it is a
> fixup to a buggy devicetree provided by firmware, I don't want to see
> any code modifying the devicetree to describe stuff that is able to be
> directly enumerated. Really the pci code should handle the lack of a
> device_node gracefully. If it cannot then it should be fixed.

Right, I've long said that we need to get rid of that "pci_dn" structure
we've been carrying around forever on ppc64.

Any auxiliary data structures we keep around associated with a PCI
device should be pointed to by the pci_dev itself, possibly using
firmware_data or similar.

Cheers,
Ben.
Wei Yang June 19, 2014, 2:46 a.m. UTC | #3
On Wed, Jun 18, 2014 at 07:26:27PM +0100, Grant Likely wrote:
>On Tue, Jun 10, 2014 at 2:56 AM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
>> Currently, powernv platform is not aware of VFs. This means no dev-node
>> represents a VF. Also, VF PCI device is created when PF driver want to enable
>> it. This leads to the pdn->pdev and pdn->pe_number an invalid value.
>>
>> This patch create/release dev-node for VF and fixs this when a VF's pci_dev
>> is created.
>>
>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>
>I don't think this is the right way to handle this. Unless it is a
>fixup to a buggy devicetree provided by firmware, I don't want to see
>any code modifying the devicetree to describe stuff that is able to be
>directly enumerated. Really the pci code should handle the lack of a
>device_node gracefully. If it cannot then it should be fixed.

Grant,

Glad to see your comment.

I will fix this in the firmware.

>
>g.
>
>> ---
>>  arch/powerpc/platforms/powernv/Kconfig    |    1 +
>>  arch/powerpc/platforms/powernv/pci-ioda.c |  103 +++++++++++++++++++++++++++++
>>  arch/powerpc/platforms/powernv/pci.c      |   20 ++++++
>>  3 files changed, 124 insertions(+)
>>
>> diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
>> index 895e8a2..0dd331b 100644
>> --- a/arch/powerpc/platforms/powernv/Kconfig
>> +++ b/arch/powerpc/platforms/powernv/Kconfig
>> @@ -11,6 +11,7 @@ config PPC_POWERNV
>>         select PPC_UDBG_16550
>>         select PPC_SCOM
>>         select ARCH_RANDOM
>> +       select OF_DYNAMIC
>>         default y
>>
>>  config PPC_POWERNV_RTAS
>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>> index e46c5bf..9ace027 100644
>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>> @@ -23,6 +23,7 @@
>>  #include <linux/io.h>
>>  #include <linux/msi.h>
>>  #include <linux/memblock.h>
>> +#include <linux/of_pci.h>
>>
>>  #include <asm/sections.h>
>>  #include <asm/io.h>
>> @@ -771,6 +772,108 @@ static void pnv_pci_ioda_setup_PEs(void)
>>         }
>>  }
>>
>> +#ifdef CONFIG_PCI_IOV
>> +static void pnv_pci_create_vf_node(struct pci_dev *dev, u16 vf_num)
>> +{
>> +       struct device_node *dn, *p_dn;
>> +       struct pci_dn *pdn;
>> +       struct pci_controller *hose;
>> +       struct property *pp;
>> +       void* value;
>> +       u16 id;
>> +
>> +       hose = pci_bus_to_host(dev->bus);
>> +
>> +       /* Create dev-tree node for VFs if this is a PF */
>> +       p_dn = pci_bus_to_OF_node(dev->bus);
>> +       if (p_dn == NULL) {
>> +               dev_err(&dev->dev, "SRIOV: VF bus NULL device node\n");
>> +               return;
>> +       }
>> +
>> +       for (id = 0; id < vf_num; id++) {
>> +               dn = kzalloc(sizeof(*dn), GFP_KERNEL);
>> +               pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
>> +               pp  = kzalloc(sizeof(*pp), GFP_KERNEL);
>> +               value = kzalloc(sizeof(u32), GFP_KERNEL);
>> +
>> +               if (!dn || !pdn || !pp || !value) {
>> +                       kfree(dn);
>> +                       kfree(pdn);
>> +                       kfree(pp);
>> +                       kfree(value);
>> +                       dev_warn(&dev->dev, "%s: failed to create"
>> +                               "dev-tree node for idx(%d)\n",
>> +                               __func__, id);
>> +
>> +                       break;
>> +               }
>> +
>> +               pp->value = value;
>> +               pdn->node = dn;
>> +               pdn->devfn = pci_iov_virtfn_devfn(dev, id);
>> +               pdn->busno = dev->bus->number;
>> +               pdn->pe_number = IODA_INVALID_PE;
>> +               pdn->phb = hose;
>> +
>> +               dn->data = pdn;
>> +               kref_init(&dn->kref);
>> +               dn->full_name = dn->name =
>> +                       kasprintf(GFP_KERNEL, "%s/vf%d",
>> +                               p_dn->full_name, pdn->devfn);
>> +               dn->parent = p_dn;
>> +
>> +               pp->name = kasprintf(GFP_KERNEL, "reg");
>> +               pp->length = 5 * sizeof(__be32);
>> +               *(u32*)pp->value = cpu_to_be32(pdn->devfn) << 8;
>> +               dn->properties = pp;
>> +
>> +               of_attach_node(dn);
>> +       }
>> +}
>> +
>> +static void pnv_pci_release_vf_node(struct pci_dev *dev, u16 vf_num)
>> +{
>> +       struct device_node *dn;
>> +       struct property *pp;
>> +       u16 id;
>> +
>> +       for (id = 0; id < vf_num; id++) {
>> +               dn = of_pci_find_child_device(dev->bus->dev.of_node,
>> +                               pci_iov_virtfn_devfn(dev, id));
>> +               if (!dn)
>> +                       continue;
>> +
>> +               of_detach_node(dn);
>> +               pp = dn->properties;
>> +               kfree(pp->name);
>> +               kfree(pp->value);
>> +               kfree(pp);
>> +               kfree(dn->data);
>> +               kfree(dn);
>> +       }
>> +}
>> +
>> +int pcibios_sriov_disable(struct pci_dev *pdev)
>> +{
>> +       struct pci_sriov *iov;
>> +       u16 vf_num;
>> +
>> +       iov = pdev->sriov;
>> +       vf_num = iov->num_VFs;
>> +       pnv_pci_release_vf_node(pdev, vf_num);
>> +
>> +       return 0;
>> +}
>> +
>> +int pcibios_sriov_enable(struct pci_dev *pdev, u16 vf_num)
>> +{
>> +       pnv_pci_create_vf_node(pdev, vf_num);
>> +
>> +       return 0;
>> +}
>> +#endif /* CONFIG_PCI_IOV */
>> +
>>  static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
>>  {
>>         struct pci_dn *pdn = pci_get_pdn(pdev);
>> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>> index 687a068..43fcc73 100644
>> --- a/arch/powerpc/platforms/powernv/pci.c
>> +++ b/arch/powerpc/platforms/powernv/pci.c
>> @@ -654,6 +654,26 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
>>  {
>>         struct pci_controller *hose = pci_bus_to_host(pdev->bus);
>>         struct pnv_phb *phb = hose->private_data;
>> +#ifdef CONFIG_PCI_IOV
>> +       struct pnv_ioda_pe *pe;
>> +       struct pci_dn *pdn;
>> +
>> +       /* Fix the VF pdn PE number */
>> +       if (pdev->is_virtfn) {
>> +               pdn = pci_get_pdn(pdev);
>> +               if (pdn->pcidev == NULL || pdn->pe_number == IODA_INVALID_PE) {
>> +                       list_for_each_entry(pe, &phb->ioda.pe_list, list) {
>> +                               if (pe->rid ==
>> +                                       ((pdev->bus->number << 8) | (pdev->devfn & 0xff))) {
>> +                                       pdn->pcidev = pdev;
>> +                                       pdn->pe_number = pe->pe_number;
>> +                                       pe->pdev = pdev;
>> +                                       break;
>> +                               }
>> +                       }
>> +               }
>> +       }
>> +#endif /* CONFIG_PCI_IOV */
>>
>>         /* If we have no phb structure, try to setup a fallback based on
>>          * the device-tree (RTAS PCI for example)
>> --
>> 1.7.9.5
>>
>> _______________________________________________
>> Linuxppc-dev mailing list
>> Linuxppc-dev@lists.ozlabs.org
>> https://lists.ozlabs.org/listinfo/linuxppc-dev
Grant Likely June 19, 2014, 8:30 a.m. UTC | #4
On Thu, Jun 19, 2014 at 3:46 AM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
> On Wed, Jun 18, 2014 at 07:26:27PM +0100, Grant Likely wrote:
>>On Tue, Jun 10, 2014 at 2:56 AM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
>>> Currently, powernv platform is not aware of VFs. This means no dev-node
>>> represents a VF. Also, VF PCI device is created when PF driver want to enable
>>> it. This leads to the pdn->pdev and pdn->pe_number an invalid value.
>>>
>>> This patch create/release dev-node for VF and fixs this when a VF's pci_dev
>>> is created.
>>>
>>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>
>>I don't think this is the right way to handle this. Unless it is a
>>fixup to a buggy devicetree provided by firmware, I don't want to see
>>any code modifying the devicetree to describe stuff that is able to be
>>directly enumerated. Really the pci code should handle the lack of a
>>device_node gracefully. If it cannot then it should be fixed.
>
> Grant,
>
> Glad to see your comment.
>
> I will fix this in the firmware.

That's not really what I meant. The kernel should be able to deal with
virtual functions even if firmware doesn't know how, and the kernel
should not require modifying the device tree to support them.

I'm saying fix the kernel so that a device node is not necessary for
virtual functions.

g.
Wei Yang June 19, 2014, 9:42 a.m. UTC | #5
On Thu, Jun 19, 2014 at 09:30:47AM +0100, Grant Likely wrote:
>On Thu, Jun 19, 2014 at 3:46 AM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
>> On Wed, Jun 18, 2014 at 07:26:27PM +0100, Grant Likely wrote:
>>>On Tue, Jun 10, 2014 at 2:56 AM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
>>>> Currently, powernv platform is not aware of VFs. This means no dev-node
>>>> represents a VF. Also, VF PCI device is created when PF driver want to enable
>>>> it. This leads to the pdn->pdev and pdn->pe_number an invalid value.
>>>>
>>>> This patch create/release dev-node for VF and fixs this when a VF's pci_dev
>>>> is created.
>>>>
>>>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>
>>>I don't think this is the right way to handle this. Unless it is a
>>>fixup to a buggy devicetree provided by firmware, I don't want to see
>>>any code modifying the devicetree to describe stuff that is able to be
>>>directly enumerated. Really the pci code should handle the lack of a
>>>device_node gracefully. If it cannot then it should be fixed.
>>
>> Grant,
>>
>> Glad to see your comment.
>>
>> I will fix this in the firmware.
>
>That's not really what I meant. The kernel should be able to deal with
>virtual functions even if firmware doesn't know how, and the kernel
>should not require modifying the device tree to support them.
>
>I'm saying fix the kernel so that a device node is not necessary for
>virtual functions.

Oh, sorry for my misunderstanding. Let me do some investigation to see
whether it is feasible to get rid of the device node for VFs.

>
>g.
Wei Yang June 20, 2014, 3:46 a.m. UTC | #6
On Thu, Jun 19, 2014 at 09:30:47AM +0100, Grant Likely wrote:
>On Thu, Jun 19, 2014 at 3:46 AM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
>> On Wed, Jun 18, 2014 at 07:26:27PM +0100, Grant Likely wrote:
>>>On Tue, Jun 10, 2014 at 2:56 AM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
>>>> Currently, powernv platform is not aware of VFs. This means no dev-node
>>>> represents a VF. Also, VF PCI device is created when PF driver want to enable
>>>> it. This leads to the pdn->pdev and pdn->pe_number an invalid value.
>>>>
>>>> This patch create/release dev-node for VF and fixs this when a VF's pci_dev
>>>> is created.
>>>>
>>>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>
>>>I don't think this is the right way to handle this. Unless it is a
>>>fixup to a buggy devicetree provided by firmware, I don't want to see
>>>any code modifying the devicetree to describe stuff that is able to be
>>>directly enumerated. Really the pci code should handle the lack of a
>>>device_node gracefully. If it cannot then it should be fixed.
>>
>> Grant,
>>
>> Glad to see your comment.
>>
>> I will fix this in the firmware.
>
>That's not really what I meant. The kernel should be able to deal with
>virtual functions even if firmware doesn't know how, and the kernel
>should not require modifying the device tree to support them.
>
>I'm saying fix the kernel so that a device node is not necessary for
>virtual functions.
>
>g.

Grant,

After doing some investigation, I found two places that rely heavily on
this information, and this applies not only to VFs but also to PFs.

1. pnv_pci_read_config()/pnv_pci_cfg_read()
   A config space read needs the phb information.
   In commit 61305a96 the phb is retrieved from the bus, and commit
   9bf41be6 switched to using the device node for the EEH hotplug case. VFs may
   face a similar case with EEH hotplug. (This is under development.)

   To get rid of the device node/pci_dn, we would need special handling for VFs.
   Hmm... that does not look nice.

2. pnv_pci_ioda_dma_dev_setup()/pnv_pci_ioda_dma_set_mask()
   In pci_dn, there is a field: pe_number. This is used to retrieve the correct
   PE this PCI device is associated with.

   If we don't have a pci_dn for a VF, we need to store this information
   somewhere else. Like in the PF's pci_dn? Hmm... that does not look nice either.

Generally, we could find a workaround to make the VFs work without a device
node/pci_dn, but it would harm the infrastructure, making it inconsistent
and hard to read/maintain.

Currently I can't find a neat way to get rid of the device node/pci_dn for
VFs only. Doing so may require a careful restructuring.

BTW, my understanding may not be correct. If you have a better idea, please let
me know :-) Thanks a lot.
diff mbox

Patch

diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 895e8a2..0dd331b 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -11,6 +11,7 @@  config PPC_POWERNV
 	select PPC_UDBG_16550
 	select PPC_SCOM
 	select ARCH_RANDOM
+	select OF_DYNAMIC
 	default y
 
 config PPC_POWERNV_RTAS
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index e46c5bf..9ace027 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -23,6 +23,7 @@ 
 #include <linux/io.h>
 #include <linux/msi.h>
 #include <linux/memblock.h>
+#include <linux/of_pci.h>
 
 #include <asm/sections.h>
 #include <asm/io.h>
@@ -771,6 +772,108 @@  static void pnv_pci_ioda_setup_PEs(void)
 	}
 }
 
+#ifdef CONFIG_PCI_IOV
+static void pnv_pci_create_vf_node(struct pci_dev *dev, u16 vf_num)
+{
+	struct device_node *dn, *p_dn;
+	struct pci_dn *pdn;
+	struct pci_controller *hose;
+	struct property *pp;
+	void* value;
+	u16 id;
+
+	hose = pci_bus_to_host(dev->bus);
+
+	/* Create dev-tree node for VFs if this is a PF */
+	p_dn = pci_bus_to_OF_node(dev->bus);
+	if (p_dn == NULL) {
+		dev_err(&dev->dev, "SRIOV: VF bus NULL device node\n");
+		return;
+	}
+
+	for (id = 0; id < vf_num; id++) {
+		dn = kzalloc(sizeof(*dn), GFP_KERNEL);
+		pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
+		pp  = kzalloc(sizeof(*pp), GFP_KERNEL);
+		value = kzalloc(sizeof(u32), GFP_KERNEL);
+
+		if (!dn || !pdn || !pp || !value) {
+			kfree(dn);
+			kfree(pdn);
+			kfree(pp);
+			kfree(value);
+			dev_warn(&dev->dev, "%s: failed to create"
+				"dev-tree node for idx(%d)\n",
+				__func__, id);
+
+			break;
+		}
+
+		pp->value = value;
+		pdn->node = dn;
+		pdn->devfn = pci_iov_virtfn_devfn(dev, id);
+		pdn->busno = dev->bus->number;
+		pdn->pe_number = IODA_INVALID_PE;
+		pdn->phb = hose;
+
+		dn->data = pdn;
+		kref_init(&dn->kref);
+		dn->full_name = dn->name =
+			kasprintf(GFP_KERNEL, "%s/vf%d",
+				p_dn->full_name, pdn->devfn);
+		dn->parent = p_dn;
+
+		pp->name = kasprintf(GFP_KERNEL, "reg");
+		pp->length = 5 * sizeof(__be32);
+		*(u32*)pp->value = cpu_to_be32(pdn->devfn) << 8;
+		dn->properties = pp;
+
+		of_attach_node(dn);
+	}
+}
+
+static void pnv_pci_release_vf_node(struct pci_dev *dev, u16 vf_num)
+{
+	struct device_node *dn;
+	struct property *pp;
+	u16 id;
+
+	for (id = 0; id < vf_num; id++) {
+		dn = of_pci_find_child_device(dev->bus->dev.of_node,
+				pci_iov_virtfn_devfn(dev, id));
+		if (!dn)
+			continue;
+
+		of_detach_node(dn);
+		pp = dn->properties;
+		kfree(pp->name);
+		kfree(pp->value);
+		kfree(pp);
+		kfree(dn->data);
+		kfree(dn);
+	}
+}
+
+int pcibios_sriov_disable(struct pci_dev *pdev)
+{
+	struct pci_sriov *iov;
+	u16 vf_num;
+
+	iov = pdev->sriov;
+	vf_num = iov->num_VFs;
+	pnv_pci_release_vf_node(pdev, vf_num);
+
+	return 0;
+}
+
+int pcibios_sriov_enable(struct pci_dev *pdev, u16 vf_num)
+{
+	pnv_pci_create_vf_node(pdev, vf_num);
+
+	return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
 static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
 {
 	struct pci_dn *pdn = pci_get_pdn(pdev);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 687a068..43fcc73 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -654,6 +654,26 @@  static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
 {
 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
 	struct pnv_phb *phb = hose->private_data;
+#ifdef CONFIG_PCI_IOV
+	struct pnv_ioda_pe *pe;
+	struct pci_dn *pdn;
+
+	/* Fix the VF pdn PE number */
+	if (pdev->is_virtfn) {
+		pdn = pci_get_pdn(pdev);
+		if (pdn->pcidev == NULL || pdn->pe_number == IODA_INVALID_PE) {
+			list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+				if (pe->rid ==
+					((pdev->bus->number << 8) | (pdev->devfn & 0xff))) {
+					pdn->pcidev = pdev;
+					pdn->pe_number = pe->pe_number;
+					pe->pdev = pdev;
+					break;
+				}
+			}
+		}
+	}
+#endif /* CONFIG_PCI_IOV */
 
 	/* If we have no phb structure, try to setup a fallback based on
 	 * the device-tree (RTAS PCI for example)