diff mbox

[v5,3/4] spapr_pci: enumerate and add PCI device tree

Message ID 1432023962-32406-4-git-send-email-nikunj@linux.vnet.ibm.com
State New
Headers show

Commit Message

Nikunj A Dadhania May 19, 2015, 8:26 a.m. UTC
All the PCI enumeration and device node creation was off-loaded to
SLOF. With PCI hotplug support, code needed to be added to add device
node. This creates multiple copy of the code one in SLOF and other in
hotplug code. To unify this, the patch adds the pci device node
creation in Qemu. For backward compatibility, a flag
"qemu,phb-enumerated" is added to the phb, suggesting to SLOF to not
do device node creation.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
[ Squashed Michael's drc_index changes ]
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 hw/ppc/spapr_pci.c | 188 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 150 insertions(+), 38 deletions(-)

Comments

Alexey Kardashevskiy May 24, 2015, 11:05 a.m. UTC | #1
On 05/19/2015 06:26 PM, Nikunj A Dadhania wrote:
> All the PCI enumeration and device node creation was off-loaded to
> SLOF. With PCI hotplug support, code needed to be added to add device
> node. This creates multiple copy of the code one in SLOF and other in
> hotplug code. To unify this, the patch adds the pci device node
> creation in Qemu. For backward compatibility, a flag
> "qemu,phb-enumerated" is added to the phb, suggesting to SLOF to not
> do device node creation.
>
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> [ Squashed Michael's drc_index changes ]
> Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>   hw/ppc/spapr_pci.c | 188 ++++++++++++++++++++++++++++++++++++++++++-----------
>   1 file changed, 150 insertions(+), 38 deletions(-)
>
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 8b02a3e..12f1b9c 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -23,6 +23,7 @@
>    * THE SOFTWARE.
>    */
>   #include "hw/hw.h"
> +#include "hw/sysbus.h"
>   #include "hw/pci/pci.h"
>   #include "hw/pci/msi.h"
>   #include "hw/pci/msix.h"
> @@ -35,6 +36,7 @@
>   #include "qemu/error-report.h"
>   #include "qapi/qmp/qerror.h"
>
> +#include "hw/pci/pci_bridge.h"
>   #include "hw/pci/pci_bus.h"
>   #include "hw/ppc/spapr_drc.h"
>   #include "sysemu/device_tree.h"
> @@ -742,6 +744,31 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>       return &phb->iommu_as;
>   }
>
> +
> +static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
> +                                               PCIDevice *pdev)
> +{
> +    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
> +    return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
> +                                    (phb->index << 16) |
> +                                    (busnr << 8) |
> +                                    pdev->devfn);
> +}
> +
> +static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
> +                                            PCIDevice *pdev)
> +{
> +    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
> +    sPAPRDRConnectorClass *drck;
> +
> +    if (!drc) {
> +        return 0;
> +    }
> +
> +    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +    return drck->get_index(drc);
> +}
> +
>   /* Macros to operate with address in OF binding to PCI */
>   #define b_x(x, p, l)    (((x) & ((1<<(l))-1)) << (p))
>   #define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
> @@ -879,12 +906,13 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
>   }
>
>   static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
> -                                       int phb_index, int drc_index,
> +                                       sPAPRPHBState *sphb,
>                                          const char *drc_name)
>   {
>       ResourceProps rp;
>       bool is_bridge = false;
>       int pci_status;
> +    uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);

Is this drc_index any different from the one which used to be passed to 
this function? If no, then I do not see the point in changing the prototype 
(or make another "this just makes code easier/nicer" patch). If yes, then 
it would be nice to see what the patch changed in this regard in the commit 
log.



>       if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
>           PCI_HEADER_TYPE_BRIDGE) {
> @@ -945,8 +973,13 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>        * processed by OF beforehand
>        */
>       _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
> -    _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name)));
> -    _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
> +    if (drc_name) {
> +        _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name,
> +                         strlen(drc_name)));
> +    }
> +    if (drc_index) {
> +        _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
> +    }
>
>       _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
>                             RESOURCE_CELLS_ADDRESS));
> @@ -963,30 +996,34 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>       return 0;
>   }
>
> +typedef struct sPAPRFDT {
> +    void *fdt;
> +    int node_off;
> +    sPAPRPHBState *sphb;
> +} sPAPRFDT;
> +
>   /* create OF node for pci device and required OF DT properties */
> -static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
> -                                       int drc_index, const char *drc_name,
> -                                       int *dt_offset)
> +static int spapr_create_pci_child_dt(PCIDevice *pdev, sPAPRFDT *p,
> +                                     const char *drc_name)

Why s/dev/pdev/?


>   {
> -    void *fdt;
> -    int offset, ret, fdt_size;
> -    int slot = PCI_SLOT(dev->devfn);
> -    int func = PCI_FUNC(dev->devfn);
> -    char nodename[512];
> +    int offset, ret;
> +    char nodename[64];

Why s/512/64/?

This change and the one above hide what the patch really does to 
spapr_create_pci_child_dt.


> +    int slot = PCI_SLOT(pdev->devfn);
> +    int func = PCI_FUNC(pdev->devfn);
>
> -    fdt = create_device_tree(&fdt_size);
>       if (func != 0) {
>           sprintf(nodename, "pci@%d,%d", slot, func);
>       } else {
>           sprintf(nodename, "pci@%d", slot);
>       }
> -    offset = fdt_add_subnode(fdt, 0, nodename);
> -    ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index,
> +    offset = fdt_add_subnode(p->fdt, p->node_off, nodename);
> +    ret = spapr_populate_pci_child_dt(pdev, p->fdt, offset, p->sphb,
>                                         drc_name);
>       g_assert(!ret);
> -
> -    *dt_offset = offset;
> -    return fdt;
> +    if (ret) {
> +        return 0;
> +    }
> +    return offset;
>   }
>
>   static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
> @@ -996,24 +1033,26 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
>   {
>       sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>       DeviceState *dev = DEVICE(pdev);
> -    int drc_index = drck->get_index(drc);
>       const char *drc_name = drck->get_name(drc);
> -    void *fdt = NULL;
> -    int fdt_start_offset = 0;
> +    int fdt_start_offset = 0, fdt_size;
> +    sPAPRFDT s_fdt = {NULL, 0, NULL};
>
> -    /* boot-time devices get their device tree node created by SLOF, but for
> -     * hotplugged devices we need QEMU to generate it so the guest can fetch
> -     * it via RTAS
> -     */
>       if (dev->hotplugged) {


I understand the patch is not changing this but still while we are here -
spapr_phb_add_pci_device() is only called from spapr_phb_hot_plug_child(), 
how can dev->hotplugged be not true in this function (if it cannot, you 
could get rid of "out:"?



> -        fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name,
> -                                        &fdt_start_offset);
> +        s_fdt.fdt = create_device_tree(&fdt_size);
> +        s_fdt.sphb = phb;
> +        s_fdt.node_off = 0;
> +        fdt_start_offset = spapr_create_pci_child_dt(pdev, &s_fdt, drc_name);
> +        if (!fdt_start_offset) {
> +            error_setg(errp, "Failed to create pci child device tree node");
> +            goto out;
> +        }
>       }
>
>       drck->attach(drc, DEVICE(pdev),
> -                 fdt, fdt_start_offset, !dev->hotplugged, errp);
> +                 s_fdt.fdt, fdt_start_offset, !dev->hotplugged, errp);
> +out:
>       if (*errp) {
> -        g_free(fdt);
> +        g_free(s_fdt.fdt);
>       }
>   }
>
> @@ -1043,16 +1082,6 @@ static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
>       drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp);
>   }
>
> -static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
> -                                               PCIDevice *pdev)

Just adding forward declaration would make the patch shorter.


> -{
> -    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
> -    return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
> -                                    (phb->index << 16) |
> -                                    (busnr << 8) |
> -                                    pdev->devfn);
> -}
> -
>   static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler,
>                                        DeviceState *plugged_dev, Error **errp)
>   {
> @@ -1482,6 +1511,75 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
>       return PCI_HOST_BRIDGE(dev);
>   }
>
> +static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
> +                                          void *opaque)
> +{
> +    PCIBus *sec_bus;
> +    sPAPRFDT *p = opaque;
> +    int offset;
> +    sPAPRFDT s_fdt;
> +
> +    offset = spapr_create_pci_child_dt(pdev, p, NULL);
> +    if (!offset) {
> +        error_report("Failed to create pci child device tree node");
> +        return;
> +    }
> +
> +    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
> +         PCI_HEADER_TYPE_BRIDGE)) {
> +        return;
> +    }
> +
> +    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
> +    if (!sec_bus) {
> +        return;
> +    }
> +
> +    s_fdt.fdt = p->fdt;
> +    s_fdt.node_off = offset;
> +    s_fdt.sphb = p->sphb;
> +    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> +                        spapr_populate_pci_devices_dt,
> +                        &s_fdt);
> +}
> +
> +static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
> +                                           void *opaque)
> +{
> +    unsigned int *bus_no = opaque;
> +    unsigned int primary = *bus_no;
> +    unsigned int secondary;
> +    unsigned int subordinate = 0xff;
> +
> +    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) ==
> +         PCI_HEADER_TYPE_BRIDGE)) {


s/==/!=/ and "return" and no need in extra indent below.


> +        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
> +        secondary = *bus_no + 1;


(*bus_no)++;
secondary = *bus_no;

and remove "bus_no = *bus_no + 1" below?
In fact, I do not need much sense in having "secondary" variable in this 
function.

> +        pci_default_write_config(pdev, PCI_PRIMARY_BUS, primary, 1);
> +        pci_default_write_config(pdev, PCI_SECONDARY_BUS, secondary, 1);
> +        pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, secondary, 1);
> +        *bus_no = *bus_no + 1;
> +        if (sec_bus) {

same here? Just like you did in spapr_populate_pci_devices_dt(). I do not 
insist though. But having less scopes just makes it easier/nicer to wrap 
long lines in QEMU coding style (new line starts under "(").


> +            pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, subordinate, 1);
> +            pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> +                                spapr_phb_pci_enumerate_bridge,
> +                                bus_no);
> +            pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
> +        }
> +    }
> +}
> +
> +static void spapr_phb_pci_enumerate(sPAPRPHBState *phb)
> +{
> +    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
> +    unsigned int bus_no = 0;
> +
> +    pci_for_each_device(bus, pci_bus_num(bus),
> +                        spapr_phb_pci_enumerate_bridge,
> +                        &bus_no);
> +
> +}
> +
>   int spapr_populate_pci_dt(sPAPRPHBState *phb,
>                             uint32_t xics_phandle,
>                             void *fdt)
> @@ -1521,6 +1619,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>           cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
>       uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
>       sPAPRTCETable *tcet;
> +    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
> +    sPAPRFDT s_fdt;
>
>       /* Start populating the FDT */
>       sprintf(nodename, "pci@%" PRIx64, phb->buid);
> @@ -1570,6 +1670,18 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>                    tcet->liobn, tcet->bus_offset,
>                    tcet->nb_table << tcet->page_shift);
>
> +    /* Walk the bridges and program the bus numbers*/
> +    spapr_phb_pci_enumerate(phb);
> +    _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));


Can we also add a hack here to scan for the "qemu,phb-enumerated" string in 
the SLOF bin image?


> +
> +    /* Populate tree nodes with PCI devices attached */
> +    s_fdt.fdt = fdt;
> +    s_fdt.node_off = bus_off;
> +    s_fdt.sphb = phb;
> +    pci_for_each_device(bus, pci_bus_num(bus),
> +                        spapr_populate_pci_devices_dt,
> +                        &s_fdt);
> +
>       ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
>                                   SPAPR_DR_CONNECTOR_TYPE_PCI);
>       if (ret) {
>
Nikunj A Dadhania May 25, 2015, 4:45 a.m. UTC | #2
Alexey Kardashevskiy <aik@ozlabs.ru> writes:

> On 05/19/2015 06:26 PM, Nikunj A Dadhania wrote:
>> All the PCI enumeration and device node creation was off-loaded to
>> SLOF. With PCI hotplug support, code needed to be added to add device
>> node. This creates multiple copy of the code one in SLOF and other in
>> hotplug code. To unify this, the patch adds the pci device node
>> creation in Qemu. For backward compatibility, a flag
>> "qemu,phb-enumerated" is added to the phb, suggesting to SLOF to not
>> do device node creation.
>>
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> [ Squashed Michael's drc_index changes ]
>> Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> ---
>>   hw/ppc/spapr_pci.c | 188 ++++++++++++++++++++++++++++++++++++++++++-----------
>>   1 file changed, 150 insertions(+), 38 deletions(-)
>>
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index 8b02a3e..12f1b9c 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -23,6 +23,7 @@
>>    * THE SOFTWARE.
>>    */
>>   #include "hw/hw.h"
>> +#include "hw/sysbus.h"
>>   #include "hw/pci/pci.h"
>>   #include "hw/pci/msi.h"
>>   #include "hw/pci/msix.h"
>> @@ -35,6 +36,7 @@
>>   #include "qemu/error-report.h"
>>   #include "qapi/qmp/qerror.h"
>>
>> +#include "hw/pci/pci_bridge.h"
>>   #include "hw/pci/pci_bus.h"
>>   #include "hw/ppc/spapr_drc.h"
>>   #include "sysemu/device_tree.h"
>> @@ -742,6 +744,31 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>       return &phb->iommu_as;
>>   }
>>
>> +
>> +static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
>> +                                               PCIDevice *pdev)
>> +{
>> +    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
>> +    return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
>> +                                    (phb->index << 16) |
>> +                                    (busnr << 8) |
>> +                                    pdev->devfn);
>> +}
>> +
>> +static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
>> +                                            PCIDevice *pdev)
>> +{
>> +    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
>> +    sPAPRDRConnectorClass *drck;
>> +
>> +    if (!drc) {
>> +        return 0;
>> +    }
>> +
>> +    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>> +    return drck->get_index(drc);
>> +}
>> +
>>   /* Macros to operate with address in OF binding to PCI */
>>   #define b_x(x, p, l)    (((x) & ((1<<(l))-1)) << (p))
>>   #define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
>> @@ -879,12 +906,13 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
>>   }
>>
>>   static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>> -                                       int phb_index, int drc_index,
>> +                                       sPAPRPHBState *sphb,
>>                                          const char *drc_name)
>>   {
>>       ResourceProps rp;
>>       bool is_bridge = false;
>>       int pci_status;
>> +    uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
>
> Is this drc_index any different from the one which used to be passed to 
> this function? If no, then I do not see the point in changing the prototype 
> (or make another "this just makes code easier/nicer" patch).

Its the same, I can have a separate patch. As I was changing this code
the drc_index would need to be read in boot and hotplug code. So brought
over the code here.

> If yes, then it would be nice to see what the patch changed in this
> regard in the commit log.
>
>
>
>>       if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
>>           PCI_HEADER_TYPE_BRIDGE) {
>> @@ -945,8 +973,13 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>>        * processed by OF beforehand
>>        */
>>       _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
>> -    _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name)));
>> -    _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
>> +    if (drc_name) {
>> +        _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name,
>> +                         strlen(drc_name)));
>> +    }
>> +    if (drc_index) {
>> +        _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
>> +    }
>>
>>       _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
>>                             RESOURCE_CELLS_ADDRESS));
>> @@ -963,30 +996,34 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>>       return 0;
>>   }
>>
>> +typedef struct sPAPRFDT {
>> +    void *fdt;
>> +    int node_off;
>> +    sPAPRPHBState *sphb;
>> +} sPAPRFDT;
>> +
>>   /* create OF node for pci device and required OF DT properties */
>> -static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
>> -                                       int drc_index, const char *drc_name,
>> -                                       int *dt_offset)
>> +static int spapr_create_pci_child_dt(PCIDevice *pdev, sPAPRFDT *p,
>> +                                     const char *drc_name)
>
> Why s/dev/pdev/?

PCIDev thats the only reason.

>
>
>>   {
>> -    void *fdt;
>> -    int offset, ret, fdt_size;
>> -    int slot = PCI_SLOT(dev->devfn);
>> -    int func = PCI_FUNC(dev->devfn);
>> -    char nodename[512];
>> +    int offset, ret;
>> +    char nodename[64];
>
> Why s/512/64/?

Earlier this was called in recursion, so there was a comment in previous
series to reduce this to lesser number.

>
> This change and the one above hide what the patch really does to 
> spapr_create_pci_child_dt.
>
>
>> +    int slot = PCI_SLOT(pdev->devfn);
>> +    int func = PCI_FUNC(pdev->devfn);
>>
>> -    fdt = create_device_tree(&fdt_size);
>>       if (func != 0) {
>>           sprintf(nodename, "pci@%d,%d", slot, func);
>>       } else {
>>           sprintf(nodename, "pci@%d", slot);
>>       }
>> -    offset = fdt_add_subnode(fdt, 0, nodename);
>> -    ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index,
>> +    offset = fdt_add_subnode(p->fdt, p->node_off, nodename);
>> +    ret = spapr_populate_pci_child_dt(pdev, p->fdt, offset, p->sphb,
>>                                         drc_name);
>>       g_assert(!ret);
>> -
>> -    *dt_offset = offset;
>> -    return fdt;
>> +    if (ret) {
>> +        return 0;
>> +    }
>> +    return offset;
>>   }
>>
>>   static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
>> @@ -996,24 +1033,26 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
>>   {
>>       sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>>       DeviceState *dev = DEVICE(pdev);
>> -    int drc_index = drck->get_index(drc);
>>       const char *drc_name = drck->get_name(drc);
>> -    void *fdt = NULL;
>> -    int fdt_start_offset = 0;
>> +    int fdt_start_offset = 0, fdt_size;
>> +    sPAPRFDT s_fdt = {NULL, 0, NULL};
>>
>> -    /* boot-time devices get their device tree node created by SLOF, but for
>> -     * hotplugged devices we need QEMU to generate it so the guest can fetch
>> -     * it via RTAS
>> -     */
>>       if (dev->hotplugged) {
>
>
> I understand the patch is not changing this but still while we are here -
> spapr_phb_add_pci_device() is only called from spapr_phb_hot_plug_child(), 
> how can dev->hotplugged be not true in this function (if it cannot, you 
> could get rid of "out:"?

It gets called even when the devices are added during boot.

>
>> -        fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name,
>> -                                        &fdt_start_offset);
>> +        s_fdt.fdt = create_device_tree(&fdt_size);
>> +        s_fdt.sphb = phb;
>> +        s_fdt.node_off = 0;
>> +        fdt_start_offset = spapr_create_pci_child_dt(pdev, &s_fdt, drc_name);
>> +        if (!fdt_start_offset) {
>> +            error_setg(errp, "Failed to create pci child device tree node");
>> +            goto out;
>> +        }
>>       }
>>
>>       drck->attach(drc, DEVICE(pdev),
>> -                 fdt, fdt_start_offset, !dev->hotplugged, errp);
>> +                 s_fdt.fdt, fdt_start_offset, !dev->hotplugged, errp);
>> +out:
>>       if (*errp) {
>> -        g_free(fdt);
>> +        g_free(s_fdt.fdt);
>>       }
>>   }
>>
>> @@ -1043,16 +1082,6 @@ static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
>>       drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp);
>>   }
>>
>> -static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
>> -                                               PCIDevice *pdev)
>
> Just adding forward declaration would make the patch shorter.

Yes, I can do that.

>
>> -{
>> -    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
>> -    return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
>> -                                    (phb->index << 16) |
>> -                                    (busnr << 8) |
>> -                                    pdev->devfn);
>> -}
>> -
>>   static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler,
>>                                        DeviceState *plugged_dev, Error **errp)
>>   {
>> @@ -1482,6 +1511,75 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
>>       return PCI_HOST_BRIDGE(dev);
>>   }
>>
>> +static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
>> +                                          void *opaque)
>> +{
>> +    PCIBus *sec_bus;
>> +    sPAPRFDT *p = opaque;
>> +    int offset;
>> +    sPAPRFDT s_fdt;
>> +
>> +    offset = spapr_create_pci_child_dt(pdev, p, NULL);
>> +    if (!offset) {
>> +        error_report("Failed to create pci child device tree node");
>> +        return;
>> +    }
>> +
>> +    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
>> +         PCI_HEADER_TYPE_BRIDGE)) {
>> +        return;
>> +    }
>> +
>> +    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
>> +    if (!sec_bus) {
>> +        return;
>> +    }
>> +
>> +    s_fdt.fdt = p->fdt;
>> +    s_fdt.node_off = offset;
>> +    s_fdt.sphb = p->sphb;
>> +    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>> +                        spapr_populate_pci_devices_dt,
>> +                        &s_fdt);
>> +}
>> +
>> +static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
>> +                                           void *opaque)
>> +{
>> +    unsigned int *bus_no = opaque;
>> +    unsigned int primary = *bus_no;
>> +    unsigned int secondary;
>> +    unsigned int subordinate = 0xff;
>> +
>> +    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) ==
>> +         PCI_HEADER_TYPE_BRIDGE)) {
>
>
> s/==/!=/ and "return" and no need in extra indent below.

Right.

>
>> +        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
>> +        secondary = *bus_no + 1;
>
>
> (*bus_no)++;
> secondary = *bus_no;
>
> and remove "bus_no = *bus_no + 1" below?
> In fact, I do not need much sense in having "secondary" variable in this 
> function.
>
>> +        pci_default_write_config(pdev, PCI_PRIMARY_BUS, primary, 1);
>> +        pci_default_write_config(pdev, PCI_SECONDARY_BUS, secondary, 1);
>> +        pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, secondary, 1);
>> +        *bus_no = *bus_no + 1;
>> +        if (sec_bus) {
>
> same here? Just like you did in spapr_populate_pci_devices_dt(). I do not 
> insist though. But having less scopes just makes it easier/nicer to wrap 
> long lines in QEMU coding style (new line starts under "(").
>
>
>> +            pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, subordinate, 1);
>> +            pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>> +                                spapr_phb_pci_enumerate_bridge,
>> +                                bus_no);
>> +            pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
>> +        }
>> +    }
>> +}
>> +
>> +static void spapr_phb_pci_enumerate(sPAPRPHBState *phb)
>> +{
>> +    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
>> +    unsigned int bus_no = 0;
>> +
>> +    pci_for_each_device(bus, pci_bus_num(bus),
>> +                        spapr_phb_pci_enumerate_bridge,
>> +                        &bus_no);
>> +
>> +}
>> +
>>   int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>                             uint32_t xics_phandle,
>>                             void *fdt)
>> @@ -1521,6 +1619,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>           cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
>>       uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
>>       sPAPRTCETable *tcet;
>> +    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
>> +    sPAPRFDT s_fdt;
>>
>>       /* Start populating the FDT */
>>       sprintf(nodename, "pci@%" PRIx64, phb->buid);
>> @@ -1570,6 +1670,18 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>                    tcet->liobn, tcet->bus_offset,
>>                    tcet->nb_table << tcet->page_shift);
>>
>> +    /* Walk the bridges and program the bus numbers*/
>> +    spapr_phb_pci_enumerate(phb);
>> +    _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));
>
>
> Can we also add a hack here to scan for the "qemu,phb-enumerated" string in 
> the SLOF bin image?

Really ? That would be ugly.

>
>> +
>> +    /* Populate tree nodes with PCI devices attached */
>> +    s_fdt.fdt = fdt;
>> +    s_fdt.node_off = bus_off;
>> +    s_fdt.sphb = phb;
>> +    pci_for_each_device(bus, pci_bus_num(bus),
>> +                        spapr_populate_pci_devices_dt,
>> +                        &s_fdt);
>> +
>>       ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
>>                                   SPAPR_DR_CONNECTOR_TYPE_PCI);
>>       if (ret) {
>>
>
>
> -- 
> Alexey
Alexey Kardashevskiy May 25, 2015, 9:51 a.m. UTC | #3
On 05/25/2015 02:45 PM, Nikunj A Dadhania wrote:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>
>> On 05/19/2015 06:26 PM, Nikunj A Dadhania wrote:
>>> All the PCI enumeration and device node creation was off-loaded to
>>> SLOF. With PCI hotplug support, code needed to be added to add device
>>> node. This creates multiple copy of the code one in SLOF and other in
>>> hotplug code. To unify this, the patch adds the pci device node
>>> creation in Qemu. For backward compatibility, a flag
>>> "qemu,phb-enumerated" is added to the phb, suggesting to SLOF to not
>>> do device node creation.
>>>
>>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>>> [ Squashed Michael's drc_index changes ]
>>> Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
>>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>>> ---
>>>    hw/ppc/spapr_pci.c | 188 ++++++++++++++++++++++++++++++++++++++++++-----------
>>>    1 file changed, 150 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>> index 8b02a3e..12f1b9c 100644
>>> --- a/hw/ppc/spapr_pci.c
>>> +++ b/hw/ppc/spapr_pci.c
>>> @@ -23,6 +23,7 @@
>>>     * THE SOFTWARE.
>>>     */
>>>    #include "hw/hw.h"
>>> +#include "hw/sysbus.h"
>>>    #include "hw/pci/pci.h"
>>>    #include "hw/pci/msi.h"
>>>    #include "hw/pci/msix.h"
>>> @@ -35,6 +36,7 @@
>>>    #include "qemu/error-report.h"
>>>    #include "qapi/qmp/qerror.h"
>>>
>>> +#include "hw/pci/pci_bridge.h"
>>>    #include "hw/pci/pci_bus.h"
>>>    #include "hw/ppc/spapr_drc.h"
>>>    #include "sysemu/device_tree.h"
>>> @@ -742,6 +744,31 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>>        return &phb->iommu_as;
>>>    }
>>>
>>> +
>>> +static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
>>> +                                               PCIDevice *pdev)
>>> +{
>>> +    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
>>> +    return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
>>> +                                    (phb->index << 16) |
>>> +                                    (busnr << 8) |
>>> +                                    pdev->devfn);
>>> +}
>>> +
>>> +static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
>>> +                                            PCIDevice *pdev)
>>> +{
>>> +    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
>>> +    sPAPRDRConnectorClass *drck;
>>> +
>>> +    if (!drc) {
>>> +        return 0;
>>> +    }
>>> +
>>> +    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>>> +    return drck->get_index(drc);
>>> +}
>>> +
>>>    /* Macros to operate with address in OF binding to PCI */
>>>    #define b_x(x, p, l)    (((x) & ((1<<(l))-1)) << (p))
>>>    #define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
>>> @@ -879,12 +906,13 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
>>>    }
>>>
>>>    static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>>> -                                       int phb_index, int drc_index,
>>> +                                       sPAPRPHBState *sphb,
>>>                                           const char *drc_name)
>>>    {
>>>        ResourceProps rp;
>>>        bool is_bridge = false;
>>>        int pci_status;
>>> +    uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
>>
>> Is this drc_index any different from the one which used to be passed to
>> this function? If no, then I do not see the point in changing the prototype
>> (or make another "this just makes code easier/nicer" patch).
>
> Its the same, I can have a separate patch. As I was changing this code
> the drc_index would need to be read in boot and hotplug code. So brought
> over the code here.
>
>> If yes, then it would be nice to see what the patch changed in this
>> regard in the commit log.
>>
>>
>>
>>>        if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
>>>            PCI_HEADER_TYPE_BRIDGE) {
>>> @@ -945,8 +973,13 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>>>         * processed by OF beforehand
>>>         */
>>>        _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
>>> -    _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name)));
>>> -    _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
>>> +    if (drc_name) {
>>> +        _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name,
>>> +                         strlen(drc_name)));
>>> +    }
>>> +    if (drc_index) {
>>> +        _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
>>> +    }
>>>
>>>        _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
>>>                              RESOURCE_CELLS_ADDRESS));
>>> @@ -963,30 +996,34 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>>>        return 0;
>>>    }
>>>
>>> +typedef struct sPAPRFDT {
>>> +    void *fdt;
>>> +    int node_off;
>>> +    sPAPRPHBState *sphb;
>>> +} sPAPRFDT;
>>> +
>>>    /* create OF node for pci device and required OF DT properties */
>>> -static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
>>> -                                       int drc_index, const char *drc_name,
>>> -                                       int *dt_offset)
>>> +static int spapr_create_pci_child_dt(PCIDevice *pdev, sPAPRFDT *p,
>>> +                                     const char *drc_name)
>>
>> Why s/dev/pdev/?
>
> PCIDev thats the only reason.


In this file, PCIDevice is called "dev", "pdev", "d", "pci_dev" so if you 
really want to change the name - do it once and for all occurrences OR do 
not do this at all :)


>
>>
>>
>>>    {
>>> -    void *fdt;
>>> -    int offset, ret, fdt_size;
>>> -    int slot = PCI_SLOT(dev->devfn);
>>> -    int func = PCI_FUNC(dev->devfn);
>>> -    char nodename[512];
>>> +    int offset, ret;
>>> +    char nodename[64];
>>
>> Why s/512/64/?
>
> Earlier this was called in recursion, so there was a comment in previous
> series to reduce this to lesser number.

I'd make a separate patch with s/512/64/ and also do s/sprintf/snprintf/ below.



>> This change and the one above hide what the patch really does to
>> spapr_create_pci_child_dt.
>>
>>
>>> +    int slot = PCI_SLOT(pdev->devfn);
>>> +    int func = PCI_FUNC(pdev->devfn);
>>>
>>> -    fdt = create_device_tree(&fdt_size);
>>>        if (func != 0) {
>>>            sprintf(nodename, "pci@%d,%d", slot, func);
>>>        } else {
>>>            sprintf(nodename, "pci@%d", slot);
>>>        }
>>> -    offset = fdt_add_subnode(fdt, 0, nodename);
>>> -    ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index,
>>> +    offset = fdt_add_subnode(p->fdt, p->node_off, nodename);
>>> +    ret = spapr_populate_pci_child_dt(pdev, p->fdt, offset, p->sphb,
>>>                                          drc_name);
>>>        g_assert(!ret);
>>> -
>>> -    *dt_offset = offset;
>>> -    return fdt;
>>> +    if (ret) {
>>> +        return 0;
>>> +    }
>>> +    return offset;
>>>    }
>>>
>>>    static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
>>> @@ -996,24 +1033,26 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
>>>    {
>>>        sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>>>        DeviceState *dev = DEVICE(pdev);
>>> -    int drc_index = drck->get_index(drc);
>>>        const char *drc_name = drck->get_name(drc);
>>> -    void *fdt = NULL;
>>> -    int fdt_start_offset = 0;
>>> +    int fdt_start_offset = 0, fdt_size;
>>> +    sPAPRFDT s_fdt = {NULL, 0, NULL};
>>>
>>> -    /* boot-time devices get their device tree node created by SLOF, but for
>>> -     * hotplugged devices we need QEMU to generate it so the guest can fetch
>>> -     * it via RTAS
>>> -     */
>>>        if (dev->hotplugged) {
>>
>>
>> I understand the patch is not changing this but still while we are here -
>> spapr_phb_add_pci_device() is only called from spapr_phb_hot_plug_child(),
>> how can dev->hotplugged be not true in this function (if it cannot, you
>> could get rid of "out:"?
>
> It gets called even when the devices are added during boot.

Where else? I did grep and see just one call:

hw/ppc/spapr_pci.c:1087:static void 
spapr_phb_add_pci_device(sPAPRDRConnector *drc,
hw/ppc/spapr_pci.c:1166:    spapr_phb_add_pci_device(drc, phb, pdev, 
&local_err);



>
>>
>>> -        fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name,
>>> -                                        &fdt_start_offset);
>>> +        s_fdt.fdt = create_device_tree(&fdt_size);
>>> +        s_fdt.sphb = phb;
>>> +        s_fdt.node_off = 0;
>>> +        fdt_start_offset = spapr_create_pci_child_dt(pdev, &s_fdt, drc_name);
>>> +        if (!fdt_start_offset) {
>>> +            error_setg(errp, "Failed to create pci child device tree node");
>>> +            goto out;
>>> +        }
>>>        }
>>>
>>>        drck->attach(drc, DEVICE(pdev),
>>> -                 fdt, fdt_start_offset, !dev->hotplugged, errp);
>>> +                 s_fdt.fdt, fdt_start_offset, !dev->hotplugged, errp);
>>> +out:
>>>        if (*errp) {
>>> -        g_free(fdt);
>>> +        g_free(s_fdt.fdt);
>>>        }
>>>    }
>>>
>>> @@ -1043,16 +1082,6 @@ static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
>>>        drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp);
>>>    }
>>>
>>> -static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
>>> -                                               PCIDevice *pdev)
>>
>> Just adding forward declaration would make the patch shorter.
>
> Yes, I can do that.
>
>>
>>> -{
>>> -    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
>>> -    return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
>>> -                                    (phb->index << 16) |
>>> -                                    (busnr << 8) |
>>> -                                    pdev->devfn);
>>> -}
>>> -
>>>    static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler,
>>>                                         DeviceState *plugged_dev, Error **errp)
>>>    {
>>> @@ -1482,6 +1511,75 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
>>>        return PCI_HOST_BRIDGE(dev);
>>>    }
>>>
>>> +static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
>>> +                                          void *opaque)
>>> +{
>>> +    PCIBus *sec_bus;
>>> +    sPAPRFDT *p = opaque;
>>> +    int offset;
>>> +    sPAPRFDT s_fdt;
>>> +
>>> +    offset = spapr_create_pci_child_dt(pdev, p, NULL);
>>> +    if (!offset) {
>>> +        error_report("Failed to create pci child device tree node");
>>> +        return;
>>> +    }
>>> +
>>> +    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
>>> +         PCI_HEADER_TYPE_BRIDGE)) {
>>> +        return;
>>> +    }
>>> +
>>> +    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
>>> +    if (!sec_bus) {
>>> +        return;
>>> +    }
>>> +
>>> +    s_fdt.fdt = p->fdt;
>>> +    s_fdt.node_off = offset;
>>> +    s_fdt.sphb = p->sphb;
>>> +    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>>> +                        spapr_populate_pci_devices_dt,
>>> +                        &s_fdt);
>>> +}
>>> +
>>> +static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
>>> +                                           void *opaque)
>>> +{
>>> +    unsigned int *bus_no = opaque;
>>> +    unsigned int primary = *bus_no;
>>> +    unsigned int secondary;
>>> +    unsigned int subordinate = 0xff;
>>> +
>>> +    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) ==
>>> +         PCI_HEADER_TYPE_BRIDGE)) {
>>
>>
>> s/==/!=/ and "return" and no need in extra indent below.
>
> Right.
>
>>
>>> +        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
>>> +        secondary = *bus_no + 1;
>>
>>
>> (*bus_no)++;
>> secondary = *bus_no;
>>
>> and remove "bus_no = *bus_no + 1" below?
>> In fact, I do not need much sense in having "secondary" variable in this
>> function.
>>
>>> +        pci_default_write_config(pdev, PCI_PRIMARY_BUS, primary, 1);
>>> +        pci_default_write_config(pdev, PCI_SECONDARY_BUS, secondary, 1);
>>> +        pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, secondary, 1);
>>> +        *bus_no = *bus_no + 1;
>>> +        if (sec_bus) {
>>
>> same here? Just like you did in spapr_populate_pci_devices_dt(). I do not
>> insist though. But having less scopes just makes it easier/nicer to wrap
>> long lines in QEMU coding style (new line starts under "(").
>>
>>
>>> +            pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, subordinate, 1);
>>> +            pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>>> +                                spapr_phb_pci_enumerate_bridge,
>>> +                                bus_no);
>>> +            pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +static void spapr_phb_pci_enumerate(sPAPRPHBState *phb)
>>> +{
>>> +    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
>>> +    unsigned int bus_no = 0;
>>> +
>>> +    pci_for_each_device(bus, pci_bus_num(bus),
>>> +                        spapr_phb_pci_enumerate_bridge,
>>> +                        &bus_no);
>>> +
>>> +}
>>> +
>>>    int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>>                              uint32_t xics_phandle,
>>>                              void *fdt)
>>> @@ -1521,6 +1619,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>>            cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
>>>        uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
>>>        sPAPRTCETable *tcet;
>>> +    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
>>> +    sPAPRFDT s_fdt;
>>>
>>>        /* Start populating the FDT */
>>>        sprintf(nodename, "pci@%" PRIx64, phb->buid);
>>> @@ -1570,6 +1670,18 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>>                     tcet->liobn, tcet->bus_offset,
>>>                     tcet->nb_table << tcet->page_shift);
>>>
>>> +    /* Walk the bridges and program the bus numbers*/
>>> +    spapr_phb_pci_enumerate(phb);
>>> +    _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));
>>
>>
>> Can we also add a hack here to scan for the "qemu,phb-enumerated" string in
>> the SLOF bin image?
>
> Really ? That would be ugly.


Well, chances that the binary image will have this line by accident are zero.

And I spent quite some time debugging SRIOV + VFIO when I realized that 
SLOF is old on the test machine where others used to debug too. It would be 
really nice to have a warning that something is wrong. May be extend 
"client-architecture-support" somehow or have some release/date signature 
in known place in SLOF... Thomas (?) also asked for this :)



>
>>
>>> +
>>> +    /* Populate tree nodes with PCI devices attached */
>>> +    s_fdt.fdt = fdt;
>>> +    s_fdt.node_off = bus_off;
>>> +    s_fdt.sphb = phb;
>>> +    pci_for_each_device(bus, pci_bus_num(bus),
>>> +                        spapr_populate_pci_devices_dt,
>>> +                        &s_fdt);
>>> +
>>>        ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
>>>                                    SPAPR_DR_CONNECTOR_TYPE_PCI);
>>>        if (ret) {
>>>
diff mbox

Patch

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 8b02a3e..12f1b9c 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -23,6 +23,7 @@ 
  * THE SOFTWARE.
  */
 #include "hw/hw.h"
+#include "hw/sysbus.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
@@ -35,6 +36,7 @@ 
 #include "qemu/error-report.h"
 #include "qapi/qmp/qerror.h"
 
+#include "hw/pci/pci_bridge.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/ppc/spapr_drc.h"
 #include "sysemu/device_tree.h"
@@ -742,6 +744,31 @@  static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &phb->iommu_as;
 }
 
+
+static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
+                                               PCIDevice *pdev)
+{
+    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
+    return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
+                                    (phb->index << 16) |
+                                    (busnr << 8) |
+                                    pdev->devfn);
+}
+
+static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
+                                            PCIDevice *pdev)
+{
+    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
+    sPAPRDRConnectorClass *drck;
+
+    if (!drc) {
+        return 0;
+    }
+
+    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+    return drck->get_index(drc);
+}
+
 /* Macros to operate with address in OF binding to PCI */
 #define b_x(x, p, l)    (((x) & ((1<<(l))-1)) << (p))
 #define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
@@ -879,12 +906,13 @@  static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
 }
 
 static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
-                                       int phb_index, int drc_index,
+                                       sPAPRPHBState *sphb,
                                        const char *drc_name)
 {
     ResourceProps rp;
     bool is_bridge = false;
     int pci_status;
+    uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
 
     if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
         PCI_HEADER_TYPE_BRIDGE) {
@@ -945,8 +973,13 @@  static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
      * processed by OF beforehand
      */
     _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
-    _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name)));
-    _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
+    if (drc_name) {
+        _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name,
+                         strlen(drc_name)));
+    }
+    if (drc_index) {
+        _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
+    }
 
     _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
                           RESOURCE_CELLS_ADDRESS));
@@ -963,30 +996,34 @@  static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
     return 0;
 }
 
+typedef struct sPAPRFDT {
+    void *fdt;
+    int node_off;
+    sPAPRPHBState *sphb;
+} sPAPRFDT;
+
 /* create OF node for pci device and required OF DT properties */
-static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
-                                       int drc_index, const char *drc_name,
-                                       int *dt_offset)
+static int spapr_create_pci_child_dt(PCIDevice *pdev, sPAPRFDT *p,
+                                     const char *drc_name)
 {
-    void *fdt;
-    int offset, ret, fdt_size;
-    int slot = PCI_SLOT(dev->devfn);
-    int func = PCI_FUNC(dev->devfn);
-    char nodename[512];
+    int offset, ret;
+    char nodename[64];
+    int slot = PCI_SLOT(pdev->devfn);
+    int func = PCI_FUNC(pdev->devfn);
 
-    fdt = create_device_tree(&fdt_size);
     if (func != 0) {
         sprintf(nodename, "pci@%d,%d", slot, func);
     } else {
         sprintf(nodename, "pci@%d", slot);
     }
-    offset = fdt_add_subnode(fdt, 0, nodename);
-    ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index,
+    offset = fdt_add_subnode(p->fdt, p->node_off, nodename);
+    ret = spapr_populate_pci_child_dt(pdev, p->fdt, offset, p->sphb,
                                       drc_name);
     g_assert(!ret);
-
-    *dt_offset = offset;
-    return fdt;
+    if (ret) {
+        return 0;
+    }
+    return offset;
 }
 
 static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
@@ -996,24 +1033,26 @@  static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
 {
     sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
     DeviceState *dev = DEVICE(pdev);
-    int drc_index = drck->get_index(drc);
     const char *drc_name = drck->get_name(drc);
-    void *fdt = NULL;
-    int fdt_start_offset = 0;
+    int fdt_start_offset = 0, fdt_size;
+    sPAPRFDT s_fdt = {NULL, 0, NULL};
 
-    /* boot-time devices get their device tree node created by SLOF, but for
-     * hotplugged devices we need QEMU to generate it so the guest can fetch
-     * it via RTAS
-     */
     if (dev->hotplugged) {
-        fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name,
-                                        &fdt_start_offset);
+        s_fdt.fdt = create_device_tree(&fdt_size);
+        s_fdt.sphb = phb;
+        s_fdt.node_off = 0;
+        fdt_start_offset = spapr_create_pci_child_dt(pdev, &s_fdt, drc_name);
+        if (!fdt_start_offset) {
+            error_setg(errp, "Failed to create pci child device tree node");
+            goto out;
+        }
     }
 
     drck->attach(drc, DEVICE(pdev),
-                 fdt, fdt_start_offset, !dev->hotplugged, errp);
+                 s_fdt.fdt, fdt_start_offset, !dev->hotplugged, errp);
+out:
     if (*errp) {
-        g_free(fdt);
+        g_free(s_fdt.fdt);
     }
 }
 
@@ -1043,16 +1082,6 @@  static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
     drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp);
 }
 
-static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
-                                               PCIDevice *pdev)
-{
-    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
-    return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
-                                    (phb->index << 16) |
-                                    (busnr << 8) |
-                                    pdev->devfn);
-}
-
 static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler,
                                      DeviceState *plugged_dev, Error **errp)
 {
@@ -1482,6 +1511,75 @@  PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
     return PCI_HOST_BRIDGE(dev);
 }
 
+static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
+                                          void *opaque)
+{
+    PCIBus *sec_bus;
+    sPAPRFDT *p = opaque;
+    int offset;
+    sPAPRFDT s_fdt;
+
+    offset = spapr_create_pci_child_dt(pdev, p, NULL);
+    if (!offset) {
+        error_report("Failed to create pci child device tree node");
+        return;
+    }
+
+    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
+         PCI_HEADER_TYPE_BRIDGE)) {
+        return;
+    }
+
+    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
+    if (!sec_bus) {
+        return;
+    }
+
+    s_fdt.fdt = p->fdt;
+    s_fdt.node_off = offset;
+    s_fdt.sphb = p->sphb;
+    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+                        spapr_populate_pci_devices_dt,
+                        &s_fdt);
+}
+
+static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
+                                           void *opaque)
+{
+    unsigned int *bus_no = opaque;
+    unsigned int primary = *bus_no;
+    unsigned int secondary;
+    unsigned int subordinate = 0xff;
+
+    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) ==
+         PCI_HEADER_TYPE_BRIDGE)) {
+        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
+        secondary = *bus_no + 1;
+        pci_default_write_config(pdev, PCI_PRIMARY_BUS, primary, 1);
+        pci_default_write_config(pdev, PCI_SECONDARY_BUS, secondary, 1);
+        pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, secondary, 1);
+        *bus_no = *bus_no + 1;
+        if (sec_bus) {
+            pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, subordinate, 1);
+            pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+                                spapr_phb_pci_enumerate_bridge,
+                                bus_no);
+            pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
+        }
+    }
+}
+
+static void spapr_phb_pci_enumerate(sPAPRPHBState *phb)
+{
+    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
+    unsigned int bus_no = 0;
+
+    pci_for_each_device(bus, pci_bus_num(bus),
+                        spapr_phb_pci_enumerate_bridge,
+                        &bus_no);
+
+}
+
 int spapr_populate_pci_dt(sPAPRPHBState *phb,
                           uint32_t xics_phandle,
                           void *fdt)
@@ -1521,6 +1619,8 @@  int spapr_populate_pci_dt(sPAPRPHBState *phb,
         cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
     uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
     sPAPRTCETable *tcet;
+    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
+    sPAPRFDT s_fdt;
 
     /* Start populating the FDT */
     sprintf(nodename, "pci@%" PRIx64, phb->buid);
@@ -1570,6 +1670,18 @@  int spapr_populate_pci_dt(sPAPRPHBState *phb,
                  tcet->liobn, tcet->bus_offset,
                  tcet->nb_table << tcet->page_shift);
 
+    /* Walk the bridges and program the bus numbers*/
+    spapr_phb_pci_enumerate(phb);
+    _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));
+
+    /* Populate tree nodes with PCI devices attached */
+    s_fdt.fdt = fdt;
+    s_fdt.node_off = bus_off;
+    s_fdt.sphb = phb;
+    pci_for_each_device(bus, pci_bus_num(bus),
+                        spapr_populate_pci_devices_dt,
+                        &s_fdt);
+
     ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
                                 SPAPR_DR_CONNECTOR_TYPE_PCI);
     if (ret) {