[v2,3/4] arm: Add PCIe host bridge in virt machine

Message ID 1421857131-18539-4-git-send-email-agraf@suse.de
State New

Commit Message

Alexander Graf Jan. 21, 2015, 4:18 p.m. UTC
Now that we have a working "generic" PCIe host bridge driver, we can plug
it into ARMs virt machine to always have PCIe available to normal ARM VMs.

I've successfully managed to expose a Bochs VGA device, XHCI and an e1000
into an AArch64 VM with this and they all lived happily ever after.

Signed-off-by: Alexander Graf <agraf@suse.de>
Tested-by: Claudio Fontana <claudio.fontana@huawei.com>

---

Linux 3.19 only supports the generic PCIe host bridge driver for 32bit ARM
systems. If you want to use it with AArch64 guests, please apply the following
patch or wait until upstream cleaned up the code properly:

  http://csgraf.de/agraf/pci/pci-3.19.patch

v1 -> v2:

  - Add define for pci range types
  - Remove mmio_window_size
  - Use 4 PCI INTX IRQ lines
---
 default-configs/arm-softmmu.mak |   2 +
 hw/arm/virt.c                   | 112 ++++++++++++++++++++++++++++++++++++++--
 include/sysemu/device_tree.h    |   9 ++++
 3 files changed, 118 insertions(+), 5 deletions(-)

Comments

Claudio Fontana Jan. 22, 2015, 3:28 p.m. UTC | #1
Hi Alexander,

thank you for the respin. I retested with the new mappings on OSv for AArch64,
and they show up ok in the guest.

Just a couple minor comments below, otherwise I think this is good.

On 21.01.2015 17:18, Alexander Graf wrote:
> Now that we have a working "generic" PCIe host bridge driver, we can plug
> it into ARMs virt machine to always have PCIe available to normal ARM VMs.
> 
> I've successfully managed to expose a Bochs VGA device, XHCI and an e1000
> into an AArch64 VM with this and they all lived happily ever after.
> 
> Signed-off-by: Alexander Graf <agraf@suse.de>
> Tested-by: Claudio Fontana <claudio.fontana@huawei.com>
> 
> ---
> 
> Linux 3.19 only supports the generic PCIe host bridge driver for 32bit ARM
> systems. If you want to use it with AArch64 guests, please apply the following
> patch or wait until upstream cleaned up the code properly:
> 
>   http://csgraf.de/agraf/pci/pci-3.19.patch
> 
> v1 -> v2:
> 
>   - Add define for pci range types
>   - Remove mmio_window_size
>   - Use 4 PCI INTX IRQ lines
> ---
>  default-configs/arm-softmmu.mak |   2 +
>  hw/arm/virt.c                   | 112 ++++++++++++++++++++++++++++++++++++++--
>  include/sysemu/device_tree.h    |   9 ++++
>  3 files changed, 118 insertions(+), 5 deletions(-)
> 
> diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
> index f3513fa..7671ee2 100644
> --- a/default-configs/arm-softmmu.mak
> +++ b/default-configs/arm-softmmu.mak
> @@ -82,6 +82,8 @@ CONFIG_ZYNQ=y
>  CONFIG_VERSATILE_PCI=y
>  CONFIG_VERSATILE_I2C=y
>  
> +CONFIG_PCI_GENERIC=y
> +
>  CONFIG_SDHCI=y
>  CONFIG_INTEGRATOR_DEBUG=y
>  
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 2353440..a727b4e 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -42,6 +42,7 @@
>  #include "exec/address-spaces.h"
>  #include "qemu/bitops.h"
>  #include "qemu/error-report.h"
> +#include "hw/pci-host/gpex.h"
>  
>  #define NUM_VIRTIO_TRANSPORTS 32
>  
> @@ -69,6 +70,7 @@ enum {
>      VIRT_MMIO,
>      VIRT_RTC,
>      VIRT_FW_CFG,
> +    VIRT_PCIE,
>  };
>  
>  typedef struct MemMapEntry {
> @@ -129,13 +131,14 @@ static const MemMapEntry a15memmap[] = {
>      [VIRT_FW_CFG] =     { 0x09020000, 0x0000000a },
>      [VIRT_MMIO] =       { 0x0a000000, 0x00000200 },
>      /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
> -    /* 0x10000000 .. 0x40000000 reserved for PCI */
> +    [VIRT_PCIE] =       { 0x10000000, 0x30000000 },
>      [VIRT_MEM] =        { 0x40000000, 30ULL * 1024 * 1024 * 1024 },
>  };
>  
>  static const int a15irqmap[] = {
>      [VIRT_UART] = 1,
>      [VIRT_RTC] = 2,
> +    [VIRT_PCIE] = 3, /* ... to 6 */
>      [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
>  };
>  
> @@ -312,7 +315,7 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
>      }
>  }
>  
> -static void fdt_add_gic_node(const VirtBoardInfo *vbi)
> +static uint32_t fdt_add_gic_node(const VirtBoardInfo *vbi)
>  {
>      uint32_t gic_phandle;
>  
> @@ -331,9 +334,11 @@ static void fdt_add_gic_node(const VirtBoardInfo *vbi)
>                                       2, vbi->memmap[VIRT_GIC_CPU].base,
>                                       2, vbi->memmap[VIRT_GIC_CPU].size);
>      qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", gic_phandle);
> +
> +    return gic_phandle;
>  }
>  
> -static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
> +static uint32_t create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>  {
>      /* We create a standalone GIC v2 */
>      DeviceState *gicdev;
> @@ -380,7 +385,7 @@ static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>          pic[i] = qdev_get_gpio_in(gicdev, i);
>      }
>  
> -    fdt_add_gic_node(vbi);
> +    return fdt_add_gic_node(vbi);
>  }
>  
>  static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
> @@ -556,6 +561,100 @@ static void create_fw_cfg(const VirtBoardInfo *vbi)
>      g_free(nodename);
>  }
>  
> +static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
> +                                int first_irq, const char *nodename)
> +{
> +    int devfn, pin;
> +    uint32_t full_irq_map[4 * 4 * 8] = { 0 };
> +    uint32_t *irq_map = full_irq_map;
> +
> +    for (devfn = 0; devfn <= 0x18; devfn+=0x8) {

devfn += 0x8 (spaces)

> +        for (pin = 0; pin < 4; pin++) {
> +            int irq_type = GIC_FDT_IRQ_TYPE_SPI;
> +            int irq_nr = first_irq + ((pin + PCI_SLOT(devfn)) % PCI_NUM_PINS);
> +            int irq_level = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
> +            int i;
> +
> +            uint32_t map[] = {
> +                devfn << 8, 0, 0,                           /* devfn */
> +                pin + 1,                                    /* PCI pin */
> +                gic_phandle, irq_type, irq_nr, irq_level }; /* GIC irq */
> +
> +            /* Convert map to big endian */
> +            for (i = 0; i < 8; i++) {
> +                irq_map[i] = cpu_to_be32(map[i]);
> +            }
> +            irq_map += 8;
> +        }
> +    }
> +
> +    qemu_fdt_setprop(vbi->fdt, nodename, "interrupt-map",
> +                     full_irq_map, sizeof(full_irq_map));
> +}

I raise again (? or maybe for the first time) the suggestion to add a comment like:

/* see the openfirmware specs at 
 * http://www.firmware.org/1275/practice/imap/imap0_9d.pdf
 */

and optionally also a reference to the linux host-generic-pci bindings:

/* see the linux Documentation about device tree bindings at:
 * https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/host-generic-pci.txt
 */

Peter mentioned that the documentation there is not perfect, but at least it's a starting point for the reader to understand what's going on.
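
For readers tracing those map entries, the swizzle can be checked with a
small standalone sketch (illustrative only; it assumes first_irq = 3 from
a15irqmap and that PCI_SLOT(devfn) is devfn >> 3, both as in this patch):

    #include <stdio.h>

    /* Prints the GIC SPI that each slot/pin pair lands on, mirroring the
     * irq_nr computation in create_pcie_irq_map() above. */
    int main(void)
    {
        int first_irq = 3;                      /* a15irqmap[VIRT_PCIE] */
        int devfn, pin;

        for (devfn = 0; devfn <= 0x18; devfn += 0x8) {
            for (pin = 0; pin < 4; pin++) {
                int slot = devfn >> 3;          /* PCI_SLOT(devfn) */
                int irq_nr = first_irq + ((pin + slot) % 4);

                printf("slot %d INT%c -> SPI %d\n", slot, 'A' + pin, irq_nr);
            }
        }
        return 0;
    }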

> +
> +static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
> +                        uint32_t gic_phandle)
> +{
> +    hwaddr base = vbi->memmap[VIRT_PCIE].base;
> +    hwaddr size = vbi->memmap[VIRT_PCIE].size;
> +    hwaddr size_ioport = 64 * 1024;
> +    hwaddr size_ecam = PCIE_MMCFG_SIZE_MIN;
> +    hwaddr size_mmio = size - size_ecam - size_ioport;
> +    hwaddr base_mmio = base;
> +    hwaddr base_ioport = base_mmio + size_mmio;
> +    hwaddr base_ecam = base_ioport + size_ioport;
> +    int irq = vbi->irqmap[VIRT_PCIE];
> +    MemoryRegion *mmio_alias;
> +    MemoryRegion *mmio_reg;
> +    DeviceState *dev;
> +    char *nodename;
> +    int i;
> +
> +    dev = qdev_create(NULL, TYPE_GPEX_HOST);
> +    qdev_init_nofail(dev);
> +
> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base_ecam);
> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, base_ioport);
> +
> +    /* Map the MMIO window at the same spot in bus and cpu layouts */
> +    mmio_alias = g_new0(MemoryRegion, 1);
> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
> +                             mmio_reg, base_mmio, size_mmio);
> +    memory_region_add_subregion(get_system_memory(), base_mmio, mmio_alias);
> +
> +    for (i = 0; i < 4; i++) {
> +        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
> +    }
> +
> +    nodename = g_strdup_printf("/pcie@%" PRIx64, base);
> +    qemu_fdt_add_subnode(vbi->fdt, nodename);
> +    qemu_fdt_setprop_string(vbi->fdt, nodename,
> +                            "compatible", "pci-host-ecam-generic");
> +    qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "pci");
> +    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#address-cells", 3);
> +    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#size-cells", 2);
> +    qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0, 1);
> +
> +    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
> +                                 2, base_ecam, 2, size_ecam);
> +    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
> +                                 1, FDT_PCI_RANGE_IOPORT, 2, 0,
> +                                 2, base_ioport, 2, size_ioport,
> +
> +                                 1, FDT_PCI_RANGE_MMIO, 2, base_mmio,
> +                                 2, base_mmio, 2, size_mmio);
> +
> +    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);
> +    create_pcie_irq_map(vbi, gic_phandle, irq, nodename);
> +
> +    qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupt-map-mask",
> +                           0x1800, 0, 0, /* devfn (PCI_SLOT(3)) */
> +                           0x7           /* PCI irq */);
> +
> +    g_free(nodename);
> +}
> +
>  static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
>  {
>      const VirtBoardInfo *board = (const VirtBoardInfo *)binfo;
> @@ -573,6 +672,7 @@ static void machvirt_init(MachineState *machine)
>      MemoryRegion *ram = g_new(MemoryRegion, 1);
>      const char *cpu_model = machine->cpu_model;
>      VirtBoardInfo *vbi;
> +    uint32_t gic_phandle;
>  
>      if (!cpu_model) {
>          cpu_model = "cortex-a15";
> @@ -634,12 +734,14 @@ static void machvirt_init(MachineState *machine)
>  
>      create_flash(vbi);
>  
> -    create_gic(vbi, pic);
> +    gic_phandle = create_gic(vbi, pic);
>  
>      create_uart(vbi, pic);
>  
>      create_rtc(vbi, pic);
>  
> +    create_pcie(vbi, pic, gic_phandle);
> +
>      /* Create mmio transports, so the user can create virtio backends
>       * (which will be automatically plugged in to the transports). If
>       * no backend is created the transport will just sit harmlessly idle.
> diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h
> index 899f05c..979169b 100644
> --- a/include/sysemu/device_tree.h
> +++ b/include/sysemu/device_tree.h
> @@ -110,4 +110,13 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
>                                                  qdt_tmp);                 \
>      })
>  
> +#define FDT_PCI_RANGE_RELOCATABLE          0x80000000
> +#define FDT_PCI_RANGE_PREFETCHABLE         0x40000000
> +#define FDT_PCI_RANGE_ALIASED              0x20000000
> +#define FDT_PCI_RANGE_TYPE                 0x03000000
> +#define FDT_PCI_RANGE_MMIO_64BIT           0x03000000
> +#define FDT_PCI_RANGE_MMIO                 0x02000000
> +#define FDT_PCI_RANGE_IOPORT               0x01000000
> +#define FDT_PCI_RANGE_CONFIG               0x00000000
> +
>  #endif /* __DEVICE_TREE_H__ */
> 

I appreciate those defines.

Thank you,

Claudio
Alexander Graf Jan. 22, 2015, 3:52 p.m. UTC | #2
On 22.01.15 16:28, Claudio Fontana wrote:
> Hi Alexander,
> 
> thank you for the respin. I retested with the new mappings on OSv for AArch64,
> and they show up ok in the guest.
> 
> Just a couple minor comments below, otherwise I think this is good.
> 
> On 21.01.2015 17:18, Alexander Graf wrote:
>> Now that we have a working "generic" PCIe host bridge driver, we can plug
>> it into ARMs virt machine to always have PCIe available to normal ARM VMs.
>>
>> I've successfully managed to expose a Bochs VGA device, XHCI and an e1000
>> into an AArch64 VM with this and they all lived happily ever after.
>>
>> Signed-off-by: Alexander Graf <agraf@suse.de>
>> Tested-by: Claudio Fontana <claudio.fontana@huawei.com>
>>
>> ---
>>
>> Linux 3.19 only supports the generic PCIe host bridge driver for 32bit ARM
>> systems. If you want to use it with AArch64 guests, please apply the following
>> patch or wait until upstream cleaned up the code properly:
>>
>>   http://csgraf.de/agraf/pci/pci-3.19.patch
>>
>> v1 -> v2:
>>
>>   - Add define for pci range types
>>   - Remove mmio_window_size
>>   - Use 4 PCI INTX IRQ lines
>> ---
>>  default-configs/arm-softmmu.mak |   2 +
>>  hw/arm/virt.c                   | 112 ++++++++++++++++++++++++++++++++++++++--
>>  include/sysemu/device_tree.h    |   9 ++++
>>  3 files changed, 118 insertions(+), 5 deletions(-)
>>
>> diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
>> index f3513fa..7671ee2 100644
>> --- a/default-configs/arm-softmmu.mak
>> +++ b/default-configs/arm-softmmu.mak
>> @@ -82,6 +82,8 @@ CONFIG_ZYNQ=y
>>  CONFIG_VERSATILE_PCI=y
>>  CONFIG_VERSATILE_I2C=y
>>  
>> +CONFIG_PCI_GENERIC=y
>> +
>>  CONFIG_SDHCI=y
>>  CONFIG_INTEGRATOR_DEBUG=y
>>  
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index 2353440..a727b4e 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -42,6 +42,7 @@
>>  #include "exec/address-spaces.h"
>>  #include "qemu/bitops.h"
>>  #include "qemu/error-report.h"
>> +#include "hw/pci-host/gpex.h"
>>  
>>  #define NUM_VIRTIO_TRANSPORTS 32
>>  
>> @@ -69,6 +70,7 @@ enum {
>>      VIRT_MMIO,
>>      VIRT_RTC,
>>      VIRT_FW_CFG,
>> +    VIRT_PCIE,
>>  };
>>  
>>  typedef struct MemMapEntry {
>> @@ -129,13 +131,14 @@ static const MemMapEntry a15memmap[] = {
>>      [VIRT_FW_CFG] =     { 0x09020000, 0x0000000a },
>>      [VIRT_MMIO] =       { 0x0a000000, 0x00000200 },
>>      /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
>> -    /* 0x10000000 .. 0x40000000 reserved for PCI */
>> +    [VIRT_PCIE] =       { 0x10000000, 0x30000000 },
>>      [VIRT_MEM] =        { 0x40000000, 30ULL * 1024 * 1024 * 1024 },
>>  };
>>  
>>  static const int a15irqmap[] = {
>>      [VIRT_UART] = 1,
>>      [VIRT_RTC] = 2,
>> +    [VIRT_PCIE] = 3, /* ... to 6 */
>>      [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
>>  };
>>  
>> @@ -312,7 +315,7 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
>>      }
>>  }
>>  
>> -static void fdt_add_gic_node(const VirtBoardInfo *vbi)
>> +static uint32_t fdt_add_gic_node(const VirtBoardInfo *vbi)
>>  {
>>      uint32_t gic_phandle;
>>  
>> @@ -331,9 +334,11 @@ static void fdt_add_gic_node(const VirtBoardInfo *vbi)
>>                                       2, vbi->memmap[VIRT_GIC_CPU].base,
>>                                       2, vbi->memmap[VIRT_GIC_CPU].size);
>>      qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", gic_phandle);
>> +
>> +    return gic_phandle;
>>  }
>>  
>> -static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>> +static uint32_t create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>>  {
>>      /* We create a standalone GIC v2 */
>>      DeviceState *gicdev;
>> @@ -380,7 +385,7 @@ static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>>          pic[i] = qdev_get_gpio_in(gicdev, i);
>>      }
>>  
>> -    fdt_add_gic_node(vbi);
>> +    return fdt_add_gic_node(vbi);
>>  }
>>  
>>  static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
>> @@ -556,6 +561,100 @@ static void create_fw_cfg(const VirtBoardInfo *vbi)
>>      g_free(nodename);
>>  }
>>  
>> +static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
>> +                                int first_irq, const char *nodename)
>> +{
>> +    int devfn, pin;
>> +    uint32_t full_irq_map[4 * 4 * 8] = { 0 };
>> +    uint32_t *irq_map = full_irq_map;
>> +
>> +    for (devfn = 0; devfn <= 0x18; devfn+=0x8) {
> 
> devfn += 0x8 (spaces)

Yeah, I had it like that and it looked uglier. I guess it's a matter of
personal preference?

> 
>> +        for (pin = 0; pin < 4; pin++) {
>> +            int irq_type = GIC_FDT_IRQ_TYPE_SPI;
>> +            int irq_nr = first_irq + ((pin + PCI_SLOT(devfn)) % PCI_NUM_PINS);
>> +            int irq_level = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
>> +            int i;
>> +
>> +            uint32_t map[] = {
>> +                devfn << 8, 0, 0,                           /* devfn */
>> +                pin + 1,                                    /* PCI pin */
>> +                gic_phandle, irq_type, irq_nr, irq_level }; /* GIC irq */
>> +
>> +            /* Convert map to big endian */
>> +            for (i = 0; i < 8; i++) {
>> +                irq_map[i] = cpu_to_be32(map[i]);
>> +            }
>> +            irq_map += 8;
>> +        }
>> +    }
>> +
>> +    qemu_fdt_setprop(vbi->fdt, nodename, "interrupt-map",
>> +                     full_irq_map, sizeof(full_irq_map));
>> +}
> 
> I raise again (? or maybe for the first time) the suggestion to add a comment like:
> 
> /* see the openfirmware specs at 
>  * http://www.firmware.org/1275/practice/imap/imap0_9d.pdf
>  */
> 
> and optionally also a reference to the linux host-generic-pci bindings:
> 
> /* see the linux Documentation about device tree bindings at:
>  * https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/host-generic-pci.txt
>  */

I added those links to the big description comment at the top of this file.


Alex
Claudio Fontana Jan. 27, 2015, 9:24 a.m. UTC | #3
On 22.01.2015 16:52, Alexander Graf wrote:
> 
> 
> On 22.01.15 16:28, Claudio Fontana wrote:
>> Hi Alexander,
>>
>> thank you for the respin. I retested with the new mappings on OSv for AArch64,
>> and they show up ok in the guest.
>>
>> Just a couple minor comments below, otherwise I think this is good.
>>
>> On 21.01.2015 17:18, Alexander Graf wrote:
>>> Now that we have a working "generic" PCIe host bridge driver, we can plug
>>> it into ARMs virt machine to always have PCIe available to normal ARM VMs.
>>>
>>> I've successfully managed to expose a Bochs VGA device, XHCI and an e1000
>>> into an AArch64 VM with this and they all lived happily ever after.
>>>
>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>> Tested-by: Claudio Fontana <claudio.fontana@huawei.com>
>>>
>>> ---
>>>
>>> Linux 3.19 only supports the generic PCIe host bridge driver for 32bit ARM
>>> systems. If you want to use it with AArch64 guests, please apply the following
>>> patch or wait until upstream cleaned up the code properly:
>>>
>>>   http://csgraf.de/agraf/pci/pci-3.19.patch
>>>
>>> v1 -> v2:
>>>
>>>   - Add define for pci range types
>>>   - Remove mmio_window_size
>>>   - Use 4 PCI INTX IRQ lines
>>> ---
>>>  default-configs/arm-softmmu.mak |   2 +
>>>  hw/arm/virt.c                   | 112 ++++++++++++++++++++++++++++++++++++++--
>>>  include/sysemu/device_tree.h    |   9 ++++
>>>  3 files changed, 118 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
>>> index f3513fa..7671ee2 100644
>>> --- a/default-configs/arm-softmmu.mak
>>> +++ b/default-configs/arm-softmmu.mak
>>> @@ -82,6 +82,8 @@ CONFIG_ZYNQ=y
>>>  CONFIG_VERSATILE_PCI=y
>>>  CONFIG_VERSATILE_I2C=y
>>>  
>>> +CONFIG_PCI_GENERIC=y
>>> +
>>>  CONFIG_SDHCI=y
>>>  CONFIG_INTEGRATOR_DEBUG=y
>>>  
>>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>>> index 2353440..a727b4e 100644
>>> --- a/hw/arm/virt.c
>>> +++ b/hw/arm/virt.c
>>> @@ -42,6 +42,7 @@
>>>  #include "exec/address-spaces.h"
>>>  #include "qemu/bitops.h"
>>>  #include "qemu/error-report.h"
>>> +#include "hw/pci-host/gpex.h"
>>>  
>>>  #define NUM_VIRTIO_TRANSPORTS 32
>>>  
>>> @@ -69,6 +70,7 @@ enum {
>>>      VIRT_MMIO,
>>>      VIRT_RTC,
>>>      VIRT_FW_CFG,
>>> +    VIRT_PCIE,
>>>  };
>>>  
>>>  typedef struct MemMapEntry {
>>> @@ -129,13 +131,14 @@ static const MemMapEntry a15memmap[] = {
>>>      [VIRT_FW_CFG] =     { 0x09020000, 0x0000000a },
>>>      [VIRT_MMIO] =       { 0x0a000000, 0x00000200 },
>>>      /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
>>> -    /* 0x10000000 .. 0x40000000 reserved for PCI */
>>> +    [VIRT_PCIE] =       { 0x10000000, 0x30000000 },
>>>      [VIRT_MEM] =        { 0x40000000, 30ULL * 1024 * 1024 * 1024 },
>>>  };
>>>  
>>>  static const int a15irqmap[] = {
>>>      [VIRT_UART] = 1,
>>>      [VIRT_RTC] = 2,
>>> +    [VIRT_PCIE] = 3, /* ... to 6 */
>>>      [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
>>>  };
>>>  
>>> @@ -312,7 +315,7 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
>>>      }
>>>  }
>>>  
>>> -static void fdt_add_gic_node(const VirtBoardInfo *vbi)
>>> +static uint32_t fdt_add_gic_node(const VirtBoardInfo *vbi)
>>>  {
>>>      uint32_t gic_phandle;
>>>  
>>> @@ -331,9 +334,11 @@ static void fdt_add_gic_node(const VirtBoardInfo *vbi)
>>>                                       2, vbi->memmap[VIRT_GIC_CPU].base,
>>>                                       2, vbi->memmap[VIRT_GIC_CPU].size);
>>>      qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", gic_phandle);
>>> +
>>> +    return gic_phandle;
>>>  }
>>>  
>>> -static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>>> +static uint32_t create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>>>  {
>>>      /* We create a standalone GIC v2 */
>>>      DeviceState *gicdev;
>>> @@ -380,7 +385,7 @@ static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>>>          pic[i] = qdev_get_gpio_in(gicdev, i);
>>>      }
>>>  
>>> -    fdt_add_gic_node(vbi);
>>> +    return fdt_add_gic_node(vbi);
>>>  }
>>>  
>>>  static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
>>> @@ -556,6 +561,100 @@ static void create_fw_cfg(const VirtBoardInfo *vbi)
>>>      g_free(nodename);
>>>  }
>>>  
>>> +static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
>>> +                                int first_irq, const char *nodename)
>>> +{
>>> +    int devfn, pin;
>>> +    uint32_t full_irq_map[4 * 4 * 8] = { 0 };
>>> +    uint32_t *irq_map = full_irq_map;
>>> +
>>> +    for (devfn = 0; devfn <= 0x18; devfn+=0x8) {
>>
>> devfn += 0x8 (spaces)
> 
> Yeah, I had it like that and it looked uglier. I guess it's a matter of
> personal preference?

You don't have coding standards for this?

> 
>>
>>> +        for (pin = 0; pin < 4; pin++) {
>>> +            int irq_type = GIC_FDT_IRQ_TYPE_SPI;
>>> +            int irq_nr = first_irq + ((pin + PCI_SLOT(devfn)) % PCI_NUM_PINS);
>>> +            int irq_level = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
>>> +            int i;
>>> +
>>> +            uint32_t map[] = {
>>> +                devfn << 8, 0, 0,                           /* devfn */
>>> +                pin + 1,                                    /* PCI pin */
>>> +                gic_phandle, irq_type, irq_nr, irq_level }; /* GIC irq */
>>> +
>>> +            /* Convert map to big endian */
>>> +            for (i = 0; i < 8; i++) {
>>> +                irq_map[i] = cpu_to_be32(map[i]);
>>> +            }
>>> +            irq_map += 8;
>>> +        }
>>> +    }
>>> +
>>> +    qemu_fdt_setprop(vbi->fdt, nodename, "interrupt-map",
>>> +                     full_irq_map, sizeof(full_irq_map));
>>> +}
>>
>> I raise again (? or maybe for the first time) the suggestion to add a comment like:
>>
>> /* see the openfirmware specs at 
>>  * http://www.firmware.org/1275/practice/imap/imap0_9d.pdf
>>  */
>>
>> and optionally also a reference to the linux host-generic-pci bindings:
>>
>> /* see the linux Documentation about device tree bindings at:
>>  * https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/host-generic-pci.txt
>>  */
> 
> I added those links to the big description comment at the top of this file.
> 

It is not at the top of this file, unfortunately.
You put the description comment in an unrelated .h file: the pointers to the docs belong here, I think.

Ciao,

Claudio
Peter Maydell Jan. 27, 2015, 10:09 a.m. UTC | #4
On 27 January 2015 at 09:24, Claudio Fontana <claudio.fontana@huawei.com> wrote:
> On 22.01.2015 16:52, Alexander Graf wrote:
>> On 22.01.15 16:28, Claudio Fontana wrote:
>>> Alex wrote:
>>>> +    for (devfn = 0; devfn <= 0x18; devfn+=0x8) {
>>>
>>> devfn += 0x8 (spaces)
>>
>> Yeah, I had it like that and it looked uglier. I guess it's a matter of
>> personal preference?
>
> You don't have coding standards for this?

Not formal ones, but (a) I think += should have spaces around
it and (b) I'm a bit surprised if checkpatch doesn't catch
this (it certainly attempts to...)

-- PMM
Claudio Fontana Jan. 27, 2015, 2:37 p.m. UTC | #5
On 27.01.2015 11:09, Peter Maydell wrote:
> On 27 January 2015 at 09:24, Claudio Fontana <claudio.fontana@huawei.com> wrote:
>> On 22.01.2015 16:52, Alexander Graf wrote:
>>> On 22.01.15 16:28, Claudio Fontana wrote:
>>>> Alex wrote:
>>>>> +    for (devfn = 0; devfn <= 0x18; devfn+=0x8) {
>>>>
>>>> devfn += 0x8 (spaces)
>>>
>>> Yeah, I had it like that and it looked uglier. I guess it's a matter of
>>> personal preference?
>>
>> You don't have coding standards for this?
> 
> Not formal ones, but (a) I think += should have spaces around
> it and (b) I'm a bit surprised if checkpatch doesn't catch
> this (it certainly attempts to...)
> 
> -- PMM

It does:

ERROR: spaces required around that '+=' (ctx:VxV)
#167: FILE: hw/arm/virt.c:571:
+    for (devfn = 0; devfn <= 0x18; devfn+=0x8) {
                                         ^

Claudio
Peter Maydell Jan. 27, 2015, 4:52 p.m. UTC | #6
On 21 January 2015 at 16:18, Alexander Graf <agraf@suse.de> wrote:
> Now that we have a working "generic" PCIe host bridge driver, we can plug
> it into ARMs virt machine to always have PCIe available to normal ARM VMs.

"ARM's".

>
> I've successfully managed to expose a Bochs VGA device, XHCI and an e1000
> into an AArch64 VM with this and they all lived happily ever after.
>
> Signed-off-by: Alexander Graf <agraf@suse.de>
> Tested-by: Claudio Fontana <claudio.fontana@huawei.com>
>
> ---
>
> Linux 3.19 only supports the generic PCIe host bridge driver for 32bit ARM
> systems. If you want to use it with AArch64 guests, please apply the following
> patch or wait until upstream cleaned up the code properly:
>
>   http://csgraf.de/agraf/pci/pci-3.19.patch
>
> v1 -> v2:
>
>   - Add define for pci range types
>   - Remove mmio_window_size
>   - Use 4 PCI INTX IRQ lines
> ---
>  default-configs/arm-softmmu.mak |   2 +
>  hw/arm/virt.c                   | 112 ++++++++++++++++++++++++++++++++++++++--
>  include/sysemu/device_tree.h    |   9 ++++
>  3 files changed, 118 insertions(+), 5 deletions(-)
>
> diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
> index f3513fa..7671ee2 100644
> --- a/default-configs/arm-softmmu.mak
> +++ b/default-configs/arm-softmmu.mak
> @@ -82,6 +82,8 @@ CONFIG_ZYNQ=y
>  CONFIG_VERSATILE_PCI=y
>  CONFIG_VERSATILE_I2C=y
>
> +CONFIG_PCI_GENERIC=y
> +
>  CONFIG_SDHCI=y
>  CONFIG_INTEGRATOR_DEBUG=y
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 2353440..a727b4e 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -42,6 +42,7 @@
>  #include "exec/address-spaces.h"
>  #include "qemu/bitops.h"
>  #include "qemu/error-report.h"
> +#include "hw/pci-host/gpex.h"
>
>  #define NUM_VIRTIO_TRANSPORTS 32
>
> @@ -69,6 +70,7 @@ enum {
>      VIRT_MMIO,
>      VIRT_RTC,
>      VIRT_FW_CFG,
> +    VIRT_PCIE,
>  };
>
>  typedef struct MemMapEntry {
> @@ -129,13 +131,14 @@ static const MemMapEntry a15memmap[] = {
>      [VIRT_FW_CFG] =     { 0x09020000, 0x0000000a },
>      [VIRT_MMIO] =       { 0x0a000000, 0x00000200 },
>      /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
> -    /* 0x10000000 .. 0x40000000 reserved for PCI */
> +    [VIRT_PCIE] =       { 0x10000000, 0x30000000 },

Might be nice to have a comment about how this is split up between
MMIO, IO and config space.

>      [VIRT_MEM] =        { 0x40000000, 30ULL * 1024 * 1024 * 1024 },
>  };
>
>  static const int a15irqmap[] = {
>      [VIRT_UART] = 1,
>      [VIRT_RTC] = 2,
> +    [VIRT_PCIE] = 3, /* ... to 6 */
>      [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
>  };
>
> @@ -312,7 +315,7 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
>      }
>  }
>
> -static void fdt_add_gic_node(const VirtBoardInfo *vbi)
> +static uint32_t fdt_add_gic_node(const VirtBoardInfo *vbi)
>  {
>      uint32_t gic_phandle;
>
> @@ -331,9 +334,11 @@ static void fdt_add_gic_node(const VirtBoardInfo *vbi)
>                                       2, vbi->memmap[VIRT_GIC_CPU].base,
>                                       2, vbi->memmap[VIRT_GIC_CPU].size);
>      qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", gic_phandle);
> +
> +    return gic_phandle;
>  }
>
> -static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
> +static uint32_t create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>  {
>      /* We create a standalone GIC v2 */
>      DeviceState *gicdev;
> @@ -380,7 +385,7 @@ static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>          pic[i] = qdev_get_gpio_in(gicdev, i);
>      }
>
> -    fdt_add_gic_node(vbi);
> +    return fdt_add_gic_node(vbi);
>  }
>
>  static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
> @@ -556,6 +561,100 @@ static void create_fw_cfg(const VirtBoardInfo *vbi)
>      g_free(nodename);
>  }
>
> +static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
> +                                int first_irq, const char *nodename)
> +{
> +    int devfn, pin;
> +    uint32_t full_irq_map[4 * 4 * 8] = { 0 };
> +    uint32_t *irq_map = full_irq_map;
> +
> +    for (devfn = 0; devfn <= 0x18; devfn+=0x8) {
> +        for (pin = 0; pin < 4; pin++) {
> +            int irq_type = GIC_FDT_IRQ_TYPE_SPI;
> +            int irq_nr = first_irq + ((pin + PCI_SLOT(devfn)) % PCI_NUM_PINS);
> +            int irq_level = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
> +            int i;
> +
> +            uint32_t map[] = {
> +                devfn << 8, 0, 0,                           /* devfn */
> +                pin + 1,                                    /* PCI pin */
> +                gic_phandle, irq_type, irq_nr, irq_level }; /* GIC irq */
> +
> +            /* Convert map to big endian */
> +            for (i = 0; i < 8; i++) {
> +                irq_map[i] = cpu_to_be32(map[i]);
> +            }
> +            irq_map += 8;
> +        }
> +    }
> +
> +    qemu_fdt_setprop(vbi->fdt, nodename, "interrupt-map",
> +                     full_irq_map, sizeof(full_irq_map));
> +}
> +
> +static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
> +                        uint32_t gic_phandle)
> +{
> +    hwaddr base = vbi->memmap[VIRT_PCIE].base;
> +    hwaddr size = vbi->memmap[VIRT_PCIE].size;
> +    hwaddr size_ioport = 64 * 1024;
> +    hwaddr size_ecam = PCIE_MMCFG_SIZE_MIN;

See remarks in previous patch about wanting more than the minimum
ECAM space.

> +    hwaddr size_mmio = size - size_ecam - size_ioport;
> +    hwaddr base_mmio = base;
> +    hwaddr base_ioport = base_mmio + size_mmio;
> +    hwaddr base_ecam = base_ioport + size_ioport;

PCIe spec says the ECAM window has to be aligned to its size
(ie if it's 1MB in size it has to be 1MB aligned, if it's
256MB in size it has to be 256MB aligned). I think this is
dumb, but it's what the spec says...

> +    int irq = vbi->irqmap[VIRT_PCIE];
> +    MemoryRegion *mmio_alias;
> +    MemoryRegion *mmio_reg;
> +    DeviceState *dev;
> +    char *nodename;
> +    int i;
> +
> +    dev = qdev_create(NULL, TYPE_GPEX_HOST);
> +    qdev_init_nofail(dev);
> +
> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base_ecam);
> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, base_ioport);
> +
> +    /* Map the MMIO window at the same spot in bus and cpu layouts */
> +    mmio_alias = g_new0(MemoryRegion, 1);
> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
> +                             mmio_reg, base_mmio, size_mmio);
> +    memory_region_add_subregion(get_system_memory(), base_mmio, mmio_alias);

The comment claims to be mapping the MMIO window twice (in the
system memory space and in the PCI mmio address space) but the
code only seems to be mapping something into system memory space?

> +
> +    for (i = 0; i < 4; i++) {
> +        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
> +    }
> +
> +    nodename = g_strdup_printf("/pcie@%" PRIx64, base);
> +    qemu_fdt_add_subnode(vbi->fdt, nodename);
> +    qemu_fdt_setprop_string(vbi->fdt, nodename,
> +                            "compatible", "pci-host-ecam-generic");
> +    qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "pci");
> +    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#address-cells", 3);
> +    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#size-cells", 2);
> +    qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0, 1);
> +
> +    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
> +                                 2, base_ecam, 2, size_ecam);
> +    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
> +                                 1, FDT_PCI_RANGE_IOPORT, 2, 0,
> +                                 2, base_ioport, 2, size_ioport,
> +
> +                                 1, FDT_PCI_RANGE_MMIO, 2, base_mmio,
> +                                 2, base_mmio, 2, size_mmio);
> +
> +    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);
> +    create_pcie_irq_map(vbi, gic_phandle, irq, nodename);
> +
> +    qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupt-map-mask",
> +                           0x1800, 0, 0, /* devfn (PCI_SLOT(3)) */
> +                           0x7           /* PCI irq */);

I think at least the interrupt-map-mask and maybe also the
#interrupt-cells setprop calls should go inside the
create_pcie_irq_map() function. In particular the interrupt-map
and interrupt-map-mask are meaningless unless interpreted together
so they should go in the same function.

> +
> +    g_free(nodename);
> +}
> +
>  static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
>  {
>      const VirtBoardInfo *board = (const VirtBoardInfo *)binfo;
> @@ -573,6 +672,7 @@ static void machvirt_init(MachineState *machine)
>      MemoryRegion *ram = g_new(MemoryRegion, 1);
>      const char *cpu_model = machine->cpu_model;
>      VirtBoardInfo *vbi;
> +    uint32_t gic_phandle;
>
>      if (!cpu_model) {
>          cpu_model = "cortex-a15";
> @@ -634,12 +734,14 @@ static void machvirt_init(MachineState *machine)
>
>      create_flash(vbi);
>
> -    create_gic(vbi, pic);
> +    gic_phandle = create_gic(vbi, pic);
>
>      create_uart(vbi, pic);
>
>      create_rtc(vbi, pic);
>
> +    create_pcie(vbi, pic, gic_phandle);
> +
>      /* Create mmio transports, so the user can create virtio backends
>       * (which will be automatically plugged in to the transports). If
>       * no backend is created the transport will just sit harmlessly idle.
> diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h
> index 899f05c..979169b 100644
> --- a/include/sysemu/device_tree.h
> +++ b/include/sysemu/device_tree.h
> @@ -110,4 +110,13 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
>                                                  qdt_tmp);                 \
>      })
>
> +#define FDT_PCI_RANGE_RELOCATABLE          0x80000000
> +#define FDT_PCI_RANGE_PREFETCHABLE         0x40000000
> +#define FDT_PCI_RANGE_ALIASED              0x20000000
> +#define FDT_PCI_RANGE_TYPE                 0x03000000
> +#define FDT_PCI_RANGE_MMIO_64BIT           0x03000000

I had to go dig up the spec to figure out that it wasn't an error that
these two had the same values. Calling the first _TYPE_MASK might help?

> +#define FDT_PCI_RANGE_MMIO                 0x02000000
> +#define FDT_PCI_RANGE_IOPORT               0x01000000
> +#define FDT_PCI_RANGE_CONFIG               0x00000000

thanks
-- PMM
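
For reference, here is how a _TYPE_MASK spelling would read in use, as a
self-contained sketch with the suggested rename applied (the rename is a
suggestion only; the patch as posted calls it FDT_PCI_RANGE_TYPE):

    #include <stdint.h>

    #define FDT_PCI_RANGE_PREFETCHABLE   0x40000000
    #define FDT_PCI_RANGE_TYPE_MASK      0x03000000  /* suggested rename */
    #define FDT_PCI_RANGE_MMIO           0x02000000

    /* The high cell of a PCI "ranges" entry carries the space code in
     * bits 24-25; masking first makes the overlap with MMIO_64BIT
     * (0x03000000) obvious rather than surprising. */
    static int is_mmio32(uint32_t phys_hi)
    {
        return (phys_hi & FDT_PCI_RANGE_TYPE_MASK) == FDT_PCI_RANGE_MMIO;
    }

    /* is_mmio32(FDT_PCI_RANGE_MMIO | FDT_PCI_RANGE_PREFETCHABLE) == 1 */
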
Alexander Graf Jan. 29, 2015, 2:31 p.m. UTC | #7
On 27.01.15 17:52, Peter Maydell wrote:
> On 21 January 2015 at 16:18, Alexander Graf <agraf@suse.de> wrote:
>> Now that we have a working "generic" PCIe host bridge driver, we can plug
>> it into ARMs virt machine to always have PCIe available to normal ARM VMs.
> 
> "ARM's".
> 
>>
>> I've successfully managed to expose a Bochs VGA device, XHCI and an e1000
>> into an AArch64 VM with this and they all lived happily ever after.
>>
>> Signed-off-by: Alexander Graf <agraf@suse.de>
>> Tested-by: Claudio Fontana <claudio.fontana@huawei.com>
>>
>> ---
>>
>> Linux 3.19 only supports the generic PCIe host bridge driver for 32bit ARM
>> systems. If you want to use it with AArch64 guests, please apply the following
>> patch or wait until upstream cleaned up the code properly:
>>
>>   http://csgraf.de/agraf/pci/pci-3.19.patch
>>
>> v1 -> v2:
>>
>>   - Add define for pci range types
>>   - Remove mmio_window_size
>>   - Use 4 PCI INTX IRQ lines
>> ---
>>  default-configs/arm-softmmu.mak |   2 +
>>  hw/arm/virt.c                   | 112 ++++++++++++++++++++++++++++++++++++++--
>>  include/sysemu/device_tree.h    |   9 ++++
>>  3 files changed, 118 insertions(+), 5 deletions(-)
>>
>> diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
>> index f3513fa..7671ee2 100644
>> --- a/default-configs/arm-softmmu.mak
>> +++ b/default-configs/arm-softmmu.mak
>> @@ -82,6 +82,8 @@ CONFIG_ZYNQ=y
>>  CONFIG_VERSATILE_PCI=y
>>  CONFIG_VERSATILE_I2C=y
>>
>> +CONFIG_PCI_GENERIC=y
>> +
>>  CONFIG_SDHCI=y
>>  CONFIG_INTEGRATOR_DEBUG=y
>>
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index 2353440..a727b4e 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -42,6 +42,7 @@
>>  #include "exec/address-spaces.h"
>>  #include "qemu/bitops.h"
>>  #include "qemu/error-report.h"
>> +#include "hw/pci-host/gpex.h"
>>
>>  #define NUM_VIRTIO_TRANSPORTS 32
>>
>> @@ -69,6 +70,7 @@ enum {
>>      VIRT_MMIO,
>>      VIRT_RTC,
>>      VIRT_FW_CFG,
>> +    VIRT_PCIE,
>>  };
>>
>>  typedef struct MemMapEntry {
>> @@ -129,13 +131,14 @@ static const MemMapEntry a15memmap[] = {
>>      [VIRT_FW_CFG] =     { 0x09020000, 0x0000000a },
>>      [VIRT_MMIO] =       { 0x0a000000, 0x00000200 },
>>      /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
>> -    /* 0x10000000 .. 0x40000000 reserved for PCI */
>> +    [VIRT_PCIE] =       { 0x10000000, 0x30000000 },
> 
> Might be nice to have a comment about how this is split up between
> MMIO, IO and config space.

I'd prefer to keep the details of that in the allocation code, so that we
don't end up with two places that can diverge. But I'll document the
allocation code more thoroughly and add a small overview of the split
here.

> 
>>      [VIRT_MEM] =        { 0x40000000, 30ULL * 1024 * 1024 * 1024 },
>>  };
>>
>>  static const int a15irqmap[] = {
>>      [VIRT_UART] = 1,
>>      [VIRT_RTC] = 2,
>> +    [VIRT_PCIE] = 3, /* ... to 6 */
>>      [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
>>  };
>>
>> @@ -312,7 +315,7 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
>>      }
>>  }
>>
>> -static void fdt_add_gic_node(const VirtBoardInfo *vbi)
>> +static uint32_t fdt_add_gic_node(const VirtBoardInfo *vbi)
>>  {
>>      uint32_t gic_phandle;
>>
>> @@ -331,9 +334,11 @@ static void fdt_add_gic_node(const VirtBoardInfo *vbi)
>>                                       2, vbi->memmap[VIRT_GIC_CPU].base,
>>                                       2, vbi->memmap[VIRT_GIC_CPU].size);
>>      qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", gic_phandle);
>> +
>> +    return gic_phandle;
>>  }
>>
>> -static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>> +static uint32_t create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>>  {
>>      /* We create a standalone GIC v2 */
>>      DeviceState *gicdev;
>> @@ -380,7 +385,7 @@ static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
>>          pic[i] = qdev_get_gpio_in(gicdev, i);
>>      }
>>
>> -    fdt_add_gic_node(vbi);
>> +    return fdt_add_gic_node(vbi);
>>  }
>>
>>  static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
>> @@ -556,6 +561,100 @@ static void create_fw_cfg(const VirtBoardInfo *vbi)
>>      g_free(nodename);
>>  }
>>
>> +static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
>> +                                int first_irq, const char *nodename)
>> +{
>> +    int devfn, pin;
>> +    uint32_t full_irq_map[4 * 4 * 8] = { 0 };
>> +    uint32_t *irq_map = full_irq_map;
>> +
>> +    for (devfn = 0; devfn <= 0x18; devfn+=0x8) {
>> +        for (pin = 0; pin < 4; pin++) {
>> +            int irq_type = GIC_FDT_IRQ_TYPE_SPI;
>> +            int irq_nr = first_irq + ((pin + PCI_SLOT(devfn)) % PCI_NUM_PINS);
>> +            int irq_level = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
>> +            int i;
>> +
>> +            uint32_t map[] = {
>> +                devfn << 8, 0, 0,                           /* devfn */
>> +                pin + 1,                                    /* PCI pin */
>> +                gic_phandle, irq_type, irq_nr, irq_level }; /* GIC irq */
>> +
>> +            /* Convert map to big endian */
>> +            for (i = 0; i < 8; i++) {
>> +                irq_map[i] = cpu_to_be32(map[i]);
>> +            }
>> +            irq_map += 8;
>> +        }
>> +    }
>> +
>> +    qemu_fdt_setprop(vbi->fdt, nodename, "interrupt-map",
>> +                     full_irq_map, sizeof(full_irq_map));
>> +}
>> +
>> +static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
>> +                        uint32_t gic_phandle)
>> +{
>> +    hwaddr base = vbi->memmap[VIRT_PCIE].base;
>> +    hwaddr size = vbi->memmap[VIRT_PCIE].size;
>> +    hwaddr size_ioport = 64 * 1024;
>> +    hwaddr size_ecam = PCIE_MMCFG_SIZE_MIN;
> 
> See remarks in previous patch about wanting more than the minimum
> ECAM space.
> 
>> +    hwaddr size_mmio = size - size_ecam - size_ioport;
>> +    hwaddr base_mmio = base;
>> +    hwaddr base_ioport = base_mmio + size_mmio;
>> +    hwaddr base_ecam = base_ioport + size_ioport;
> 
> PCIe spec says the ECAM window has to be aligned to its size
> (ie if it's 1MB in size it has to be 1MB aligned, if it's
> 256MB in size it has to be 256MB aligned). I think this is
> dumb, but it's what the spec says...

No problem, fortunately we have a good number of nice macros for this ;)
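
For reference, the rule works out like this in plain C; a sketch only,
with align_up() standing in for whichever QEMU macro the respin ends up
using, and the layout numbers taken from this patch:

    #include <assert.h>
    #include <stdint.h>

    /* m must be a power of two, which ECAM window sizes always are. */
    static uint64_t align_up(uint64_t n, uint64_t m)
    {
        return (n + m - 1) & ~(m - 1);
    }

    int main(void)
    {
        uint64_t base        = 0x10000000, size = 0x30000000; /* VIRT_PCIE */
        uint64_t size_ioport = 64 * 1024;
        uint64_t size_ecam   = 1 << 20;             /* PCIE_MMCFG_SIZE_MIN */
        uint64_t size_mmio   = size - size_ecam - size_ioport;
        uint64_t base_ioport = base + size_mmio;
        uint64_t base_ecam   = align_up(base_ioport + size_ioport, size_ecam);

        assert(base_ecam % size_ecam == 0);           /* aligned to its size */
        assert(base_ecam + size_ecam <= base + size); /* still inside window */
        return 0;
    }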

> 
>> +    int irq = vbi->irqmap[VIRT_PCIE];
>> +    MemoryRegion *mmio_alias;
>> +    MemoryRegion *mmio_reg;
>> +    DeviceState *dev;
>> +    char *nodename;
>> +    int i;
>> +
>> +    dev = qdev_create(NULL, TYPE_GPEX_HOST);
>> +    qdev_init_nofail(dev);
>> +
>> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base_ecam);
>> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, base_ioport);
>> +
>> +    /* Map the MMIO window at the same spot in bus and cpu layouts */
>> +    mmio_alias = g_new0(MemoryRegion, 1);
>> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
>> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
>> +                             mmio_reg, base_mmio, size_mmio);
>> +    memory_region_add_subregion(get_system_memory(), base_mmio, mmio_alias);
> 
> The comment claims to be mapping the MMIO window twice (in the
> system memory space and in the PCI mmio address space) but the
> code only seems to be mapping something into system memory space?

The comment claims to map it at the same spot. It means the offset in
system memory is the same offset as the one in the mmio window that gets
exported by the PHB.

The PHB exports a UINT64_MAX MMIO region as id 1. This is basically a
full window into the PCI address space. What we do here is to map a 1:1
window between CPU address space and PCI address space.

> 
>> +
>> +    for (i = 0; i < 4; i++) {
>> +        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
>> +    }
>> +
>> +    nodename = g_strdup_printf("/pcie@%" PRIx64, base);
>> +    qemu_fdt_add_subnode(vbi->fdt, nodename);
>> +    qemu_fdt_setprop_string(vbi->fdt, nodename,
>> +                            "compatible", "pci-host-ecam-generic");
>> +    qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "pci");
>> +    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#address-cells", 3);
>> +    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#size-cells", 2);
>> +    qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0, 1);
>> +
>> +    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
>> +                                 2, base_ecam, 2, size_ecam);
>> +    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
>> +                                 1, FDT_PCI_RANGE_IOPORT, 2, 0,
>> +                                 2, base_ioport, 2, size_ioport,
>> +
>> +                                 1, FDT_PCI_RANGE_MMIO, 2, base_mmio,
>> +                                 2, base_mmio, 2, size_mmio);
>> +
>> +    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);
>> +    create_pcie_irq_map(vbi, gic_phandle, irq, nodename);
>> +
>> +    qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupt-map-mask",
>> +                           0x1800, 0, 0, /* devfn (PCI_SLOT(3)) */
>> +                           0x7           /* PCI irq */);
> 
> I think at least the interrupt-map-mask and maybe also the
> #interrupt-cells setprop calls should go inside the
> create_pcie_irq_map() function. In particular the interrupt-map
> and interrupt-map-mask are meaningless unless interpreted together
> so they should go in the same function.

Sure, works for me.

> 
>> +
>> +    g_free(nodename);
>> +}
>> +
>>  static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
>>  {
>>      const VirtBoardInfo *board = (const VirtBoardInfo *)binfo;
>> @@ -573,6 +672,7 @@ static void machvirt_init(MachineState *machine)
>>      MemoryRegion *ram = g_new(MemoryRegion, 1);
>>      const char *cpu_model = machine->cpu_model;
>>      VirtBoardInfo *vbi;
>> +    uint32_t gic_phandle;
>>
>>      if (!cpu_model) {
>>          cpu_model = "cortex-a15";
>> @@ -634,12 +734,14 @@ static void machvirt_init(MachineState *machine)
>>
>>      create_flash(vbi);
>>
>> -    create_gic(vbi, pic);
>> +    gic_phandle = create_gic(vbi, pic);
>>
>>      create_uart(vbi, pic);
>>
>>      create_rtc(vbi, pic);
>>
>> +    create_pcie(vbi, pic, gic_phandle);
>> +
>>      /* Create mmio transports, so the user can create virtio backends
>>       * (which will be automatically plugged in to the transports). If
>>       * no backend is created the transport will just sit harmlessly idle.
>> diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h
>> index 899f05c..979169b 100644
>> --- a/include/sysemu/device_tree.h
>> +++ b/include/sysemu/device_tree.h
>> @@ -110,4 +110,13 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
>>                                                  qdt_tmp);                 \
>>      })
>>
>> +#define FDT_PCI_RANGE_RELOCATABLE          0x80000000
>> +#define FDT_PCI_RANGE_PREFETCHABLE         0x40000000
>> +#define FDT_PCI_RANGE_ALIASED              0x20000000
>> +#define FDT_PCI_RANGE_TYPE                 0x03000000
>> +#define FDT_PCI_RANGE_MMIO_64BIT           0x03000000
> 
> I had to go dig up the spec to figure out that it wasn't an error that
> these two had the same values. Calling the first _TYPE_MASK might help?

Probably :)


Alex
Peter Maydell Jan. 29, 2015, 2:34 p.m. UTC | #8
On 29 January 2015 at 14:31, Alexander Graf <agraf@suse.de> wrote:
>
>
> On 27.01.15 17:52, Peter Maydell wrote:
>> On 21 January 2015 at 16:18, Alexander Graf <agraf@suse.de> wrote:
>>> +    /* Map the MMIO window at the same spot in bus and cpu layouts */
>>> +    mmio_alias = g_new0(MemoryRegion, 1);
>>> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
>>> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
>>> +                             mmio_reg, base_mmio, size_mmio);
>>> +    memory_region_add_subregion(get_system_memory(), base_mmio, mmio_alias);
>>
>> The comment claims to be mapping the MMIO window twice (in the
>> system memory space and in the PCI mmio address space) but the
>> code only seems to be mapping something into system memory space?
>
> The comment claims to map it at the same spot. It means the offset in
> system memory is the same offset as the one in the mmio window that gets
> exported by the PHB.
>
> The PHB exports a UINT64_MAX MMIO region as id 1. This is basically a
> full window into the PCI address space. What we do here is to map a 1:1
> window between CPU address space and PCI address space.

I kind of see, but isn't this just a window from CPU address
space into PCI address space, not vice-versa?

DMA by PCI devices bus-mastering into system memory must be
being set up elsewhere, I think.

-- PMM
Alexander Graf Jan. 29, 2015, 2:37 p.m. UTC | #9
On 29.01.15 15:34, Peter Maydell wrote:
> On 29 January 2015 at 14:31, Alexander Graf <agraf@suse.de> wrote:
>>
>>
>> On 27.01.15 17:52, Peter Maydell wrote:
>>> On 21 January 2015 at 16:18, Alexander Graf <agraf@suse.de> wrote:
>>>> +    /* Map the MMIO window at the same spot in bus and cpu layouts */
>>>> +    mmio_alias = g_new0(MemoryRegion, 1);
>>>> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
>>>> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
>>>> +                             mmio_reg, base_mmio, size_mmio);
>>>> +    memory_region_add_subregion(get_system_memory(), base_mmio, mmio_alias);
>>>
>>> The comment claims to be mapping the MMIO window twice (in the
>>> system memory space and in the PCI mmio address space) but the
>>> code only seems to be mapping something into system memory space?
>>
>> The comment claims to map it at the same spot. It means the offset in
>> system memory is the same offset as the one in the mmio window that gets
>> exported by the PHB.
>>
>> The PHB exports a UINT64_MAX MMIO region as id 1. This is basically a
>> full window into the PCI address space. What we do here is to map a 1:1
>> window between CPU address space and PCI address space.
> 
> I kind of see, but isn't this just a window from CPU address
> space into PCI address space, not vice-versa?

Yup, exactly. But PCI devices need to map themselves somewhere into the
PCI address space. So if I configure a BAR to live at 0x10000000, it
should also show up at 0x10000000 when accessed from the CPU. That's
what the mapping above is about.

> DMA by PCI devices bus-mastering into system memory must be
> being set up elsewhere, I think.

Yes, that's a different mechanism that's not implemented yet for GPEX
:). On ARM this would happen via SMMU emulation.


Alex
Peter Maydell Jan. 29, 2015, 2:45 p.m. UTC | #10
On 29 January 2015 at 14:37, Alexander Graf <agraf@suse.de> wrote:
> On 29.01.15 15:34, Peter Maydell wrote:
>> I kind of see, but isn't this just a window from CPU address
>> space into PCI address space, not vice-versa?
>
> Yup, exactly. But PCI devices need to map themselves somewhere into the
> PCI address space. So if I configure a BAR to live at 0x10000000, it
> should also show up at 0x10000000 when accessed from the CPU. That's
> what the mapping above is about.

No, it doesn't have to. It's a choice to make the mapping
be such that the system address for a BAR matches the address
in PCI memory space, not a requirement. I agree it's a
sensible choice, though.

But as I say, this code is setting up one mapping (the
system address -> PCI space mapping), not two.

>> DMA by PCI devices bus-mastering into system memory must be
>> being set up elsewhere, I think.
>
> Yes, that's a different mechanism that's not implemented yet for GPEX
> :).

We can't not implement DMA, it would break lots of the usual
PCI devices people want to use. In fact I thought the PCI
core code implemented a default of "DMA by PCI devices goes
to the system address space" if you didn't specifically
set up something else by calling pci_setup_iommu(). This is
definitely how it works for plain PCI host bridges, are
PCIe bridges different?

> On ARM this would happen via SMMU emulation.

There's no requirement for a PCI host controller to be
sat behind an SMMU -- that's a system design choice. We
don't need to implement the SMMU yet (or perhaps ever?);
we definitely need to support PCI DMA.

-- PMM
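
For reference, the default described here comes from the iommu hook in
hw/pci/pci.h of this era. A hedged sketch follows, with a made-up handler
name; not installing any hook at all yields the same system-memory
behaviour:

    #include "hw/pci/pci.h"
    #include "exec/address-spaces.h"

    /* Hypothetical per-device hook: returning address_space_memory gives
     * every device on the bus an untranslated 1:1 DMA view of guest RAM. */
    static AddressSpace *gpex_iommu_as(PCIBus *bus, void *opaque, int devfn)
    {
        return &address_space_memory;
    }

    /* Only needed once an SMMU model wants to interpose:
     *
     *     pci_setup_iommu(bus, gpex_iommu_as, opaque);
     */
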
Alexander Graf Jan. 29, 2015, 2:49 p.m. UTC | #11
On 29.01.15 15:45, Peter Maydell wrote:
> On 29 January 2015 at 14:37, Alexander Graf <agraf@suse.de> wrote:
>> On 29.01.15 15:34, Peter Maydell wrote:
>>> I kind of see, but isn't this just a window from CPU address
>>> space into PCI address space, not vice-versa?
>>
>> Yup, exactly. But PCI devices need to map themselves somewhere into the
>> PCI address space. So if I configure a BAR to live at 0x10000000, it
>> should also show up at 0x10000000 when accessed from the CPU. That's
>> what the mapping above is about.
> 
> No, it doesn't have to. It's a choice to make the mapping
> be such that the system address for a BAR matches the address
> in PCI memory space, not a requirement. I agree it's a
> sensible choice, though.
> 
> But as I say, this code is setting up one mapping (the
> system address -> PCI space mapping), not two.

Yes, the other one is done implicitly by the OS, based on what the device
tree tells it to do. If we map it at 0, our good old "if (BAR == 0)
break;" friend hits us again though - and any other arbitrary offset is
as good as a 1:1 map.

> 
>>> DMA by PCI devices bus-mastering into system memory must be
>>> being set up elsewhere, I think.
>>
>> Yes, that's a different mechanism that's not implemented yet for GPEX
>> :).
> 
> We can't not implement DMA, it would break lots of the usual
> PCI devices people want to use. In fact I thought the PCI
> core code implemented a default of "DMA by PCI devices goes
> to the system address space" if you didn't specifically
> set up something else by calling pci_setup_iommu(). This is
> definitely how it works for plain PCI host bridges, are
> PCIe bridges different?

Nono, this is exactly the way it works. The thing that's not implemented
is the SMMU to make that dynamic.

>> On ARM this would happen via SMMU emulation.
> 
> There's no requirement for a PCI host controller to be
> sat behind an SMMU -- that's a system design choice. We
> don't need to implement the SMMU yet (or perhaps ever?);

The main benefit of implementing a guest SMMU is that you don't have to
pin all guest memory at all times. Apart from that and the usual
security benefits, it only makes things slower ;).

> we definitely need to support PCI DMA.

We do.


Alex

Patch

diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index f3513fa..7671ee2 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -82,6 +82,8 @@  CONFIG_ZYNQ=y
 CONFIG_VERSATILE_PCI=y
 CONFIG_VERSATILE_I2C=y
 
+CONFIG_PCI_GENERIC=y
+
 CONFIG_SDHCI=y
 CONFIG_INTEGRATOR_DEBUG=y
 
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 2353440..a727b4e 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -42,6 +42,7 @@ 
 #include "exec/address-spaces.h"
 #include "qemu/bitops.h"
 #include "qemu/error-report.h"
+#include "hw/pci-host/gpex.h"
 
 #define NUM_VIRTIO_TRANSPORTS 32
 
@@ -69,6 +70,7 @@  enum {
     VIRT_MMIO,
     VIRT_RTC,
     VIRT_FW_CFG,
+    VIRT_PCIE,
 };
 
 typedef struct MemMapEntry {
@@ -129,13 +131,14 @@  static const MemMapEntry a15memmap[] = {
     [VIRT_FW_CFG] =     { 0x09020000, 0x0000000a },
     [VIRT_MMIO] =       { 0x0a000000, 0x00000200 },
     /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
-    /* 0x10000000 .. 0x40000000 reserved for PCI */
+    [VIRT_PCIE] =       { 0x10000000, 0x30000000 },
     [VIRT_MEM] =        { 0x40000000, 30ULL * 1024 * 1024 * 1024 },
 };
 
 static const int a15irqmap[] = {
     [VIRT_UART] = 1,
     [VIRT_RTC] = 2,
+    [VIRT_PCIE] = 3, /* ... to 6 */
     [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
 };
 
@@ -312,7 +315,7 @@  static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
     }
 }
 
-static void fdt_add_gic_node(const VirtBoardInfo *vbi)
+static uint32_t fdt_add_gic_node(const VirtBoardInfo *vbi)
 {
     uint32_t gic_phandle;
 
@@ -331,9 +334,11 @@  static void fdt_add_gic_node(const VirtBoardInfo *vbi)
                                      2, vbi->memmap[VIRT_GIC_CPU].base,
                                      2, vbi->memmap[VIRT_GIC_CPU].size);
     qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", gic_phandle);
+
+    return gic_phandle;
 }
 
-static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
+static uint32_t create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
 {
     /* We create a standalone GIC v2 */
     DeviceState *gicdev;
@@ -380,7 +385,7 @@  static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
         pic[i] = qdev_get_gpio_in(gicdev, i);
     }
 
-    fdt_add_gic_node(vbi);
+    return fdt_add_gic_node(vbi);
 }
 
 static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
@@ -556,6 +561,100 @@  static void create_fw_cfg(const VirtBoardInfo *vbi)
     g_free(nodename);
 }
 
+static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
+                                int first_irq, const char *nodename)
+{
+    int devfn, pin;
+    uint32_t full_irq_map[4 * 4 * 8] = { 0 };
+    uint32_t *irq_map = full_irq_map;
+
+    for (devfn = 0; devfn <= 0x18; devfn+=0x8) {
+        for (pin = 0; pin < 4; pin++) {
+            int irq_type = GIC_FDT_IRQ_TYPE_SPI;
+            int irq_nr = first_irq + ((pin + PCI_SLOT(devfn)) % PCI_NUM_PINS);
+            int irq_level = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
+            int i;
+
+            uint32_t map[] = {
+                devfn << 8, 0, 0,                           /* devfn */
+                pin + 1,                                    /* PCI pin */
+                gic_phandle, irq_type, irq_nr, irq_level }; /* GIC irq */
+
+            /* Convert map to big endian */
+            for (i = 0; i < 8; i++) {
+                irq_map[i] = cpu_to_be32(map[i]);
+            }
+            irq_map += 8;
+        }
+    }
+
+    qemu_fdt_setprop(vbi->fdt, nodename, "interrupt-map",
+                     full_irq_map, sizeof(full_irq_map));
+}
+
+static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
+                        uint32_t gic_phandle)
+{
+    hwaddr base = vbi->memmap[VIRT_PCIE].base;
+    hwaddr size = vbi->memmap[VIRT_PCIE].size;
+    hwaddr size_ioport = 64 * 1024;
+    hwaddr size_ecam = PCIE_MMCFG_SIZE_MIN;
+    hwaddr size_mmio = size - size_ecam - size_ioport;
+    hwaddr base_mmio = base;
+    hwaddr base_ioport = base_mmio + size_mmio;
+    hwaddr base_ecam = base_ioport + size_ioport;
+    int irq = vbi->irqmap[VIRT_PCIE];
+    MemoryRegion *mmio_alias;
+    MemoryRegion *mmio_reg;
+    DeviceState *dev;
+    char *nodename;
+    int i;
+
+    dev = qdev_create(NULL, TYPE_GPEX_HOST);
+    qdev_init_nofail(dev);
+
+    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base_ecam);
+    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, base_ioport);
+
+    /* Map the MMIO window at the same spot in bus and cpu layouts */
+    mmio_alias = g_new0(MemoryRegion, 1);
+    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
+    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
+                             mmio_reg, base_mmio, size_mmio);
+    memory_region_add_subregion(get_system_memory(), base_mmio, mmio_alias);
+
+    for (i = 0; i < 4; i++) {
+        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
+    }
+
+    nodename = g_strdup_printf("/pcie@%" PRIx64, base);
+    qemu_fdt_add_subnode(vbi->fdt, nodename);
+    qemu_fdt_setprop_string(vbi->fdt, nodename,
+                            "compatible", "pci-host-ecam-generic");
+    qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "pci");
+    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#address-cells", 3);
+    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#size-cells", 2);
+    qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0, 1);
+
+    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+                                 2, base_ecam, 2, size_ecam);
+    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
+                                 1, FDT_PCI_RANGE_IOPORT, 2, 0,
+                                 2, base_ioport, 2, size_ioport,
+
+                                 1, FDT_PCI_RANGE_MMIO, 2, base_mmio,
+                                 2, base_mmio, 2, size_mmio);
+
+    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);
+    create_pcie_irq_map(vbi, gic_phandle, irq, nodename);
+
+    qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupt-map-mask",
+                           0x1800, 0, 0, /* devfn (PCI_SLOT(3)) */
+                           0x7           /* PCI irq */);
+
+    g_free(nodename);
+}
+
 static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
 {
     const VirtBoardInfo *board = (const VirtBoardInfo *)binfo;
@@ -573,6 +672,7 @@  static void machvirt_init(MachineState *machine)
     MemoryRegion *ram = g_new(MemoryRegion, 1);
     const char *cpu_model = machine->cpu_model;
     VirtBoardInfo *vbi;
+    uint32_t gic_phandle;
 
     if (!cpu_model) {
         cpu_model = "cortex-a15";
@@ -634,12 +734,14 @@  static void machvirt_init(MachineState *machine)
 
     create_flash(vbi);
 
-    create_gic(vbi, pic);
+    gic_phandle = create_gic(vbi, pic);
 
     create_uart(vbi, pic);
 
     create_rtc(vbi, pic);
 
+    create_pcie(vbi, pic, gic_phandle);
+
     /* Create mmio transports, so the user can create virtio backends
      * (which will be automatically plugged in to the transports). If
      * no backend is created the transport will just sit harmlessly idle.
diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h
index 899f05c..979169b 100644
--- a/include/sysemu/device_tree.h
+++ b/include/sysemu/device_tree.h
@@ -110,4 +110,13 @@  int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
                                                 qdt_tmp);                 \
     })
 
+#define FDT_PCI_RANGE_RELOCATABLE          0x80000000
+#define FDT_PCI_RANGE_PREFETCHABLE         0x40000000
+#define FDT_PCI_RANGE_ALIASED              0x20000000
+#define FDT_PCI_RANGE_TYPE                 0x03000000
+#define FDT_PCI_RANGE_MMIO_64BIT           0x03000000
+#define FDT_PCI_RANGE_MMIO                 0x02000000
+#define FDT_PCI_RANGE_IOPORT               0x01000000
+#define FDT_PCI_RANGE_CONFIG               0x00000000
+
 #endif /* __DEVICE_TREE_H__ */