diff mbox series

[RFC,1/2] hw/riscv: Add server platform reference machine

Message ID 20240304102540.2789225-2-fei2.wu@intel.com
State New
Headers show
Series Add RISC-V Server Platform Reference Board | expand

Commit Message

Wu, Fei March 4, 2024, 10:25 a.m. UTC
The RISC-V Server Platform specification[1] defines a standardized set
of hardware and software capabilities, that portable system software,
such as OS and hypervisors can rely on being present in a RISC-V server
platform.

A corresponding Qemu RISC-V server platform reference (rvsp-ref for
short) machine type is added to provide a environment for firmware/OS
development and testing. The main features included in rvsp-ref are:

 - Based on riscv virt machine type
 - A new memory map as close as virt machine as possible
 - A new virt CPU type rvsp-ref-cpu for server platform compliance
 - AIA
 - PCIe AHCI
 - PCIe NIC
 - No virtio device
 - No fw_cfg device
 - No ACPI table provided
 - Only minimal device tree nodes

[1] https://github.com/riscv-non-isa/riscv-server-platform

Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
 configs/devices/riscv64-softmmu/default.mak |    1 +
 hw/riscv/Kconfig                            |   13 +
 hw/riscv/meson.build                        |    1 +
 hw/riscv/server_platform_ref.c              | 1244 +++++++++++++++++++
 4 files changed, 1259 insertions(+)
 create mode 100644 hw/riscv/server_platform_ref.c

Comments

Daniel Henrique Barboza March 4, 2024, 7:35 p.m. UTC | #1
On 3/4/24 07:25, Fei Wu wrote:
> The RISC-V Server Platform specification[1] defines a standardized set
> of hardware and software capabilities, that portable system software,
> such as OS and hypervisors can rely on being present in a RISC-V server
> platform.
> 
> A corresponding Qemu RISC-V server platform reference (rvsp-ref for
> short) machine type is added to provide a environment for firmware/OS
> development and testing. The main features included in rvsp-ref are:
> 
>   - Based on riscv virt machine type
>   - A new memory map as close as virt machine as possible
>   - A new virt CPU type rvsp-ref-cpu for server platform compliance
>   - AIA
>   - PCIe AHCI
>   - PCIe NIC
>   - No virtio device
>   - No fw_cfg device
>   - No ACPI table provided
>   - Only minimal device tree nodes
> 
> [1] https://github.com/riscv-non-isa/riscv-server-platform
> 
> Signed-off-by: Fei Wu <fei2.wu@intel.com>
> ---
>   configs/devices/riscv64-softmmu/default.mak |    1 +
>   hw/riscv/Kconfig                            |   13 +
>   hw/riscv/meson.build                        |    1 +
>   hw/riscv/server_platform_ref.c              | 1244 +++++++++++++++++++
>   4 files changed, 1259 insertions(+)
>   create mode 100644 hw/riscv/server_platform_ref.c
> 
> diff --git a/configs/devices/riscv64-softmmu/default.mak b/configs/devices/riscv64-softmmu/default.mak
> index 3f68059448..a1d98e49ef 100644
> --- a/configs/devices/riscv64-softmmu/default.mak
> +++ b/configs/devices/riscv64-softmmu/default.mak
> @@ -10,5 +10,6 @@ CONFIG_SPIKE=y
>   CONFIG_SIFIVE_E=y
>   CONFIG_SIFIVE_U=y
>   CONFIG_RISCV_VIRT=y
> +CONFIG_SERVER_PLATFORM_REF=y
>   CONFIG_MICROCHIP_PFSOC=y
>   CONFIG_SHAKTI_C=y
> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
> index 5d644eb7b1..debac5a7f5 100644
> --- a/hw/riscv/Kconfig
> +++ b/hw/riscv/Kconfig
> @@ -48,6 +48,19 @@ config RISCV_VIRT
>       select ACPI
>       select ACPI_PCI
>   
> +config SERVER_PLATFORM_REF
> +    bool
> +    select RISCV_NUMA
> +    select GOLDFISH_RTC
> +    select PCI
> +    select PCI_EXPRESS_GENERIC_BRIDGE
> +    select PFLASH_CFI01
> +    select SERIAL
> +    select RISCV_ACLINT
> +    select RISCV_APLIC
> +    select RISCV_IMSIC
> +    select SIFIVE_TEST
> +
>   config SHAKTI_C
>       bool
>       select RISCV_ACLINT
> diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
> index 2f7ee81be3..bb3aff91ea 100644
> --- a/hw/riscv/meson.build
> +++ b/hw/riscv/meson.build
> @@ -4,6 +4,7 @@ riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: files('numa.c'))
>   riscv_ss.add(files('riscv_hart.c'))
>   riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c'))
>   riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c'))
> +riscv_ss.add(when: 'CONFIG_SERVER_PLATFORM_REF', if_true: files('server_platform_ref.c'))
>   riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c'))
>   riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
>   riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
> diff --git a/hw/riscv/server_platform_ref.c b/hw/riscv/server_platform_ref.c
> new file mode 100644
> index 0000000000..ae90c4b27a
> --- /dev/null
> +++ b/hw/riscv/server_platform_ref.c
> @@ -0,0 +1,1244 @@
> +/*
> + * QEMU RISC-V Server Platfrom (RVSP) Reference Board
> + *
> + * Copyright (c) 2024 Intel, Inc.
> + *
> + * This board is compliant RISC-V Server platform specification and leveraging
> + * a lot of riscv virt code.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/units.h"
> +#include "qemu/error-report.h"
> +#include "qemu/guest-random.h"
> +#include "qapi/error.h"
> +#include "qapi/qapi-visit-common.h"
> +#include "hw/boards.h"
> +#include "hw/loader.h"
> +#include "hw/sysbus.h"
> +#include "hw/qdev-properties.h"
> +#include "hw/char/serial.h"
> +#include "hw/block/flash.h"
> +#include "hw/ide/pci.h"
> +#include "hw/ide/ahci-pci.h"
> +#include "hw/pci/pci.h"
> +#include "hw/pci-host/gpex.h"
> +#include "hw/core/sysbus-fdt.h"
> +#include "hw/riscv/riscv_hart.h"
> +#include "hw/riscv/boot.h"
> +#include "hw/riscv/numa.h"
> +#include "hw/intc/riscv_aclint.h"
> +#include "hw/intc/riscv_aplic.h"
> +#include "hw/intc/riscv_imsic.h"
> +#include "hw/misc/sifive_test.h"
> +#include "chardev/char.h"
> +#include "sysemu/device_tree.h"
> +#include "sysemu/sysemu.h"
> +#include "sysemu/tcg.h"
> +#include "target/riscv/cpu.h"
> +#include "target/riscv/pmu.h"
> +#include "net/net.h"
> +
> +#define RVSP_CPUS_MAX_BITS             9
> +#define RVSP_CPUS_MAX                  (1 << RVSP_CPUS_MAX_BITS)
> +#define RVSP_SOCKETS_MAX_BITS          2
> +#define RVSP_SOCKETS_MAX               (1 << RVSP_SOCKETS_MAX_BITS)
> +
> +#define RVSP_IRQCHIP_NUM_MSIS 255
> +#define RVSP_IRQCHIP_NUM_SOURCES 96
> +#define RVSP_IRQCHIP_NUM_PRIO_BITS 3
> +#define RVSP_IRQCHIP_MAX_GUESTS_BITS 3
> +#define RVSP_IRQCHIP_MAX_GUESTS ((1U << RVSP_IRQCHIP_MAX_GUESTS_BITS) - 1U)
> +
> +#define FDT_PCI_ADDR_CELLS    3
> +#define FDT_PCI_INT_CELLS     1
> +#define FDT_APLIC_INT_CELLS   2
> +#define FDT_IMSIC_INT_CELLS   0
> +#define FDT_MAX_INT_CELLS     2
> +#define FDT_MAX_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
> +                                 1 + FDT_MAX_INT_CELLS)
> +#define FDT_APLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
> +                                 1 + FDT_APLIC_INT_CELLS)
> +
> +#define NUM_SATA_PORTS  6
> +
> +#define TYPE_RVSP_REF_MACHINE MACHINE_TYPE_NAME("rvsp-ref")
> +OBJECT_DECLARE_SIMPLE_TYPE(RVSPMachineState, RVSP_REF_MACHINE)
> +
> +struct RVSPMachineState {
> +    /*< private >*/
> +    MachineState parent;
> +
> +    /*< public >*/
> +    Notifier machine_done;
> +    RISCVHartArrayState soc[RVSP_SOCKETS_MAX];
> +    DeviceState *irqchip[RVSP_SOCKETS_MAX];
> +    PFlashCFI01 *flash[2];
> +
> +    int fdt_size;
> +    int aia_guests;
> +    const MemMapEntry *memmap;
> +};
> +
> +enum {
> +    RVSP_DEBUG,
> +    RVSP_MROM,
> +    RVSP_TEST,
> +    RVSP_RTC,
> +    RVSP_ACLINT,
> +    RVSP_APLIC_M,
> +    RVSP_APLIC_S,
> +    RVSP_UART0,
> +    RVSP_IMSIC_M,
> +    RVSP_IMSIC_S,
> +    RVSP_FLASH,
> +    RVSP_DRAM,
> +    RVSP_PCIE_MMIO,
> +    RVSP_PCIE_PIO,
> +    RVSP_PCIE_ECAM,
> +    RVSP_PCIE_MMIO_HIGH
> +};
> +
> +enum {
> +    RVSP_UART0_IRQ = 10,
> +    RVSP_RTC_IRQ = 11,
> +    RVSP_PCIE_IRQ = 0x20, /* 32 to 35 */
> +};
> +
> +/*
> + * The server soc reference machine physical address space used by some of the
> + * devices namely ACLINT, APLIC and IMSIC depend on number of Sockets, number
> + * of CPUs, and number of IMSIC guest files.
> + *
> + * Various limits defined by RVSP_SOCKETS_MAX_BITS, RVSP_CPUS_MAX_BITS, and
> + * RVSP_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization of server soc
> + * reference machine physical address space.
> + */
> +
> +#define RVSP_IMSIC_GROUP_MAX_SIZE      (1U << IMSIC_MMIO_GROUP_MIN_SHIFT)
> +#if RVSP_IMSIC_GROUP_MAX_SIZE < \
> +    IMSIC_GROUP_SIZE(RVSP_CPUS_MAX_BITS, RVSP_IRQCHIP_MAX_GUESTS_BITS)
> +#error "Can't accomodate single IMSIC group in address space"
> +#endif
> +
> +#define RVSP_IMSIC_MAX_SIZE            (RVSP_SOCKETS_MAX * \
> +                                        RVSP_IMSIC_GROUP_MAX_SIZE)
> +#if 0x4000000 < RVSP_IMSIC_MAX_SIZE
> +#error "Can't accomodate all IMSIC groups in address space"
> +#endif
> +
> +static const MemMapEntry rvsp_ref_memmap[] = {
> +    [RVSP_DEBUG] =          {        0x0,         0x100 },
> +    [RVSP_MROM] =           {     0x1000,        0xf000 },
> +    [RVSP_TEST] =           {   0x100000,        0x1000 },
> +    [RVSP_RTC] =            {   0x101000,        0x1000 },
> +    [RVSP_ACLINT] =         {  0x2000000,       0x10000 },
> +    [RVSP_PCIE_PIO] =       {  0x3000000,       0x10000 },
> +    [RVSP_APLIC_M] =        {  0xc000000, APLIC_SIZE(RVSP_CPUS_MAX) },
> +    [RVSP_APLIC_S] =        {  0xd000000, APLIC_SIZE(RVSP_CPUS_MAX) },
> +    [RVSP_UART0] =          { 0x10000000,         0x100 },
> +    [RVSP_FLASH] =          { 0x20000000,     0x4000000 },
> +    [RVSP_IMSIC_M] =        { 0x24000000, RVSP_IMSIC_MAX_SIZE },
> +    [RVSP_IMSIC_S] =        { 0x28000000, RVSP_IMSIC_MAX_SIZE },
> +    [RVSP_PCIE_ECAM] =      { 0x30000000,    0x10000000 },
> +    [RVSP_PCIE_MMIO] =      { 0x40000000,    0x40000000 },
> +    [RVSP_DRAM] =           { 0x80000000, 0xff80000000ull },
> +    [RVSP_PCIE_MMIO_HIGH] = { 0x10000000000ull, 0x10000000000ull },
> +};
> +
> +#define RVSP_FLASH_SECTOR_SIZE (256 * KiB)
> +
> +static PFlashCFI01 *rvsp_flash_create1(RVSPMachineState *s,
> +                                       const char *name,
> +                                       const char *alias_prop_name)
> +{
> +    /*
> +     * Create a single flash device.  We use the same parameters as
> +     * the flash devices on the ARM virt board.
> +     */
> +    DeviceState *dev = qdev_new(TYPE_PFLASH_CFI01);
> +
> +    qdev_prop_set_uint64(dev, "sector-length", RVSP_FLASH_SECTOR_SIZE);
> +    qdev_prop_set_uint8(dev, "width", 4);
> +    qdev_prop_set_uint8(dev, "device-width", 2);
> +    qdev_prop_set_bit(dev, "big-endian", false);
> +    qdev_prop_set_uint16(dev, "id0", 0x89);
> +    qdev_prop_set_uint16(dev, "id1", 0x18);
> +    qdev_prop_set_uint16(dev, "id2", 0x00);
> +    qdev_prop_set_uint16(dev, "id3", 0x00);
> +    qdev_prop_set_string(dev, "name", name);
> +
> +    object_property_add_child(OBJECT(s), name, OBJECT(dev));
> +    object_property_add_alias(OBJECT(s), alias_prop_name,
> +                              OBJECT(dev), "drive");
> +
> +    return PFLASH_CFI01(dev);
> +}
> +
> +static void rvsp_flash_create(RVSPMachineState *s)
> +{
> +    s->flash[0] = rvsp_flash_create1(s, "rvsp.flash0", "pflash0");
> +    s->flash[1] = rvsp_flash_create1(s, "rvsp.flash1", "pflash1");
> +}

I suggest explicitly creating both flash devices in rvsp_ref_machine_instance_init():

> +static void rvsp_ref_machine_instance_init(Object *obj)
> +{
> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> + 
> +    s->flash[0] = rvsp_flash_create1(s, "rvsp.flash0", "pflash0");
> +    s->flash[1] = rvsp_flash_create1(s, "rvsp.flash1", "pflash1");
> +}

You can then rename rvsp_flash_create1() to rvsp_flash_create().


> +
> +static void rvsp_flash_map1(PFlashCFI01 *flash,
> +                            hwaddr base, hwaddr size,
> +                            MemoryRegion *sysmem)
> +{
> +    DeviceState *dev = DEVICE(flash);
> +
> +    assert(QEMU_IS_ALIGNED(size, RVSP_FLASH_SECTOR_SIZE));
> +    assert(size / RVSP_FLASH_SECTOR_SIZE <= UINT32_MAX);
> +    qdev_prop_set_uint32(dev, "num-blocks", size / RVSP_FLASH_SECTOR_SIZE);
> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
> +
> +    memory_region_add_subregion(sysmem, base,
> +                                sysbus_mmio_get_region(SYS_BUS_DEVICE(dev),
> +                                                       0));
> +}
> +
> +static void rvsp_flash_map(RVSPMachineState *s,
> +                           MemoryRegion *sysmem)
> +{
> +    hwaddr flashsize = rvsp_ref_memmap[RVSP_FLASH].size / 2;
> +    hwaddr flashbase = rvsp_ref_memmap[RVSP_FLASH].base;
> +
> +    rvsp_flash_map1(s->flash[0], flashbase, flashsize, sysmem);
> +    rvsp_flash_map1(s->flash[1], flashbase + flashsize, flashsize, sysmem);
> +}

I suggest rename rvsp_flash_map() to rvsp_create_flash_maps() and rename the
"1" functions to rvsp_flash_map().

> +
> +static void create_pcie_irq_map(RVSPMachineState *s, void *fdt, char *nodename,
> +                                uint32_t irqchip_phandle)
> +{
> +    int pin, dev;
> +    uint32_t irq_map_stride = 0;
> +    uint32_t full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS *
> +                          FDT_MAX_INT_MAP_WIDTH] = {};
> +    uint32_t *irq_map = full_irq_map;
> +
> +    /*
> +     * This code creates a standard swizzle of interrupts such that
> +     * each device's first interrupt is based on it's PCI_SLOT number.
> +     * (See pci_swizzle_map_irq_fn())
> +     *
> +     * We only need one entry per interrupt in the table (not one per
> +     * possible slot) seeing the interrupt-map-mask will allow the table
> +     * to wrap to any number of devices.
> +     */
> +    for (dev = 0; dev < GPEX_NUM_IRQS; dev++) {
> +        int devfn = dev * 0x8;
> +
> +        for (pin = 0; pin < GPEX_NUM_IRQS; pin++) {
> +            int irq_nr = RVSP_PCIE_IRQ +
> +                         ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS);
> +            int i = 0;
> +
> +            /* Fill PCI address cells */
> +            irq_map[i] = cpu_to_be32(devfn << 8);
> +            i += FDT_PCI_ADDR_CELLS;
> +
> +            /* Fill PCI Interrupt cells */
> +            irq_map[i] = cpu_to_be32(pin + 1);
> +            i += FDT_PCI_INT_CELLS;
> +
> +            /* Fill interrupt controller phandle and cells */
> +            irq_map[i++] = cpu_to_be32(irqchip_phandle);
> +            irq_map[i++] = cpu_to_be32(irq_nr);
> +            irq_map[i++] = cpu_to_be32(0x4);
> +
> +            if (!irq_map_stride) {
> +                irq_map_stride = i;
> +            }
> +            irq_map += irq_map_stride;
> +        }
> +    }
> +
> +    qemu_fdt_setprop(fdt, nodename, "interrupt-map", full_irq_map,
> +                     GPEX_NUM_IRQS * GPEX_NUM_IRQS *
> +                     irq_map_stride * sizeof(uint32_t));
> +
> +    qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask",
> +                           0x1800, 0, 0, 0x7);
> +}
> +
> +static void create_fdt_socket_cpus(RVSPMachineState *s, int socket,
> +                                   char *clust_name, uint32_t *phandle,
> +                                   uint32_t *intc_phandles)
> +{
> +    int cpu;
> +    uint32_t cpu_phandle;
> +    MachineState *ms = MACHINE(s);
> +    char *name, *cpu_name, *core_name, *intc_name, *sv_name;

We did some changes in the FDT functions in hw/riscv/virt.c where we used a lot
of "g_autofree" calls to avoid having to call g_free() every time. This was done
in the context of fixing a mem leak.

I suggest doing the same in all the FDT functions you're creating for this
new machine.

Also, it seems like some FDT functions we're using here is a copy/paste of what
the 'virt' machine is doing, e.g. create_fdt_socket_memory(). I think it's worth
thinking about moving these common functions somewhere else (e.g. hw/riscv/fdt_helper.c)
to allow us to share code between the machines.

> +    bool is_32_bit = riscv_is_32bit(&s->soc[0]);
> +    uint8_t satp_mode_max;
> +
> +    for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
> +        RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu];
> +
> +        cpu_phandle = (*phandle)++;
> +
> +        cpu_name = g_strdup_printf("/cpus/cpu@%d",
> +            s->soc[socket].hartid_base + cpu);
> +        qemu_fdt_add_subnode(ms->fdt, cpu_name);
> +
> +        if (cpu_ptr->cfg.satp_mode.supported != 0) {
> +            satp_mode_max = satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map);
> +            sv_name = g_strdup_printf("riscv,%s",
> +                                      satp_mode_str(satp_mode_max, is_32_bit));
> +            qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name);
> +            g_free(sv_name);
> +        }
> +
> +        name = riscv_isa_string(cpu_ptr);
> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name);
> +        g_free(name);
> +
> +        if (cpu_ptr->cfg.ext_zicbom) {
> +            qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cbom-block-size",
> +                                  cpu_ptr->cfg.cbom_blocksize);
> +        }
> +
> +        if (cpu_ptr->cfg.ext_zicboz) {
> +            qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cboz-block-size",
> +                                  cpu_ptr->cfg.cboz_blocksize);
> +        }
> +
> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "compatible", "riscv");
> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "status", "okay");
> +        qemu_fdt_setprop_cell(ms->fdt, cpu_name, "reg",
> +            s->soc[socket].hartid_base + cpu);
> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "device_type", "cpu");
> +        riscv_socket_fdt_write_id(ms, cpu_name, socket);
> +        qemu_fdt_setprop_cell(ms->fdt, cpu_name, "phandle", cpu_phandle);
> +
> +        intc_phandles[cpu] = (*phandle)++;
> +
> +        intc_name = g_strdup_printf("%s/interrupt-controller", cpu_name);
> +        qemu_fdt_add_subnode(ms->fdt, intc_name);
> +        qemu_fdt_setprop_cell(ms->fdt, intc_name, "phandle",
> +            intc_phandles[cpu]);
> +        qemu_fdt_setprop_string(ms->fdt, intc_name, "compatible",
> +            "riscv,cpu-intc");
> +        qemu_fdt_setprop(ms->fdt, intc_name, "interrupt-controller", NULL, 0);
> +        qemu_fdt_setprop_cell(ms->fdt, intc_name, "#interrupt-cells", 1);
> +
> +        core_name = g_strdup_printf("%s/core%d", clust_name, cpu);
> +        qemu_fdt_add_subnode(ms->fdt, core_name);
> +        qemu_fdt_setprop_cell(ms->fdt, core_name, "cpu", cpu_phandle);
> +
> +        g_free(core_name);
> +        g_free(intc_name);
> +        g_free(cpu_name);
> +    }
> +}
> +
> +static void create_fdt_socket_memory(RVSPMachineState *s,
> +                                     const MemMapEntry *memmap, int socket)
> +{
> +    char *mem_name;
> +    uint64_t addr, size;
> +    MachineState *ms = MACHINE(s);
> +
> +    addr = memmap[RVSP_DRAM].base + riscv_socket_mem_offset(ms, socket);
> +    size = riscv_socket_mem_size(ms, socket);
> +    mem_name = g_strdup_printf("/memory@%lx", (long)addr);
> +    qemu_fdt_add_subnode(ms->fdt, mem_name);
> +    qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg",
> +        addr >> 32, addr, size >> 32, size);
> +    qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory");
> +    riscv_socket_fdt_write_id(ms, mem_name, socket);
> +    g_free(mem_name);
> +}
> +
> +static void create_fdt_socket_aclint(RVSPMachineState *s,
> +                                     const MemMapEntry *memmap, int socket,
> +                                     uint32_t *intc_phandles)
> +{
> +    int cpu;
> +    char *name;
> +    unsigned long addr, size;
> +    uint32_t aclint_cells_size;
> +    uint32_t *aclint_mtimer_cells;
> +    MachineState *ms = MACHINE(s);
> +
> +    aclint_mtimer_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
> +
> +    for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
> +        aclint_mtimer_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
> +        aclint_mtimer_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_TIMER);
> +    }
> +    aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2;
> +
> +    addr = memmap[RVSP_ACLINT].base +
> +           (RISCV_ACLINT_DEFAULT_MTIMER_SIZE * socket);
> +    size = RISCV_ACLINT_DEFAULT_MTIMER_SIZE;
> +
> +    name = g_strdup_printf("/soc/mtimer@%lx", addr);
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
> +        "riscv,aclint-mtimer");
> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
> +        0x0, addr + RISCV_ACLINT_DEFAULT_MTIME,
> +        0x0, size - RISCV_ACLINT_DEFAULT_MTIME,
> +        0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP,
> +        0x0, RISCV_ACLINT_DEFAULT_MTIME);
> +    qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
> +        aclint_mtimer_cells, aclint_cells_size);
> +    riscv_socket_fdt_write_id(ms, name, socket);
> +    g_free(name);
> +
> +    g_free(aclint_mtimer_cells);
> +}
> +
> +static uint32_t imsic_num_bits(uint32_t count)
> +{
> +    uint32_t ret = 0;
> +
> +    while (BIT(ret) < count) {
> +        ret++;
> +    }
> +
> +    return ret;
> +}
> +
> +static void create_fdt_one_imsic(RVSPMachineState *s, hwaddr base_addr,
> +                                 uint32_t *intc_phandles, uint32_t msi_phandle,
> +                                 bool m_mode, uint32_t imsic_guest_bits)
> +{
> +    int cpu, socket;
> +    char *imsic_name;
> +    MachineState *ms = MACHINE(s);
> +    int socket_count = riscv_socket_count(ms);
> +    uint32_t imsic_max_hart_per_socket;
> +    uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size;
> +
> +    imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
> +    imsic_regs = g_new0(uint32_t, socket_count * 4);
> +
> +    for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
> +        imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
> +        imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
> +    }
> +
> +    imsic_max_hart_per_socket = 0;
> +    for (socket = 0; socket < socket_count; socket++) {
> +        imsic_addr = base_addr + socket * RVSP_IMSIC_GROUP_MAX_SIZE;
> +        imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) *
> +                     s->soc[socket].num_harts;
> +        imsic_regs[socket * 4 + 0] = 0;
> +        imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
> +        imsic_regs[socket * 4 + 2] = 0;
> +        imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
> +        if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
> +            imsic_max_hart_per_socket = s->soc[socket].num_harts;
> +        }
> +    }
> +
> +    imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr);
> +    qemu_fdt_add_subnode(ms->fdt, imsic_name);
> +    qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics");
> +    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
> +                          FDT_IMSIC_INT_CELLS);
> +    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0);
> +    qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0);
> +    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
> +                     imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
> +    qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
> +                     socket_count * sizeof(uint32_t) * 4);
> +    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
> +                     RVSP_IRQCHIP_NUM_MSIS);
> +
> +    if (imsic_guest_bits) {
> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,guest-index-bits",
> +                              imsic_guest_bits);
> +    }
> +
> +    if (socket_count > 1) {
> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits",
> +                              imsic_num_bits(imsic_max_hart_per_socket));
> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits",
> +                              imsic_num_bits(socket_count));
> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift",
> +                              IMSIC_MMIO_GROUP_MIN_SHIFT);
> +    }
> +    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle);
> +
> +    g_free(imsic_name);
> +    g_free(imsic_regs);
> +    g_free(imsic_cells);
> +}
> +
> +static void create_fdt_imsic(RVSPMachineState *s, const MemMapEntry *memmap,
> +                             uint32_t *phandle, uint32_t *intc_phandles,
> +                             uint32_t *msi_m_phandle, uint32_t *msi_s_phandle)
> +{
> +    *msi_m_phandle = (*phandle)++;
> +    *msi_s_phandle = (*phandle)++;
> +
> +    /* M-level IMSIC node */
> +    create_fdt_one_imsic(s, memmap[RVSP_IMSIC_M].base, intc_phandles,
> +                         *msi_m_phandle, true, 0);
> +
> +    /* S-level IMSIC node */
> +    create_fdt_one_imsic(s, memmap[RVSP_IMSIC_S].base, intc_phandles,
> +                         *msi_s_phandle, false,
> +                         imsic_num_bits(s->aia_guests + 1));
> +
> +}
> +
> +static void create_fdt_one_aplic(RVSPMachineState *s, int socket,
> +                                 unsigned long aplic_addr, uint32_t aplic_size,
> +                                 uint32_t msi_phandle,
> +                                 uint32_t *intc_phandles,
> +                                 uint32_t aplic_phandle,
> +                                 uint32_t aplic_child_phandle,
> +                                 bool m_mode, int num_harts)
> +{
> +    int cpu;
> +    char *aplic_name;
> +    uint32_t *aplic_cells;
> +    MachineState *ms = MACHINE(s);
> +
> +    aplic_cells = g_new0(uint32_t, num_harts * 2);
> +
> +    for (cpu = 0; cpu < num_harts; cpu++) {
> +        aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
> +        aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
> +    }
> +
> +    aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
> +    qemu_fdt_add_subnode(ms->fdt, aplic_name);
> +    qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic");
> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name,
> +                          "#interrupt-cells", FDT_APLIC_INT_CELLS);
> +    qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0);
> +
> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle);
> +
> +    qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
> +                           0x0, aplic_addr, 0x0, aplic_size);
> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
> +                          RVSP_IRQCHIP_NUM_SOURCES);
> +
> +    if (aplic_child_phandle) {
> +        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
> +                              aplic_child_phandle);
> +        qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
> +                               aplic_child_phandle, 0x1,
> +                               RVSP_IRQCHIP_NUM_SOURCES);
> +    }
> +
> +    riscv_socket_fdt_write_id(ms, aplic_name, socket);
> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_phandle);
> +
> +    g_free(aplic_name);
> +    g_free(aplic_cells);
> +}
> +
> +static void create_fdt_socket_aplic(RVSPMachineState *s,
> +                                    const MemMapEntry *memmap, int socket,
> +                                    uint32_t msi_m_phandle,
> +                                    uint32_t msi_s_phandle,
> +                                    uint32_t *phandle,
> +                                    uint32_t *intc_phandles,
> +                                    uint32_t *aplic_phandles,
> +                                    int num_harts)
> +{
> +    char *aplic_name;
> +    unsigned long aplic_addr;
> +    uint32_t aplic_m_phandle, aplic_s_phandle;
> +
> +    aplic_m_phandle = (*phandle)++;
> +    aplic_s_phandle = (*phandle)++;
> +
> +    /* M-level APLIC node */
> +    aplic_addr = memmap[RVSP_APLIC_M].base +
> +                 (memmap[RVSP_APLIC_M].size * socket);
> +    create_fdt_one_aplic(s, socket, aplic_addr, memmap[RVSP_APLIC_M].size,
> +                         msi_m_phandle, intc_phandles,
> +                         aplic_m_phandle, aplic_s_phandle,
> +                         true, num_harts);
> +
> +    /* S-level APLIC node */
> +    aplic_addr = memmap[RVSP_APLIC_S].base +
> +                 (memmap[RVSP_APLIC_S].size * socket);
> +    create_fdt_one_aplic(s, socket, aplic_addr, memmap[RVSP_APLIC_S].size,
> +                         msi_s_phandle, intc_phandles,
> +                         aplic_s_phandle, 0,
> +                         false, num_harts);
> +
> +    aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
> +
> +    g_free(aplic_name);
> +
> +    aplic_phandles[socket] = aplic_s_phandle;
> +}
> +
> +static void create_fdt_pmu(RVSPMachineState *s)
> +{
> +    char *pmu_name;
> +    MachineState *ms = MACHINE(s);
> +    RISCVCPU hart = s->soc[0].harts[0];
> +
> +    pmu_name = g_strdup_printf("/pmu");
> +    qemu_fdt_add_subnode(ms->fdt, pmu_name);
> +    qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible", "riscv,pmu");
> +    riscv_pmu_generate_fdt_node(ms->fdt, hart.pmu_avail_ctrs, pmu_name);
> +
> +    g_free(pmu_name);
> +}
> +
> +static void create_fdt_sockets(RVSPMachineState *s, const MemMapEntry *memmap,
> +                               uint32_t *phandle,
> +                               uint32_t *irq_mmio_phandle,
> +                               uint32_t *irq_pcie_phandle,
> +                               uint32_t *msi_pcie_phandle)
> +{
> +    char *clust_name;
> +    int socket, phandle_pos;
> +    MachineState *ms = MACHINE(s);
> +    uint32_t msi_m_phandle = 0, msi_s_phandle = 0;
> +    uint32_t *intc_phandles, xplic_phandles[MAX_NODES];
> +    int socket_count = riscv_socket_count(ms);
> +
> +    qemu_fdt_add_subnode(ms->fdt, "/cpus");
> +    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "timebase-frequency",
> +                          RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ);
> +    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0);
> +    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", 0x1);
> +    qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map");
> +
> +    intc_phandles = g_new0(uint32_t, ms->smp.cpus);
> +
> +    phandle_pos = ms->smp.cpus;
> +    for (socket = (socket_count - 1); socket >= 0; socket--) {
> +        phandle_pos -= s->soc[socket].num_harts;
> +
> +        clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket);
> +        qemu_fdt_add_subnode(ms->fdt, clust_name);
> +
> +        create_fdt_socket_cpus(s, socket, clust_name, phandle,
> +                               &intc_phandles[phandle_pos]);
> +
> +        create_fdt_socket_memory(s, memmap, socket);
> +
> +        g_free(clust_name);
> +
> +        create_fdt_socket_aclint(s, memmap, socket,
> +            &intc_phandles[phandle_pos]);
> +    }
> +
> +    create_fdt_imsic(s, memmap, phandle, intc_phandles,
> +        &msi_m_phandle, &msi_s_phandle);
> +    *msi_pcie_phandle = msi_s_phandle;
> +
> +    phandle_pos = ms->smp.cpus;
> +    for (socket = (socket_count - 1); socket >= 0; socket--) {
> +        phandle_pos -= s->soc[socket].num_harts;
> +
> +        create_fdt_socket_aplic(s, memmap, socket,
> +                                msi_m_phandle, msi_s_phandle, phandle,
> +                                &intc_phandles[phandle_pos],
> +                                xplic_phandles,
> +                                s->soc[socket].num_harts);
> +    }
> +
> +    g_free(intc_phandles);
> +
> +    for (socket = 0; socket < socket_count; socket++) {
> +        if (socket == 0) {
> +            *irq_mmio_phandle = xplic_phandles[socket];
> +            *irq_pcie_phandle = xplic_phandles[socket];
> +        }
> +        if (socket == 1) {
> +            *irq_pcie_phandle = xplic_phandles[socket];
> +        }
> +    }
> +
> +    riscv_socket_fdt_write_distance_matrix(ms);
> +}
> +
> +static void create_fdt_pcie(RVSPMachineState *s, const MemMapEntry *memmap,
> +                            uint32_t irq_pcie_phandle,
> +                            uint32_t msi_pcie_phandle)
> +{
> +    char *name;
> +    MachineState *ms = MACHINE(s);
> +
> +    name = g_strdup_printf("/soc/pci@%lx",
> +        (long) memmap[RVSP_PCIE_ECAM].base);
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "#address-cells",
> +        FDT_PCI_ADDR_CELLS);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "#interrupt-cells",
> +        FDT_PCI_INT_CELLS);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "#size-cells", 0x2);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
> +        "pci-host-ecam-generic");
> +    qemu_fdt_setprop_string(ms->fdt, name, "device_type", "pci");
> +    qemu_fdt_setprop_cell(ms->fdt, name, "linux,pci-domain", 0);
> +    qemu_fdt_setprop_cells(ms->fdt, name, "bus-range", 0,
> +        memmap[RVSP_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1);
> +    qemu_fdt_setprop(ms->fdt, name, "dma-coherent", NULL, 0);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle);
> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0,
> +        memmap[RVSP_PCIE_ECAM].base, 0, memmap[RVSP_PCIE_ECAM].size);
> +    qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges",
> +        1, FDT_PCI_RANGE_IOPORT, 2, 0,
> +        2, memmap[RVSP_PCIE_PIO].base, 2, memmap[RVSP_PCIE_PIO].size,
> +        1, FDT_PCI_RANGE_MMIO,
> +        2, memmap[RVSP_PCIE_MMIO].base,
> +        2, memmap[RVSP_PCIE_MMIO].base, 2, memmap[RVSP_PCIE_MMIO].size,
> +        1, FDT_PCI_RANGE_MMIO_64BIT,
> +        2, memmap[RVSP_PCIE_MMIO_HIGH].base,
> +        2, memmap[RVSP_PCIE_MMIO_HIGH].base, 2,
> +           memmap[RVSP_PCIE_MMIO_HIGH].size);
> +
> +    create_pcie_irq_map(s, ms->fdt, name, irq_pcie_phandle);
> +    g_free(name);
> +}
> +
> +static void create_fdt_reset(RVSPMachineState *s, const MemMapEntry *memmap,
> +                             uint32_t *phandle)
> +{
> +    char *name;
> +    uint32_t test_phandle;
> +    MachineState *ms = MACHINE(s);
> +
> +    test_phandle = (*phandle)++;
> +    name = g_strdup_printf("/soc/test@%lx",
> +        (long)memmap[RVSP_TEST].base);
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    {
> +        static const char * const compat[3] = {
> +            "sifive,test1", "sifive,test0", "syscon"
> +        };
> +        qemu_fdt_setprop_string_array(ms->fdt, name, "compatible",
> +                                      (char **)&compat, ARRAY_SIZE(compat));
> +    }
> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
> +        0x0, memmap[RVSP_TEST].base, 0x0, memmap[RVSP_TEST].size);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle);
> +    test_phandle = qemu_fdt_get_phandle(ms->fdt, name);
> +    g_free(name);
> +
> +    name = g_strdup_printf("/reboot");
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "syscon-reboot");
> +    qemu_fdt_setprop_cell(ms->fdt, name, "regmap", test_phandle);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "offset", 0x0);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "value", FINISHER_RESET);
> +    g_free(name);
> +
> +    name = g_strdup_printf("/poweroff");
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "syscon-poweroff");
> +    qemu_fdt_setprop_cell(ms->fdt, name, "regmap", test_phandle);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "offset", 0x0);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "value", FINISHER_PASS);
> +    g_free(name);
> +}
> +
> +static void create_fdt_uart(RVSPMachineState *s, const MemMapEntry *memmap,
> +                            uint32_t irq_mmio_phandle)
> +{
> +    char *name;
> +    MachineState *ms = MACHINE(s);
> +
> +    name = g_strdup_printf("/soc/serial@%lx", (long)memmap[RVSP_UART0].base);
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a");
> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
> +        0x0, memmap[RVSP_UART0].base,
> +        0x0, memmap[RVSP_UART0].size);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle);
> +    qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", RVSP_UART0_IRQ, 0x4);
> +
> +    qemu_fdt_add_subnode(ms->fdt, "/chosen");
> +    qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", name);
> +    g_free(name);
> +}
> +
> +static void create_fdt_rtc(RVSPMachineState *s, const MemMapEntry *memmap,
> +                           uint32_t irq_mmio_phandle)
> +{
> +    char *name;
> +    MachineState *ms = MACHINE(s);
> +
> +    name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[RVSP_RTC].base);
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
> +        "google,goldfish-rtc");
> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
> +        0x0, memmap[RVSP_RTC].base, 0x0, memmap[RVSP_RTC].size);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
> +        irq_mmio_phandle);
> +    qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", RVSP_RTC_IRQ, 0x4);
> +    g_free(name);
> +}
> +
> +static void create_fdt_flash(RVSPMachineState *s, const MemMapEntry *memmap)
> +{
> +    char *name;
> +    MachineState *ms = MACHINE(s);
> +    hwaddr flashsize = rvsp_ref_memmap[RVSP_FLASH].size / 2;
> +    hwaddr flashbase = rvsp_ref_memmap[RVSP_FLASH].base;
> +
> +    name = g_strdup_printf("/flash@%" PRIx64, flashbase);
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "cfi-flash");
> +    qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
> +                                 2, flashbase, 2, flashsize,
> +                                 2, flashbase + flashsize, 2, flashsize);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "bank-width", 4);
> +    g_free(name);
> +}
> +
> +static void create_fdt(RVSPMachineState *s, const MemMapEntry *memmap)
> +{
> +    MachineState *ms = MACHINE(s);
> +    uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1;
> +    uint32_t irq_pcie_phandle = 1;
> +    uint8_t rng_seed[32];
> +
> +    ms->fdt = create_device_tree(&s->fdt_size);
> +    if (!ms->fdt) {
> +        error_report("create_device_tree() failed");
> +        exit(1);
> +    }
> +
> +    qemu_fdt_setprop_string(ms->fdt, "/", "model", "riscv-rvsp-ref,qemu");
> +    qemu_fdt_setprop_string(ms->fdt, "/", "compatible", "riscv-rvsp-ref");
> +    qemu_fdt_setprop_cell(ms->fdt, "/", "#size-cells", 0x2);
> +    qemu_fdt_setprop_cell(ms->fdt, "/", "#address-cells", 0x2);
> +
> +    qemu_fdt_add_subnode(ms->fdt, "/soc");
> +    qemu_fdt_setprop(ms->fdt, "/soc", "ranges", NULL, 0);
> +    qemu_fdt_setprop_string(ms->fdt, "/soc", "compatible", "simple-bus");
> +    qemu_fdt_setprop_cell(ms->fdt, "/soc", "#size-cells", 0x2);
> +    qemu_fdt_setprop_cell(ms->fdt, "/soc", "#address-cells", 0x2);
> +
> +    create_fdt_sockets(s, memmap, &phandle, &irq_mmio_phandle,
> +                       &irq_pcie_phandle, &msi_pcie_phandle);
> +
> +    create_fdt_pcie(s, memmap, irq_pcie_phandle, msi_pcie_phandle);
> +
> +    create_fdt_reset(s, memmap, &phandle);
> +
> +    create_fdt_uart(s, memmap, irq_mmio_phandle);
> +
> +    create_fdt_rtc(s, memmap, irq_mmio_phandle);
> +
> +    create_fdt_flash(s, memmap);
> +    create_fdt_pmu(s);
> +
> +    /* Pass seed to RNG */
> +    qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
> +    qemu_fdt_setprop(ms->fdt, "/chosen", "rng-seed",
> +                     rng_seed, sizeof(rng_seed));
> +}
> +
> +static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
> +                                          DeviceState *irqchip,
> +                                          RVSPMachineState *s)
> +{
> +    DeviceState *dev;
> +    PCIHostState *pci;
> +    PCIDevice *pdev_ahci;
> +    AHCIPCIState *ich9;
> +    DriveInfo *hd[NUM_SATA_PORTS];
> +    MemoryRegion *ecam_alias, *ecam_reg;
> +    MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg;
> +    hwaddr ecam_base = rvsp_ref_memmap[RVSP_PCIE_ECAM].base;
> +    hwaddr ecam_size = rvsp_ref_memmap[RVSP_PCIE_ECAM].size;
> +    hwaddr mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO].base;
> +    hwaddr mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO].size;
> +    hwaddr high_mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].base;
> +    hwaddr high_mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].size;
> +    hwaddr pio_base = rvsp_ref_memmap[RVSP_PCIE_PIO].base;
> +    hwaddr pio_size = rvsp_ref_memmap[RVSP_PCIE_PIO].size;
> +    MachineClass *mc = MACHINE_GET_CLASS(s);
> +    qemu_irq irq;
> +    int i;
> +
> +    dev = qdev_new(TYPE_GPEX_HOST);
> +
> +    /* Set GPEX object properties for the rvsp ref machine */
> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE,
> +                            ecam_base, NULL);
> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE,
> +                            ecam_size, NULL);
> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
> +                             PCI_HOST_BELOW_4G_MMIO_BASE,
> +                             mmio_base, NULL);
> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_BELOW_4G_MMIO_SIZE,
> +                            mmio_size, NULL);
> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
> +                             PCI_HOST_ABOVE_4G_MMIO_BASE,
> +                             high_mmio_base, NULL);
> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ABOVE_4G_MMIO_SIZE,
> +                            high_mmio_size, NULL);
> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE,
> +                            pio_base, NULL);
> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE,
> +                            pio_size, NULL);
> +
> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
> +
> +    ecam_alias = g_new0(MemoryRegion, 1);
> +    ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
> +    memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam",
> +                             ecam_reg, 0, ecam_size);
> +    memory_region_add_subregion(get_system_memory(), ecam_base, ecam_alias);
> +
> +    mmio_alias = g_new0(MemoryRegion, 1);
> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
> +                             mmio_reg, mmio_base, mmio_size);
> +    memory_region_add_subregion(get_system_memory(), mmio_base, mmio_alias);
> +
> +    /* Map high MMIO space */
> +    high_mmio_alias = g_new0(MemoryRegion, 1);
> +    memory_region_init_alias(high_mmio_alias, OBJECT(dev), "pcie-mmio-high",
> +                             mmio_reg, high_mmio_base, high_mmio_size);
> +    memory_region_add_subregion(get_system_memory(), high_mmio_base,
> +                                high_mmio_alias);
> +
> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
> +
> +    for (i = 0; i < GPEX_NUM_IRQS; i++) {
> +        irq = qdev_get_gpio_in(irqchip, RVSP_PCIE_IRQ + i);
> +
> +        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
> +        gpex_set_irq_num(GPEX_HOST(dev), i, RVSP_PCIE_IRQ + i);
> +    }
> +
> +    pci = PCI_HOST_BRIDGE(dev);
> +    pci_init_nic_devices(pci->bus, mc->default_nic);
> +    /* IDE disk setup.  */
> +    pdev_ahci = pci_create_simple(pci->bus, -1, TYPE_ICH9_AHCI);
> +    ich9 = ICH9_AHCI(pdev_ahci);
> +    g_assert(ARRAY_SIZE(hd) == ich9->ahci.ports);
> +    ide_drive_get(hd, ich9->ahci.ports);
> +    ahci_ide_create_devs(&ich9->ahci, hd);
> +
> +    GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus;
> +    return dev;
> +}
> +
> +static DeviceState *rvsp_ref_create_aia(int aia_guests,
> +                                        const MemMapEntry *memmap, int socket,
> +                                        int base_hartid, int hart_count)
> +{
> +    int i;
> +    hwaddr addr;
> +    uint32_t guest_bits;
> +    DeviceState *aplic_s = NULL;
> +    DeviceState *aplic_m = NULL;
> +    bool msimode = true;
> +
> +    /* Per-socket M-level IMSICs */
> +    addr = memmap[RVSP_IMSIC_M].base +
> +           socket * RVSP_IMSIC_GROUP_MAX_SIZE;
> +    for (i = 0; i < hart_count; i++) {
> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
> +                           base_hartid + i, true, 1,
> +                           RVSP_IRQCHIP_NUM_MSIS);
> +    }
> +
> +    /* Per-socket S-level IMSICs */
> +    guest_bits = imsic_num_bits(aia_guests + 1);
> +    addr = memmap[RVSP_IMSIC_S].base + socket * RVSP_IMSIC_GROUP_MAX_SIZE;
> +    for (i = 0; i < hart_count; i++) {
> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(guest_bits),
> +                           base_hartid + i, false, 1 + aia_guests,
> +                           RVSP_IRQCHIP_NUM_MSIS);
> +    }
> +
> +    /* Per-socket M-level APLIC */
> +    aplic_m = riscv_aplic_create(memmap[RVSP_APLIC_M].base +
> +                                 socket * memmap[RVSP_APLIC_M].size,
> +                                 memmap[RVSP_APLIC_M].size,
> +                                 (msimode) ? 0 : base_hartid,
> +                                 (msimode) ? 0 : hart_count,
> +                                 RVSP_IRQCHIP_NUM_SOURCES,
> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
> +                                 msimode, true, NULL);
> +
> +    /* Per-socket S-level APLIC */
> +    aplic_s = riscv_aplic_create(memmap[RVSP_APLIC_S].base +
> +                                 socket * memmap[RVSP_APLIC_S].size,
> +                                 memmap[RVSP_APLIC_S].size,
> +                                 (msimode) ? 0 : base_hartid,
> +                                 (msimode) ? 0 : hart_count,
> +                                 RVSP_IRQCHIP_NUM_SOURCES,
> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
> +                                 msimode, false, aplic_m);
> +
> +    (void)aplic_s;
> +    return aplic_m;
> +}
> +
> +static void rvsp_ref_machine_done(Notifier *notifier, void *data)
> +{
> +    RVSPMachineState *s = container_of(notifier, RVSPMachineState,
> +                                       machine_done);
> +    const MemMapEntry *memmap = rvsp_ref_memmap;
> +    MachineState *machine = MACHINE(s);
> +    target_ulong start_addr = memmap[RVSP_DRAM].base;
> +    target_ulong firmware_end_addr, kernel_start_addr;
> +    const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
> +    uint64_t fdt_load_addr;
> +    uint64_t kernel_entry = 0;
> +    BlockBackend *pflash_blk0;
> +
> +    /* load/create device tree */
> +    if (machine->dtb) {
> +        machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
> +        if (!machine->fdt) {
> +            error_report("load_device_tree() failed");
> +            exit(1);
> +        }
> +    } else {
> +        create_fdt(s, memmap);
> +    }

We fixed a bug in the 'virt' machine  that had to do with the guest-loader device
not being properly configured. It had to do with the FDT not being properly generated.
See 7a87ba8956e59 and https://gitlab.com/qemu-project/qemu/-/issues/1925 for more info.

Long story short, the 'virt' machine introduced a 'finalize_fdt' step where some pieces
of FDT is created during machine_done(), and some are created during init() via create_fdt().
I think it's a good idea to consider doing that in this new machine to avoid this kind of
problem.


Thanks,

Daniel

> +
> +    firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
> +                                                     start_addr, NULL);
> +
> +    pflash_blk0 = pflash_cfi01_get_blk(s->flash[0]);
> +    if (pflash_blk0) {
> +        if (machine->firmware && !strcmp(machine->firmware, "none")) {
> +            /*
> +             * Pflash was supplied but bios is none and not KVM guest,
> +             * let's overwrite the address we jump to after reset to
> +             * the base of the flash.
> +             */
> +            start_addr = rvsp_ref_memmap[RVSP_FLASH].base;
> +        } else {
> +            /*
> +             * Pflash was supplied but either KVM guest or bios is not none.
> +             * In this case, base of the flash would contain S-mode payload.
> +             */
> +            riscv_setup_firmware_boot(machine);
> +            kernel_entry = rvsp_ref_memmap[RVSP_FLASH].base;
> +        }
> +    }
> +
> +    if (machine->kernel_filename && !kernel_entry) {
> +        kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
> +                                                         firmware_end_addr);
> +
> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
> +                                         kernel_start_addr, true, NULL);
> +    }
> +
> +    fdt_load_addr = riscv_compute_fdt_addr(memmap[RVSP_DRAM].base,
> +                                           memmap[RVSP_DRAM].size,
> +                                           machine);
> +    riscv_load_fdt(fdt_load_addr, machine->fdt);
> +
> +    /* load the reset vector */
> +    riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
> +                              rvsp_ref_memmap[RVSP_MROM].base,
> +                              rvsp_ref_memmap[RVSP_MROM].size, kernel_entry,
> +                              fdt_load_addr);
> +
> +}
> +
> +static void rvsp_ref_machine_init(MachineState *machine)
> +{
> +    const MemMapEntry *memmap = rvsp_ref_memmap;
> +    RVSPMachineState *s = RVSP_REF_MACHINE(machine);
> +    MemoryRegion *system_memory = get_system_memory();
> +    MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
> +    char *soc_name;
> +    DeviceState *mmio_irqchip, *pcie_irqchip;
> +    int i, base_hartid, hart_count;
> +    int socket_count = riscv_socket_count(machine);
> +
> +    /* Check socket count limit */
> +    if (RVSP_SOCKETS_MAX < socket_count) {
> +        error_report("number of sockets/nodes should be less than %d",
> +            RVSP_SOCKETS_MAX);
> +        exit(1);
> +    }
> +
> +    if (!tcg_enabled()) {
> +        error_report("'aclint' is only available with TCG acceleration");
> +        exit(1);
> +    }
> +
> +    /* Initialize sockets */
> +    mmio_irqchip = pcie_irqchip = NULL;
> +    for (i = 0; i < socket_count; i++) {
> +        if (!riscv_socket_check_hartids(machine, i)) {
> +            error_report("discontinuous hartids in socket%d", i);
> +            exit(1);
> +        }
> +
> +        base_hartid = riscv_socket_first_hartid(machine, i);
> +        if (base_hartid < 0) {
> +            error_report("can't find hartid base for socket%d", i);
> +            exit(1);
> +        }
> +
> +        hart_count = riscv_socket_hart_count(machine, i);
> +        if (hart_count < 0) {
> +            error_report("can't find hart count for socket%d", i);
> +            exit(1);
> +        }
> +
> +        soc_name = g_strdup_printf("soc%d", i);
> +        object_initialize_child(OBJECT(machine), soc_name, &s->soc[i],
> +                                TYPE_RISCV_HART_ARRAY);
> +        g_free(soc_name);
> +        object_property_set_str(OBJECT(&s->soc[i]), "cpu-type",
> +                                machine->cpu_type, &error_abort);
> +        object_property_set_int(OBJECT(&s->soc[i]), "hartid-base",
> +                                base_hartid, &error_abort);
> +        object_property_set_int(OBJECT(&s->soc[i]), "num-harts",
> +                                hart_count, &error_abort);
> +        sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_fatal);
> +
> +        /* Per-socket ACLINT MTIMER */
> +        riscv_aclint_mtimer_create(memmap[RVSP_ACLINT].base +
> +                i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
> +            RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
> +            base_hartid, hart_count,
> +            RISCV_ACLINT_DEFAULT_MTIMECMP,
> +            RISCV_ACLINT_DEFAULT_MTIME,
> +            RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
> +
> +        /* Per-socket interrupt controller */
> +        s->irqchip[i] = rvsp_ref_create_aia(s->aia_guests,
> +                                            memmap, i, base_hartid,
> +                                            hart_count);
> +
> +        /* Try to use different IRQCHIP instance based device type */
> +        if (i == 0) {
> +            mmio_irqchip = s->irqchip[i];
> +            pcie_irqchip = s->irqchip[i];
> +        }
> +        if (i == 1) {
> +            pcie_irqchip = s->irqchip[i];
> +        }
> +    }
> +
> +    s->memmap = rvsp_ref_memmap;
> +
> +    /* register system main memory (actual RAM) */
> +    memory_region_add_subregion(system_memory, memmap[RVSP_DRAM].base,
> +        machine->ram);
> +
> +    /* boot rom */
> +    memory_region_init_rom(mask_rom, NULL, "riscv_rvsp_ref_board.mrom",
> +                           memmap[RVSP_MROM].size, &error_fatal);
> +    memory_region_add_subregion(system_memory, memmap[RVSP_MROM].base,
> +                                mask_rom);
> +
> +    /* SiFive Test MMIO device */
> +    sifive_test_create(memmap[RVSP_TEST].base);
> +
> +    gpex_pcie_init(system_memory, pcie_irqchip, s);
> +
> +    serial_mm_init(system_memory, memmap[RVSP_UART0].base,
> +        0, qdev_get_gpio_in(mmio_irqchip, RVSP_UART0_IRQ), 399193,
> +        serial_hd(0), DEVICE_LITTLE_ENDIAN);
> +
> +    sysbus_create_simple("goldfish_rtc", memmap[RVSP_RTC].base,
> +        qdev_get_gpio_in(mmio_irqchip, RVSP_RTC_IRQ));
> +
> +    for (i = 0; i < ARRAY_SIZE(s->flash); i++) {
> +        /* Map legacy -drive if=pflash to machine properties */
> +        pflash_cfi01_legacy_drive(s->flash[i],
> +                                  drive_get(IF_PFLASH, 0, i));
> +    }
> +    rvsp_flash_map(s, system_memory);
> +
> +    s->machine_done.notify = rvsp_ref_machine_done;
> +    qemu_add_machine_init_done_notifier(&s->machine_done);
> +}
> +
> +static void rvsp_ref_machine_instance_init(Object *obj)
> +{
> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> +
> +    rvsp_flash_create(s);
> +}
> +
> +static char *rvsp_ref_get_aia_guests(Object *obj, Error **errp)
> +{
> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> +    char val[32];
> +
> +    sprintf(val, "%d", s->aia_guests);
> +    return g_strdup(val);
> +}
> +
> +static void rvsp_ref_set_aia_guests(Object *obj, const char *val, Error **errp)
> +{
> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> +
> +    s->aia_guests = atoi(val);
> +    if (s->aia_guests < 0 || s->aia_guests > RVSP_IRQCHIP_MAX_GUESTS) {
> +        error_setg(errp, "Invalid number of AIA IMSIC guests");
> +        error_append_hint(errp, "Valid values be between 0 and %d.\n",
> +                          RVSP_IRQCHIP_MAX_GUESTS);
> +    }
> +}
> +
> +static void rvsp_ref_machine_class_init(ObjectClass *oc, void *data)
> +{
> +    char str[128];
> +    MachineClass *mc = MACHINE_CLASS(oc);
> +
> +    mc->desc = "RISC-V Server SoC Reference board";
> +    mc->init = rvsp_ref_machine_init;
> +    mc->max_cpus = RVSP_CPUS_MAX;
> +    mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
> +    mc->pci_allow_0_address = true;
> +    mc->default_nic = "e1000e";
> +    mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
> +    mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
> +    mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
> +    mc->numa_mem_supported = true;
> +    /* platform instead of architectural choice */
> +    mc->cpu_cluster_has_numa_boundary = true;
> +    mc->default_ram_id = "riscv_rvsp_ref_board.ram";
> +
> +    object_class_property_add_str(oc, "aia-guests",
> +                                  rvsp_ref_get_aia_guests,
> +                                  rvsp_ref_set_aia_guests);
> +    sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid value "
> +                 "should be between 0 and %d.", RVSP_IRQCHIP_MAX_GUESTS);
> +    object_class_property_set_description(oc, "aia-guests", str);
> +}
> +
> +static const TypeInfo rvsp_ref_typeinfo = {
> +    .name       = TYPE_RVSP_REF_MACHINE,
> +    .parent     = TYPE_MACHINE,
> +    .class_init = rvsp_ref_machine_class_init,
> +    .instance_init = rvsp_ref_machine_instance_init,
> +    .instance_size = sizeof(RVSPMachineState),
> +};
> +
> +static void rvsp_ref_init_register_types(void)
> +{
> +    type_register_static(&rvsp_ref_typeinfo);
> +}
> +
> +type_init(rvsp_ref_init_register_types)
Wu, Fei March 5, 2024, 5:56 a.m. UTC | #2
On 3/5/2024 3:35 AM, Daniel Henrique Barboza wrote:
> 
> 
> On 3/4/24 07:25, Fei Wu wrote:
>> The RISC-V Server Platform specification[1] defines a standardized set
>> of hardware and software capabilities, that portable system software,
>> such as OS and hypervisors can rely on being present in a RISC-V server
>> platform.
>>
>> A corresponding Qemu RISC-V server platform reference (rvsp-ref for
>> short) machine type is added to provide a environment for firmware/OS
>> development and testing. The main features included in rvsp-ref are:
>>
>>   - Based on riscv virt machine type
>>   - A new memory map as close as virt machine as possible
>>   - A new virt CPU type rvsp-ref-cpu for server platform compliance
>>   - AIA
>>   - PCIe AHCI
>>   - PCIe NIC
>>   - No virtio device
>>   - No fw_cfg device
>>   - No ACPI table provided
>>   - Only minimal device tree nodes
>>
>> [1] https://github.com/riscv-non-isa/riscv-server-platform
>>
>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>> ---
>>   configs/devices/riscv64-softmmu/default.mak |    1 +
>>   hw/riscv/Kconfig                            |   13 +
>>   hw/riscv/meson.build                        |    1 +
>>   hw/riscv/server_platform_ref.c              | 1244 +++++++++++++++++++
>>   4 files changed, 1259 insertions(+)
>>   create mode 100644 hw/riscv/server_platform_ref.c
>>
>> diff --git a/configs/devices/riscv64-softmmu/default.mak
>> b/configs/devices/riscv64-softmmu/default.mak
>> index 3f68059448..a1d98e49ef 100644
>> --- a/configs/devices/riscv64-softmmu/default.mak
>> +++ b/configs/devices/riscv64-softmmu/default.mak
>> @@ -10,5 +10,6 @@ CONFIG_SPIKE=y
>>   CONFIG_SIFIVE_E=y
>>   CONFIG_SIFIVE_U=y
>>   CONFIG_RISCV_VIRT=y
>> +CONFIG_SERVER_PLATFORM_REF=y
>>   CONFIG_MICROCHIP_PFSOC=y
>>   CONFIG_SHAKTI_C=y
>> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
>> index 5d644eb7b1..debac5a7f5 100644
>> --- a/hw/riscv/Kconfig
>> +++ b/hw/riscv/Kconfig
>> @@ -48,6 +48,19 @@ config RISCV_VIRT
>>       select ACPI
>>       select ACPI_PCI
>>   +config SERVER_PLATFORM_REF
>> +    bool
>> +    select RISCV_NUMA
>> +    select GOLDFISH_RTC
>> +    select PCI
>> +    select PCI_EXPRESS_GENERIC_BRIDGE
>> +    select PFLASH_CFI01
>> +    select SERIAL
>> +    select RISCV_ACLINT
>> +    select RISCV_APLIC
>> +    select RISCV_IMSIC
>> +    select SIFIVE_TEST
>> +
>>   config SHAKTI_C
>>       bool
>>       select RISCV_ACLINT
>> diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
>> index 2f7ee81be3..bb3aff91ea 100644
>> --- a/hw/riscv/meson.build
>> +++ b/hw/riscv/meson.build
>> @@ -4,6 +4,7 @@ riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true:
>> files('numa.c'))
>>   riscv_ss.add(files('riscv_hart.c'))
>>   riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c'))
>>   riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c'))
>> +riscv_ss.add(when: 'CONFIG_SERVER_PLATFORM_REF', if_true:
>> files('server_platform_ref.c'))
>>   riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c'))
>>   riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
>>   riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
>> diff --git a/hw/riscv/server_platform_ref.c
>> b/hw/riscv/server_platform_ref.c
>> new file mode 100644
>> index 0000000000..ae90c4b27a
>> --- /dev/null
>> +++ b/hw/riscv/server_platform_ref.c
>> @@ -0,0 +1,1244 @@
>> +/*
>> + * QEMU RISC-V Server Platfrom (RVSP) Reference Board
>> + *
>> + * Copyright (c) 2024 Intel, Inc.
>> + *
>> + * This board is compliant RISC-V Server platform specification and
>> leveraging
>> + * a lot of riscv virt code.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2 or later, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but
>> WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>> License for
>> + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> along with
>> + * this program.  If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/units.h"
>> +#include "qemu/error-report.h"
>> +#include "qemu/guest-random.h"
>> +#include "qapi/error.h"
>> +#include "qapi/qapi-visit-common.h"
>> +#include "hw/boards.h"
>> +#include "hw/loader.h"
>> +#include "hw/sysbus.h"
>> +#include "hw/qdev-properties.h"
>> +#include "hw/char/serial.h"
>> +#include "hw/block/flash.h"
>> +#include "hw/ide/pci.h"
>> +#include "hw/ide/ahci-pci.h"
>> +#include "hw/pci/pci.h"
>> +#include "hw/pci-host/gpex.h"
>> +#include "hw/core/sysbus-fdt.h"
>> +#include "hw/riscv/riscv_hart.h"
>> +#include "hw/riscv/boot.h"
>> +#include "hw/riscv/numa.h"
>> +#include "hw/intc/riscv_aclint.h"
>> +#include "hw/intc/riscv_aplic.h"
>> +#include "hw/intc/riscv_imsic.h"
>> +#include "hw/misc/sifive_test.h"
>> +#include "chardev/char.h"
>> +#include "sysemu/device_tree.h"
>> +#include "sysemu/sysemu.h"
>> +#include "sysemu/tcg.h"
>> +#include "target/riscv/cpu.h"
>> +#include "target/riscv/pmu.h"
>> +#include "net/net.h"
>> +
>> +#define RVSP_CPUS_MAX_BITS             9
>> +#define RVSP_CPUS_MAX                  (1 << RVSP_CPUS_MAX_BITS)
>> +#define RVSP_SOCKETS_MAX_BITS          2
>> +#define RVSP_SOCKETS_MAX               (1 << RVSP_SOCKETS_MAX_BITS)
>> +
>> +#define RVSP_IRQCHIP_NUM_MSIS 255
>> +#define RVSP_IRQCHIP_NUM_SOURCES 96
>> +#define RVSP_IRQCHIP_NUM_PRIO_BITS 3
>> +#define RVSP_IRQCHIP_MAX_GUESTS_BITS 3
>> +#define RVSP_IRQCHIP_MAX_GUESTS ((1U << RVSP_IRQCHIP_MAX_GUESTS_BITS)
>> - 1U)
>> +
>> +#define FDT_PCI_ADDR_CELLS    3
>> +#define FDT_PCI_INT_CELLS     1
>> +#define FDT_APLIC_INT_CELLS   2
>> +#define FDT_IMSIC_INT_CELLS   0
>> +#define FDT_MAX_INT_CELLS     2
>> +#define FDT_MAX_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS
>> + \
>> +                                 1 + FDT_MAX_INT_CELLS)
>> +#define FDT_APLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS +
>> FDT_PCI_INT_CELLS + \
>> +                                 1 + FDT_APLIC_INT_CELLS)
>> +
>> +#define NUM_SATA_PORTS  6
>> +
>> +#define TYPE_RVSP_REF_MACHINE MACHINE_TYPE_NAME("rvsp-ref")
>> +OBJECT_DECLARE_SIMPLE_TYPE(RVSPMachineState, RVSP_REF_MACHINE)
>> +
>> +struct RVSPMachineState {
>> +    /*< private >*/
>> +    MachineState parent;
>> +
>> +    /*< public >*/
>> +    Notifier machine_done;
>> +    RISCVHartArrayState soc[RVSP_SOCKETS_MAX];
>> +    DeviceState *irqchip[RVSP_SOCKETS_MAX];
>> +    PFlashCFI01 *flash[2];
>> +
>> +    int fdt_size;
>> +    int aia_guests;
>> +    const MemMapEntry *memmap;
>> +};
>> +
>> +enum {
>> +    RVSP_DEBUG,
>> +    RVSP_MROM,
>> +    RVSP_TEST,
>> +    RVSP_RTC,
>> +    RVSP_ACLINT,
>> +    RVSP_APLIC_M,
>> +    RVSP_APLIC_S,
>> +    RVSP_UART0,
>> +    RVSP_IMSIC_M,
>> +    RVSP_IMSIC_S,
>> +    RVSP_FLASH,
>> +    RVSP_DRAM,
>> +    RVSP_PCIE_MMIO,
>> +    RVSP_PCIE_PIO,
>> +    RVSP_PCIE_ECAM,
>> +    RVSP_PCIE_MMIO_HIGH
>> +};
>> +
>> +enum {
>> +    RVSP_UART0_IRQ = 10,
>> +    RVSP_RTC_IRQ = 11,
>> +    RVSP_PCIE_IRQ = 0x20, /* 32 to 35 */
>> +};
>> +
>> +/*
>> + * The server soc reference machine physical address space used by
>> some of the
>> + * devices namely ACLINT, APLIC and IMSIC depend on number of
>> Sockets, number
>> + * of CPUs, and number of IMSIC guest files.
>> + *
>> + * Various limits defined by RVSP_SOCKETS_MAX_BITS,
>> RVSP_CPUS_MAX_BITS, and
>> + * RVSP_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization of
>> server soc
>> + * reference machine physical address space.
>> + */
>> +
>> +#define RVSP_IMSIC_GROUP_MAX_SIZE      (1U <<
>> IMSIC_MMIO_GROUP_MIN_SHIFT)
>> +#if RVSP_IMSIC_GROUP_MAX_SIZE < \
>> +    IMSIC_GROUP_SIZE(RVSP_CPUS_MAX_BITS, RVSP_IRQCHIP_MAX_GUESTS_BITS)
>> +#error "Can't accomodate single IMSIC group in address space"
>> +#endif
>> +
>> +#define RVSP_IMSIC_MAX_SIZE            (RVSP_SOCKETS_MAX * \
>> +                                        RVSP_IMSIC_GROUP_MAX_SIZE)
>> +#if 0x4000000 < RVSP_IMSIC_MAX_SIZE
>> +#error "Can't accomodate all IMSIC groups in address space"
>> +#endif
>> +
>> +static const MemMapEntry rvsp_ref_memmap[] = {
>> +    [RVSP_DEBUG] =          {        0x0,         0x100 },
>> +    [RVSP_MROM] =           {     0x1000,        0xf000 },
>> +    [RVSP_TEST] =           {   0x100000,        0x1000 },
>> +    [RVSP_RTC] =            {   0x101000,        0x1000 },
>> +    [RVSP_ACLINT] =         {  0x2000000,       0x10000 },
>> +    [RVSP_PCIE_PIO] =       {  0x3000000,       0x10000 },
>> +    [RVSP_APLIC_M] =        {  0xc000000, APLIC_SIZE(RVSP_CPUS_MAX) },
>> +    [RVSP_APLIC_S] =        {  0xd000000, APLIC_SIZE(RVSP_CPUS_MAX) },
>> +    [RVSP_UART0] =          { 0x10000000,         0x100 },
>> +    [RVSP_FLASH] =          { 0x20000000,     0x4000000 },
>> +    [RVSP_IMSIC_M] =        { 0x24000000, RVSP_IMSIC_MAX_SIZE },
>> +    [RVSP_IMSIC_S] =        { 0x28000000, RVSP_IMSIC_MAX_SIZE },
>> +    [RVSP_PCIE_ECAM] =      { 0x30000000,    0x10000000 },
>> +    [RVSP_PCIE_MMIO] =      { 0x40000000,    0x40000000 },
>> +    [RVSP_DRAM] =           { 0x80000000, 0xff80000000ull },
>> +    [RVSP_PCIE_MMIO_HIGH] = { 0x10000000000ull, 0x10000000000ull },
>> +};
>> +
>> +#define RVSP_FLASH_SECTOR_SIZE (256 * KiB)
>> +
>> +static PFlashCFI01 *rvsp_flash_create1(RVSPMachineState *s,
>> +                                       const char *name,
>> +                                       const char *alias_prop_name)
>> +{
>> +    /*
>> +     * Create a single flash device.  We use the same parameters as
>> +     * the flash devices on the ARM virt board.
>> +     */
>> +    DeviceState *dev = qdev_new(TYPE_PFLASH_CFI01);
>> +
>> +    qdev_prop_set_uint64(dev, "sector-length", RVSP_FLASH_SECTOR_SIZE);
>> +    qdev_prop_set_uint8(dev, "width", 4);
>> +    qdev_prop_set_uint8(dev, "device-width", 2);
>> +    qdev_prop_set_bit(dev, "big-endian", false);
>> +    qdev_prop_set_uint16(dev, "id0", 0x89);
>> +    qdev_prop_set_uint16(dev, "id1", 0x18);
>> +    qdev_prop_set_uint16(dev, "id2", 0x00);
>> +    qdev_prop_set_uint16(dev, "id3", 0x00);
>> +    qdev_prop_set_string(dev, "name", name);
>> +
>> +    object_property_add_child(OBJECT(s), name, OBJECT(dev));
>> +    object_property_add_alias(OBJECT(s), alias_prop_name,
>> +                              OBJECT(dev), "drive");
>> +
>> +    return PFLASH_CFI01(dev);
>> +}
>> +
>> +static void rvsp_flash_create(RVSPMachineState *s)
>> +{
>> +    s->flash[0] = rvsp_flash_create1(s, "rvsp.flash0", "pflash0");
>> +    s->flash[1] = rvsp_flash_create1(s, "rvsp.flash1", "pflash1");
>> +}
> 
> I suggest explicitly creating both flash devices in
> rvsp_ref_machine_instance_init():
> 
OK.

>> +static void rvsp_ref_machine_instance_init(Object *obj)
>> +{
>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>> + +    s->flash[0] = rvsp_flash_create1(s, "rvsp.flash0", "pflash0");
>> +    s->flash[1] = rvsp_flash_create1(s, "rvsp.flash1", "pflash1");
>> +}
> 
> You can then rename rvsp_flash_create1() to rvsp_flash_create().
> 
OK.

> 
>> +
>> +static void rvsp_flash_map1(PFlashCFI01 *flash,
>> +                            hwaddr base, hwaddr size,
>> +                            MemoryRegion *sysmem)
>> +{
>> +    DeviceState *dev = DEVICE(flash);
>> +
>> +    assert(QEMU_IS_ALIGNED(size, RVSP_FLASH_SECTOR_SIZE));
>> +    assert(size / RVSP_FLASH_SECTOR_SIZE <= UINT32_MAX);
>> +    qdev_prop_set_uint32(dev, "num-blocks", size /
>> RVSP_FLASH_SECTOR_SIZE);
>> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
>> +
>> +    memory_region_add_subregion(sysmem, base,
>> +                               
>> sysbus_mmio_get_region(SYS_BUS_DEVICE(dev),
>> +                                                       0));
>> +}
>> +
>> +static void rvsp_flash_map(RVSPMachineState *s,
>> +                           MemoryRegion *sysmem)
>> +{
>> +    hwaddr flashsize = rvsp_ref_memmap[RVSP_FLASH].size / 2;
>> +    hwaddr flashbase = rvsp_ref_memmap[RVSP_FLASH].base;
>> +
>> +    rvsp_flash_map1(s->flash[0], flashbase, flashsize, sysmem);
>> +    rvsp_flash_map1(s->flash[1], flashbase + flashsize, flashsize,
>> sysmem);
>> +}
> 
> I suggest rename rvsp_flash_map() to rvsp_create_flash_maps() and rename
> the
> "1" functions to rvsp_flash_map().
> 
OK.

>> +
>> +static void create_pcie_irq_map(RVSPMachineState *s, void *fdt, char
>> *nodename,
>> +                                uint32_t irqchip_phandle)
>> +{
>> +    int pin, dev;
>> +    uint32_t irq_map_stride = 0;
>> +    uint32_t full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS *
>> +                          FDT_MAX_INT_MAP_WIDTH] = {};
>> +    uint32_t *irq_map = full_irq_map;
>> +
>> +    /*
>> +     * This code creates a standard swizzle of interrupts such that
>> +     * each device's first interrupt is based on it's PCI_SLOT number.
>> +     * (See pci_swizzle_map_irq_fn())
>> +     *
>> +     * We only need one entry per interrupt in the table (not one per
>> +     * possible slot) seeing the interrupt-map-mask will allow the table
>> +     * to wrap to any number of devices.
>> +     */
>> +    for (dev = 0; dev < GPEX_NUM_IRQS; dev++) {
>> +        int devfn = dev * 0x8;
>> +
>> +        for (pin = 0; pin < GPEX_NUM_IRQS; pin++) {
>> +            int irq_nr = RVSP_PCIE_IRQ +
>> +                         ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS);
>> +            int i = 0;
>> +
>> +            /* Fill PCI address cells */
>> +            irq_map[i] = cpu_to_be32(devfn << 8);
>> +            i += FDT_PCI_ADDR_CELLS;
>> +
>> +            /* Fill PCI Interrupt cells */
>> +            irq_map[i] = cpu_to_be32(pin + 1);
>> +            i += FDT_PCI_INT_CELLS;
>> +
>> +            /* Fill interrupt controller phandle and cells */
>> +            irq_map[i++] = cpu_to_be32(irqchip_phandle);
>> +            irq_map[i++] = cpu_to_be32(irq_nr);
>> +            irq_map[i++] = cpu_to_be32(0x4);
>> +
>> +            if (!irq_map_stride) {
>> +                irq_map_stride = i;
>> +            }
>> +            irq_map += irq_map_stride;
>> +        }
>> +    }
>> +
>> +    qemu_fdt_setprop(fdt, nodename, "interrupt-map", full_irq_map,
>> +                     GPEX_NUM_IRQS * GPEX_NUM_IRQS *
>> +                     irq_map_stride * sizeof(uint32_t));
>> +
>> +    qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask",
>> +                           0x1800, 0, 0, 0x7);
>> +}
>> +
>> +static void create_fdt_socket_cpus(RVSPMachineState *s, int socket,
>> +                                   char *clust_name, uint32_t *phandle,
>> +                                   uint32_t *intc_phandles)
>> +{
>> +    int cpu;
>> +    uint32_t cpu_phandle;
>> +    MachineState *ms = MACHINE(s);
>> +    char *name, *cpu_name, *core_name, *intc_name, *sv_name;
> 
> We did some changes in the FDT functions in hw/riscv/virt.c where we
> used a lot
> of "g_autofree" calls to avoid having to call g_free() every time. This
> was done
> in the context of fixing a mem leak.
> 
> I suggest doing the same in all the FDT functions you're creating for this
> new machine.
> 
> Also, it seems like some FDT functions we're using here is a copy/paste
> of what
> the 'virt' machine is doing, e.g. create_fdt_socket_memory(). I think
> it's worth
> thinking about moving these common functions somewhere else (e.g.
> hw/riscv/fdt_helper.c)
> to allow us to share code between the machines.
> 
Originally I want to seperate rvsp-ref from virt as much as possible, so
that rvsp-ref can be relatively stable and virt can be moving fast, the
changes in virt don't have side effects on rvsp-ref. Let me check if any
code is better to share.

>> +    bool is_32_bit = riscv_is_32bit(&s->soc[0]);
>> +    uint8_t satp_mode_max;
>> +
>> +    for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
>> +        RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu];
>> +
>> +        cpu_phandle = (*phandle)++;
>> +
>> +        cpu_name = g_strdup_printf("/cpus/cpu@%d",
>> +            s->soc[socket].hartid_base + cpu);
>> +        qemu_fdt_add_subnode(ms->fdt, cpu_name);
>> +
>> +        if (cpu_ptr->cfg.satp_mode.supported != 0) {
>> +            satp_mode_max =
>> satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map);
>> +            sv_name = g_strdup_printf("riscv,%s",
>> +                                      satp_mode_str(satp_mode_max,
>> is_32_bit));
>> +            qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type",
>> sv_name);
>> +            g_free(sv_name);
>> +        }
>> +
>> +        name = riscv_isa_string(cpu_ptr);
>> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name);
>> +        g_free(name);
>> +
>> +        if (cpu_ptr->cfg.ext_zicbom) {
>> +            qemu_fdt_setprop_cell(ms->fdt, cpu_name,
>> "riscv,cbom-block-size",
>> +                                  cpu_ptr->cfg.cbom_blocksize);
>> +        }
>> +
>> +        if (cpu_ptr->cfg.ext_zicboz) {
>> +            qemu_fdt_setprop_cell(ms->fdt, cpu_name,
>> "riscv,cboz-block-size",
>> +                                  cpu_ptr->cfg.cboz_blocksize);
>> +        }
>> +
>> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "compatible",
>> "riscv");
>> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "status", "okay");
>> +        qemu_fdt_setprop_cell(ms->fdt, cpu_name, "reg",
>> +            s->soc[socket].hartid_base + cpu);
>> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "device_type",
>> "cpu");
>> +        riscv_socket_fdt_write_id(ms, cpu_name, socket);
>> +        qemu_fdt_setprop_cell(ms->fdt, cpu_name, "phandle",
>> cpu_phandle);
>> +
>> +        intc_phandles[cpu] = (*phandle)++;
>> +
>> +        intc_name = g_strdup_printf("%s/interrupt-controller",
>> cpu_name);
>> +        qemu_fdt_add_subnode(ms->fdt, intc_name);
>> +        qemu_fdt_setprop_cell(ms->fdt, intc_name, "phandle",
>> +            intc_phandles[cpu]);
>> +        qemu_fdt_setprop_string(ms->fdt, intc_name, "compatible",
>> +            "riscv,cpu-intc");
>> +        qemu_fdt_setprop(ms->fdt, intc_name, "interrupt-controller",
>> NULL, 0);
>> +        qemu_fdt_setprop_cell(ms->fdt, intc_name, "#interrupt-cells",
>> 1);
>> +
>> +        core_name = g_strdup_printf("%s/core%d", clust_name, cpu);
>> +        qemu_fdt_add_subnode(ms->fdt, core_name);
>> +        qemu_fdt_setprop_cell(ms->fdt, core_name, "cpu", cpu_phandle);
>> +
>> +        g_free(core_name);
>> +        g_free(intc_name);
>> +        g_free(cpu_name);
>> +    }
>> +}
>> +
>> +static void create_fdt_socket_memory(RVSPMachineState *s,
>> +                                     const MemMapEntry *memmap, int
>> socket)
>> +{
>> +    char *mem_name;
>> +    uint64_t addr, size;
>> +    MachineState *ms = MACHINE(s);
>> +
>> +    addr = memmap[RVSP_DRAM].base + riscv_socket_mem_offset(ms, socket);
>> +    size = riscv_socket_mem_size(ms, socket);
>> +    mem_name = g_strdup_printf("/memory@%lx", (long)addr);
>> +    qemu_fdt_add_subnode(ms->fdt, mem_name);
>> +    qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg",
>> +        addr >> 32, addr, size >> 32, size);
>> +    qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory");
>> +    riscv_socket_fdt_write_id(ms, mem_name, socket);
>> +    g_free(mem_name);
>> +}
>> +
>> +static void create_fdt_socket_aclint(RVSPMachineState *s,
>> +                                     const MemMapEntry *memmap, int
>> socket,
>> +                                     uint32_t *intc_phandles)
>> +{
>> +    int cpu;
>> +    char *name;
>> +    unsigned long addr, size;
>> +    uint32_t aclint_cells_size;
>> +    uint32_t *aclint_mtimer_cells;
>> +    MachineState *ms = MACHINE(s);
>> +
>> +    aclint_mtimer_cells = g_new0(uint32_t, s->soc[socket].num_harts *
>> 2);
>> +
>> +    for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
>> +        aclint_mtimer_cells[cpu * 2 + 0] =
>> cpu_to_be32(intc_phandles[cpu]);
>> +        aclint_mtimer_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_TIMER);
>> +    }
>> +    aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2;
>> +
>> +    addr = memmap[RVSP_ACLINT].base +
>> +           (RISCV_ACLINT_DEFAULT_MTIMER_SIZE * socket);
>> +    size = RISCV_ACLINT_DEFAULT_MTIMER_SIZE;
>> +
>> +    name = g_strdup_printf("/soc/mtimer@%lx", addr);
>> +    qemu_fdt_add_subnode(ms->fdt, name);
>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
>> +        "riscv,aclint-mtimer");
>> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
>> +        0x0, addr + RISCV_ACLINT_DEFAULT_MTIME,
>> +        0x0, size - RISCV_ACLINT_DEFAULT_MTIME,
>> +        0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP,
>> +        0x0, RISCV_ACLINT_DEFAULT_MTIME);
>> +    qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
>> +        aclint_mtimer_cells, aclint_cells_size);
>> +    riscv_socket_fdt_write_id(ms, name, socket);
>> +    g_free(name);
>> +
>> +    g_free(aclint_mtimer_cells);
>> +}
>> +
>> +static uint32_t imsic_num_bits(uint32_t count)
>> +{
>> +    uint32_t ret = 0;
>> +
>> +    while (BIT(ret) < count) {
>> +        ret++;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static void create_fdt_one_imsic(RVSPMachineState *s, hwaddr base_addr,
>> +                                 uint32_t *intc_phandles, uint32_t
>> msi_phandle,
>> +                                 bool m_mode, uint32_t imsic_guest_bits)
>> +{
>> +    int cpu, socket;
>> +    char *imsic_name;
>> +    MachineState *ms = MACHINE(s);
>> +    int socket_count = riscv_socket_count(ms);
>> +    uint32_t imsic_max_hart_per_socket;
>> +    uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size;
>> +
>> +    imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
>> +    imsic_regs = g_new0(uint32_t, socket_count * 4);
>> +
>> +    for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
>> +        imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
>> +        imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT :
>> IRQ_S_EXT);
>> +    }
>> +
>> +    imsic_max_hart_per_socket = 0;
>> +    for (socket = 0; socket < socket_count; socket++) {
>> +        imsic_addr = base_addr + socket * RVSP_IMSIC_GROUP_MAX_SIZE;
>> +        imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) *
>> +                     s->soc[socket].num_harts;
>> +        imsic_regs[socket * 4 + 0] = 0;
>> +        imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
>> +        imsic_regs[socket * 4 + 2] = 0;
>> +        imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
>> +        if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
>> +            imsic_max_hart_per_socket = s->soc[socket].num_harts;
>> +        }
>> +    }
>> +
>> +    imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned
>> long)base_addr);
>> +    qemu_fdt_add_subnode(ms->fdt, imsic_name);
>> +    qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible",
>> "riscv,imsics");
>> +    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
>> +                          FDT_IMSIC_INT_CELLS);
>> +    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller",
>> NULL, 0);
>> +    qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0);
>> +    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
>> +                     imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
>> +    qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
>> +                     socket_count * sizeof(uint32_t) * 4);
>> +    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
>> +                     RVSP_IRQCHIP_NUM_MSIS);
>> +
>> +    if (imsic_guest_bits) {
>> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name,
>> "riscv,guest-index-bits",
>> +                              imsic_guest_bits);
>> +    }
>> +
>> +    if (socket_count > 1) {
>> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name,
>> "riscv,hart-index-bits",
>> +                             
>> imsic_num_bits(imsic_max_hart_per_socket));
>> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name,
>> "riscv,group-index-bits",
>> +                              imsic_num_bits(socket_count));
>> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name,
>> "riscv,group-index-shift",
>> +                              IMSIC_MMIO_GROUP_MIN_SHIFT);
>> +    }
>> +    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle);
>> +
>> +    g_free(imsic_name);
>> +    g_free(imsic_regs);
>> +    g_free(imsic_cells);
>> +}
>> +
>> +static void create_fdt_imsic(RVSPMachineState *s, const MemMapEntry
>> *memmap,
>> +                             uint32_t *phandle, uint32_t *intc_phandles,
>> +                             uint32_t *msi_m_phandle, uint32_t
>> *msi_s_phandle)
>> +{
>> +    *msi_m_phandle = (*phandle)++;
>> +    *msi_s_phandle = (*phandle)++;
>> +
>> +    /* M-level IMSIC node */
>> +    create_fdt_one_imsic(s, memmap[RVSP_IMSIC_M].base, intc_phandles,
>> +                         *msi_m_phandle, true, 0);
>> +
>> +    /* S-level IMSIC node */
>> +    create_fdt_one_imsic(s, memmap[RVSP_IMSIC_S].base, intc_phandles,
>> +                         *msi_s_phandle, false,
>> +                         imsic_num_bits(s->aia_guests + 1));
>> +
>> +}
>> +
>> +static void create_fdt_one_aplic(RVSPMachineState *s, int socket,
>> +                                 unsigned long aplic_addr, uint32_t
>> aplic_size,
>> +                                 uint32_t msi_phandle,
>> +                                 uint32_t *intc_phandles,
>> +                                 uint32_t aplic_phandle,
>> +                                 uint32_t aplic_child_phandle,
>> +                                 bool m_mode, int num_harts)
>> +{
>> +    int cpu;
>> +    char *aplic_name;
>> +    uint32_t *aplic_cells;
>> +    MachineState *ms = MACHINE(s);
>> +
>> +    aplic_cells = g_new0(uint32_t, num_harts * 2);
>> +
>> +    for (cpu = 0; cpu < num_harts; cpu++) {
>> +        aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
>> +        aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT :
>> IRQ_S_EXT);
>> +    }
>> +
>> +    aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
>> +    qemu_fdt_add_subnode(ms->fdt, aplic_name);
>> +    qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible",
>> "riscv,aplic");
>> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name,
>> +                          "#interrupt-cells", FDT_APLIC_INT_CELLS);
>> +    qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller",
>> NULL, 0);
>> +
>> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent",
>> msi_phandle);
>> +
>> +    qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
>> +                           0x0, aplic_addr, 0x0, aplic_size);
>> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
>> +                          RVSP_IRQCHIP_NUM_SOURCES);
>> +
>> +    if (aplic_child_phandle) {
>> +        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
>> +                              aplic_child_phandle);
>> +        qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
>> +                               aplic_child_phandle, 0x1,
>> +                               RVSP_IRQCHIP_NUM_SOURCES);
>> +    }
>> +
>> +    riscv_socket_fdt_write_id(ms, aplic_name, socket);
>> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle",
>> aplic_phandle);
>> +
>> +    g_free(aplic_name);
>> +    g_free(aplic_cells);
>> +}
>> +
>> +static void create_fdt_socket_aplic(RVSPMachineState *s,
>> +                                    const MemMapEntry *memmap, int
>> socket,
>> +                                    uint32_t msi_m_phandle,
>> +                                    uint32_t msi_s_phandle,
>> +                                    uint32_t *phandle,
>> +                                    uint32_t *intc_phandles,
>> +                                    uint32_t *aplic_phandles,
>> +                                    int num_harts)
>> +{
>> +    char *aplic_name;
>> +    unsigned long aplic_addr;
>> +    uint32_t aplic_m_phandle, aplic_s_phandle;
>> +
>> +    aplic_m_phandle = (*phandle)++;
>> +    aplic_s_phandle = (*phandle)++;
>> +
>> +    /* M-level APLIC node */
>> +    aplic_addr = memmap[RVSP_APLIC_M].base +
>> +                 (memmap[RVSP_APLIC_M].size * socket);
>> +    create_fdt_one_aplic(s, socket, aplic_addr,
>> memmap[RVSP_APLIC_M].size,
>> +                         msi_m_phandle, intc_phandles,
>> +                         aplic_m_phandle, aplic_s_phandle,
>> +                         true, num_harts);
>> +
>> +    /* S-level APLIC node */
>> +    aplic_addr = memmap[RVSP_APLIC_S].base +
>> +                 (memmap[RVSP_APLIC_S].size * socket);
>> +    create_fdt_one_aplic(s, socket, aplic_addr,
>> memmap[RVSP_APLIC_S].size,
>> +                         msi_s_phandle, intc_phandles,
>> +                         aplic_s_phandle, 0,
>> +                         false, num_harts);
>> +
>> +    aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
>> +
>> +    g_free(aplic_name);
>> +
>> +    aplic_phandles[socket] = aplic_s_phandle;
>> +}
>> +
>> +static void create_fdt_pmu(RVSPMachineState *s)
>> +{
>> +    char *pmu_name;
>> +    MachineState *ms = MACHINE(s);
>> +    RISCVCPU hart = s->soc[0].harts[0];
>> +
>> +    pmu_name = g_strdup_printf("/pmu");
>> +    qemu_fdt_add_subnode(ms->fdt, pmu_name);
>> +    qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible",
>> "riscv,pmu");
>> +    riscv_pmu_generate_fdt_node(ms->fdt, hart.pmu_avail_ctrs, pmu_name);
>> +
>> +    g_free(pmu_name);
>> +}
>> +
>> +static void create_fdt_sockets(RVSPMachineState *s, const MemMapEntry
>> *memmap,
>> +                               uint32_t *phandle,
>> +                               uint32_t *irq_mmio_phandle,
>> +                               uint32_t *irq_pcie_phandle,
>> +                               uint32_t *msi_pcie_phandle)
>> +{
>> +    char *clust_name;
>> +    int socket, phandle_pos;
>> +    MachineState *ms = MACHINE(s);
>> +    uint32_t msi_m_phandle = 0, msi_s_phandle = 0;
>> +    uint32_t *intc_phandles, xplic_phandles[MAX_NODES];
>> +    int socket_count = riscv_socket_count(ms);
>> +
>> +    qemu_fdt_add_subnode(ms->fdt, "/cpus");
>> +    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "timebase-frequency",
>> +                          RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ);
>> +    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0);
>> +    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", 0x1);
>> +    qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map");
>> +
>> +    intc_phandles = g_new0(uint32_t, ms->smp.cpus);
>> +
>> +    phandle_pos = ms->smp.cpus;
>> +    for (socket = (socket_count - 1); socket >= 0; socket--) {
>> +        phandle_pos -= s->soc[socket].num_harts;
>> +
>> +        clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket);
>> +        qemu_fdt_add_subnode(ms->fdt, clust_name);
>> +
>> +        create_fdt_socket_cpus(s, socket, clust_name, phandle,
>> +                               &intc_phandles[phandle_pos]);
>> +
>> +        create_fdt_socket_memory(s, memmap, socket);
>> +
>> +        g_free(clust_name);
>> +
>> +        create_fdt_socket_aclint(s, memmap, socket,
>> +            &intc_phandles[phandle_pos]);
>> +    }
>> +
>> +    create_fdt_imsic(s, memmap, phandle, intc_phandles,
>> +        &msi_m_phandle, &msi_s_phandle);
>> +    *msi_pcie_phandle = msi_s_phandle;
>> +
>> +    phandle_pos = ms->smp.cpus;
>> +    for (socket = (socket_count - 1); socket >= 0; socket--) {
>> +        phandle_pos -= s->soc[socket].num_harts;
>> +
>> +        create_fdt_socket_aplic(s, memmap, socket,
>> +                                msi_m_phandle, msi_s_phandle, phandle,
>> +                                &intc_phandles[phandle_pos],
>> +                                xplic_phandles,
>> +                                s->soc[socket].num_harts);
>> +    }
>> +
>> +    g_free(intc_phandles);
>> +
>> +    for (socket = 0; socket < socket_count; socket++) {
>> +        if (socket == 0) {
>> +            *irq_mmio_phandle = xplic_phandles[socket];
>> +            *irq_pcie_phandle = xplic_phandles[socket];
>> +        }
>> +        if (socket == 1) {
>> +            *irq_pcie_phandle = xplic_phandles[socket];
>> +        }
>> +    }
>> +
>> +    riscv_socket_fdt_write_distance_matrix(ms);
>> +}
>> +
>> +static void create_fdt_pcie(RVSPMachineState *s, const MemMapEntry
>> *memmap,
>> +                            uint32_t irq_pcie_phandle,
>> +                            uint32_t msi_pcie_phandle)
>> +{
>> +    char *name;
>> +    MachineState *ms = MACHINE(s);
>> +
>> +    name = g_strdup_printf("/soc/pci@%lx",
>> +        (long) memmap[RVSP_PCIE_ECAM].base);
>> +    qemu_fdt_add_subnode(ms->fdt, name);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "#address-cells",
>> +        FDT_PCI_ADDR_CELLS);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "#interrupt-cells",
>> +        FDT_PCI_INT_CELLS);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "#size-cells", 0x2);
>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
>> +        "pci-host-ecam-generic");
>> +    qemu_fdt_setprop_string(ms->fdt, name, "device_type", "pci");
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "linux,pci-domain", 0);
>> +    qemu_fdt_setprop_cells(ms->fdt, name, "bus-range", 0,
>> +        memmap[RVSP_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1);
>> +    qemu_fdt_setprop(ms->fdt, name, "dma-coherent", NULL, 0);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent",
>> msi_pcie_phandle);
>> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0,
>> +        memmap[RVSP_PCIE_ECAM].base, 0, memmap[RVSP_PCIE_ECAM].size);
>> +    qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges",
>> +        1, FDT_PCI_RANGE_IOPORT, 2, 0,
>> +        2, memmap[RVSP_PCIE_PIO].base, 2, memmap[RVSP_PCIE_PIO].size,
>> +        1, FDT_PCI_RANGE_MMIO,
>> +        2, memmap[RVSP_PCIE_MMIO].base,
>> +        2, memmap[RVSP_PCIE_MMIO].base, 2, memmap[RVSP_PCIE_MMIO].size,
>> +        1, FDT_PCI_RANGE_MMIO_64BIT,
>> +        2, memmap[RVSP_PCIE_MMIO_HIGH].base,
>> +        2, memmap[RVSP_PCIE_MMIO_HIGH].base, 2,
>> +           memmap[RVSP_PCIE_MMIO_HIGH].size);
>> +
>> +    create_pcie_irq_map(s, ms->fdt, name, irq_pcie_phandle);
>> +    g_free(name);
>> +}
>> +
>> +static void create_fdt_reset(RVSPMachineState *s, const MemMapEntry
>> *memmap,
>> +                             uint32_t *phandle)
>> +{
>> +    char *name;
>> +    uint32_t test_phandle;
>> +    MachineState *ms = MACHINE(s);
>> +
>> +    test_phandle = (*phandle)++;
>> +    name = g_strdup_printf("/soc/test@%lx",
>> +        (long)memmap[RVSP_TEST].base);
>> +    qemu_fdt_add_subnode(ms->fdt, name);
>> +    {
>> +        static const char * const compat[3] = {
>> +            "sifive,test1", "sifive,test0", "syscon"
>> +        };
>> +        qemu_fdt_setprop_string_array(ms->fdt, name, "compatible",
>> +                                      (char **)&compat,
>> ARRAY_SIZE(compat));
>> +    }
>> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
>> +        0x0, memmap[RVSP_TEST].base, 0x0, memmap[RVSP_TEST].size);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle);
>> +    test_phandle = qemu_fdt_get_phandle(ms->fdt, name);
>> +    g_free(name);
>> +
>> +    name = g_strdup_printf("/reboot");
>> +    qemu_fdt_add_subnode(ms->fdt, name);
>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
>> "syscon-reboot");
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "regmap", test_phandle);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "offset", 0x0);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "value", FINISHER_RESET);
>> +    g_free(name);
>> +
>> +    name = g_strdup_printf("/poweroff");
>> +    qemu_fdt_add_subnode(ms->fdt, name);
>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
>> "syscon-poweroff");
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "regmap", test_phandle);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "offset", 0x0);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "value", FINISHER_PASS);
>> +    g_free(name);
>> +}
>> +
>> +static void create_fdt_uart(RVSPMachineState *s, const MemMapEntry
>> *memmap,
>> +                            uint32_t irq_mmio_phandle)
>> +{
>> +    char *name;
>> +    MachineState *ms = MACHINE(s);
>> +
>> +    name = g_strdup_printf("/soc/serial@%lx",
>> (long)memmap[RVSP_UART0].base);
>> +    qemu_fdt_add_subnode(ms->fdt, name);
>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a");
>> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
>> +        0x0, memmap[RVSP_UART0].base,
>> +        0x0, memmap[RVSP_UART0].size);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
>> irq_mmio_phandle);
>> +    qemu_fdt_setprop_cells(ms->fdt, name, "interrupts",
>> RVSP_UART0_IRQ, 0x4);
>> +
>> +    qemu_fdt_add_subnode(ms->fdt, "/chosen");
>> +    qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", name);
>> +    g_free(name);
>> +}
>> +
>> +static void create_fdt_rtc(RVSPMachineState *s, const MemMapEntry
>> *memmap,
>> +                           uint32_t irq_mmio_phandle)
>> +{
>> +    char *name;
>> +    MachineState *ms = MACHINE(s);
>> +
>> +    name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[RVSP_RTC].base);
>> +    qemu_fdt_add_subnode(ms->fdt, name);
>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
>> +        "google,goldfish-rtc");
>> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
>> +        0x0, memmap[RVSP_RTC].base, 0x0, memmap[RVSP_RTC].size);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
>> +        irq_mmio_phandle);
>> +    qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", RVSP_RTC_IRQ,
>> 0x4);
>> +    g_free(name);
>> +}
>> +
>> +static void create_fdt_flash(RVSPMachineState *s, const MemMapEntry
>> *memmap)
>> +{
>> +    char *name;
>> +    MachineState *ms = MACHINE(s);
>> +    hwaddr flashsize = rvsp_ref_memmap[RVSP_FLASH].size / 2;
>> +    hwaddr flashbase = rvsp_ref_memmap[RVSP_FLASH].base;
>> +
>> +    name = g_strdup_printf("/flash@%" PRIx64, flashbase);
>> +    qemu_fdt_add_subnode(ms->fdt, name);
>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "cfi-flash");
>> +    qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
>> +                                 2, flashbase, 2, flashsize,
>> +                                 2, flashbase + flashsize, 2,
>> flashsize);
>> +    qemu_fdt_setprop_cell(ms->fdt, name, "bank-width", 4);
>> +    g_free(name);
>> +}
>> +
>> +static void create_fdt(RVSPMachineState *s, const MemMapEntry *memmap)
>> +{
>> +    MachineState *ms = MACHINE(s);
>> +    uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1;
>> +    uint32_t irq_pcie_phandle = 1;
>> +    uint8_t rng_seed[32];
>> +
>> +    ms->fdt = create_device_tree(&s->fdt_size);
>> +    if (!ms->fdt) {
>> +        error_report("create_device_tree() failed");
>> +        exit(1);
>> +    }
>> +
>> +    qemu_fdt_setprop_string(ms->fdt, "/", "model",
>> "riscv-rvsp-ref,qemu");
>> +    qemu_fdt_setprop_string(ms->fdt, "/", "compatible",
>> "riscv-rvsp-ref");
>> +    qemu_fdt_setprop_cell(ms->fdt, "/", "#size-cells", 0x2);
>> +    qemu_fdt_setprop_cell(ms->fdt, "/", "#address-cells", 0x2);
>> +
>> +    qemu_fdt_add_subnode(ms->fdt, "/soc");
>> +    qemu_fdt_setprop(ms->fdt, "/soc", "ranges", NULL, 0);
>> +    qemu_fdt_setprop_string(ms->fdt, "/soc", "compatible",
>> "simple-bus");
>> +    qemu_fdt_setprop_cell(ms->fdt, "/soc", "#size-cells", 0x2);
>> +    qemu_fdt_setprop_cell(ms->fdt, "/soc", "#address-cells", 0x2);
>> +
>> +    create_fdt_sockets(s, memmap, &phandle, &irq_mmio_phandle,
>> +                       &irq_pcie_phandle, &msi_pcie_phandle);
>> +
>> +    create_fdt_pcie(s, memmap, irq_pcie_phandle, msi_pcie_phandle);
>> +
>> +    create_fdt_reset(s, memmap, &phandle);
>> +
>> +    create_fdt_uart(s, memmap, irq_mmio_phandle);
>> +
>> +    create_fdt_rtc(s, memmap, irq_mmio_phandle);
>> +
>> +    create_fdt_flash(s, memmap);
>> +    create_fdt_pmu(s);
>> +
>> +    /* Pass seed to RNG */
>> +    qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
>> +    qemu_fdt_setprop(ms->fdt, "/chosen", "rng-seed",
>> +                     rng_seed, sizeof(rng_seed));
>> +}
>> +
>> +static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
>> +                                          DeviceState *irqchip,
>> +                                          RVSPMachineState *s)
>> +{
>> +    DeviceState *dev;
>> +    PCIHostState *pci;
>> +    PCIDevice *pdev_ahci;
>> +    AHCIPCIState *ich9;
>> +    DriveInfo *hd[NUM_SATA_PORTS];
>> +    MemoryRegion *ecam_alias, *ecam_reg;
>> +    MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg;
>> +    hwaddr ecam_base = rvsp_ref_memmap[RVSP_PCIE_ECAM].base;
>> +    hwaddr ecam_size = rvsp_ref_memmap[RVSP_PCIE_ECAM].size;
>> +    hwaddr mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO].base;
>> +    hwaddr mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO].size;
>> +    hwaddr high_mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].base;
>> +    hwaddr high_mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].size;
>> +    hwaddr pio_base = rvsp_ref_memmap[RVSP_PCIE_PIO].base;
>> +    hwaddr pio_size = rvsp_ref_memmap[RVSP_PCIE_PIO].size;
>> +    MachineClass *mc = MACHINE_GET_CLASS(s);
>> +    qemu_irq irq;
>> +    int i;
>> +
>> +    dev = qdev_new(TYPE_GPEX_HOST);
>> +
>> +    /* Set GPEX object properties for the rvsp ref machine */
>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE,
>> +                            ecam_base, NULL);
>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE,
>> +                            ecam_size, NULL);
>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
>> +                             PCI_HOST_BELOW_4G_MMIO_BASE,
>> +                             mmio_base, NULL);
>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)),
>> PCI_HOST_BELOW_4G_MMIO_SIZE,
>> +                            mmio_size, NULL);
>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
>> +                             PCI_HOST_ABOVE_4G_MMIO_BASE,
>> +                             high_mmio_base, NULL);
>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)),
>> PCI_HOST_ABOVE_4G_MMIO_SIZE,
>> +                            high_mmio_size, NULL);
>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE,
>> +                            pio_base, NULL);
>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE,
>> +                            pio_size, NULL);
>> +
>> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
>> +
>> +    ecam_alias = g_new0(MemoryRegion, 1);
>> +    ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
>> +    memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam",
>> +                             ecam_reg, 0, ecam_size);
>> +    memory_region_add_subregion(get_system_memory(), ecam_base,
>> ecam_alias);
>> +
>> +    mmio_alias = g_new0(MemoryRegion, 1);
>> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
>> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
>> +                             mmio_reg, mmio_base, mmio_size);
>> +    memory_region_add_subregion(get_system_memory(), mmio_base,
>> mmio_alias);
>> +
>> +    /* Map high MMIO space */
>> +    high_mmio_alias = g_new0(MemoryRegion, 1);
>> +    memory_region_init_alias(high_mmio_alias, OBJECT(dev),
>> "pcie-mmio-high",
>> +                             mmio_reg, high_mmio_base, high_mmio_size);
>> +    memory_region_add_subregion(get_system_memory(), high_mmio_base,
>> +                                high_mmio_alias);
>> +
>> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
>> +
>> +    for (i = 0; i < GPEX_NUM_IRQS; i++) {
>> +        irq = qdev_get_gpio_in(irqchip, RVSP_PCIE_IRQ + i);
>> +
>> +        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
>> +        gpex_set_irq_num(GPEX_HOST(dev), i, RVSP_PCIE_IRQ + i);
>> +    }
>> +
>> +    pci = PCI_HOST_BRIDGE(dev);
>> +    pci_init_nic_devices(pci->bus, mc->default_nic);
>> +    /* IDE disk setup.  */
>> +    pdev_ahci = pci_create_simple(pci->bus, -1, TYPE_ICH9_AHCI);
>> +    ich9 = ICH9_AHCI(pdev_ahci);
>> +    g_assert(ARRAY_SIZE(hd) == ich9->ahci.ports);
>> +    ide_drive_get(hd, ich9->ahci.ports);
>> +    ahci_ide_create_devs(&ich9->ahci, hd);
>> +
>> +    GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus;
>> +    return dev;
>> +}
>> +
>> +static DeviceState *rvsp_ref_create_aia(int aia_guests,
>> +                                        const MemMapEntry *memmap,
>> int socket,
>> +                                        int base_hartid, int hart_count)
>> +{
>> +    int i;
>> +    hwaddr addr;
>> +    uint32_t guest_bits;
>> +    DeviceState *aplic_s = NULL;
>> +    DeviceState *aplic_m = NULL;
>> +    bool msimode = true;
>> +
>> +    /* Per-socket M-level IMSICs */
>> +    addr = memmap[RVSP_IMSIC_M].base +
>> +           socket * RVSP_IMSIC_GROUP_MAX_SIZE;
>> +    for (i = 0; i < hart_count; i++) {
>> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
>> +                           base_hartid + i, true, 1,
>> +                           RVSP_IRQCHIP_NUM_MSIS);
>> +    }
>> +
>> +    /* Per-socket S-level IMSICs */
>> +    guest_bits = imsic_num_bits(aia_guests + 1);
>> +    addr = memmap[RVSP_IMSIC_S].base + socket *
>> RVSP_IMSIC_GROUP_MAX_SIZE;
>> +    for (i = 0; i < hart_count; i++) {
>> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(guest_bits),
>> +                           base_hartid + i, false, 1 + aia_guests,
>> +                           RVSP_IRQCHIP_NUM_MSIS);
>> +    }
>> +
>> +    /* Per-socket M-level APLIC */
>> +    aplic_m = riscv_aplic_create(memmap[RVSP_APLIC_M].base +
>> +                                 socket * memmap[RVSP_APLIC_M].size,
>> +                                 memmap[RVSP_APLIC_M].size,
>> +                                 (msimode) ? 0 : base_hartid,
>> +                                 (msimode) ? 0 : hart_count,
>> +                                 RVSP_IRQCHIP_NUM_SOURCES,
>> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
>> +                                 msimode, true, NULL);
>> +
>> +    /* Per-socket S-level APLIC */
>> +    aplic_s = riscv_aplic_create(memmap[RVSP_APLIC_S].base +
>> +                                 socket * memmap[RVSP_APLIC_S].size,
>> +                                 memmap[RVSP_APLIC_S].size,
>> +                                 (msimode) ? 0 : base_hartid,
>> +                                 (msimode) ? 0 : hart_count,
>> +                                 RVSP_IRQCHIP_NUM_SOURCES,
>> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
>> +                                 msimode, false, aplic_m);
>> +
>> +    (void)aplic_s;
>> +    return aplic_m;
>> +}
>> +
>> +static void rvsp_ref_machine_done(Notifier *notifier, void *data)
>> +{
>> +    RVSPMachineState *s = container_of(notifier, RVSPMachineState,
>> +                                       machine_done);
>> +    const MemMapEntry *memmap = rvsp_ref_memmap;
>> +    MachineState *machine = MACHINE(s);
>> +    target_ulong start_addr = memmap[RVSP_DRAM].base;
>> +    target_ulong firmware_end_addr, kernel_start_addr;
>> +    const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
>> +    uint64_t fdt_load_addr;
>> +    uint64_t kernel_entry = 0;
>> +    BlockBackend *pflash_blk0;
>> +
>> +    /* load/create device tree */
>> +    if (machine->dtb) {
>> +        machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
>> +        if (!machine->fdt) {
>> +            error_report("load_device_tree() failed");
>> +            exit(1);
>> +        }
>> +    } else {
>> +        create_fdt(s, memmap);
>> +    }
> 
> We fixed a bug in the 'virt' machine  that had to do with the
> guest-loader device
> not being properly configured. It had to do with the FDT not being
> properly generated.
> See 7a87ba8956e59 and https://gitlab.com/qemu-project/qemu/-/issues/1925
> for more info.
> 
> Long story short, the 'virt' machine introduced a 'finalize_fdt' step
> where some pieces
> of FDT is created during machine_done(), and some are created during
> init() via create_fdt().
> I think it's a good idea to consider doing that in this new machine to
> avoid this kind of
> problem.
> 
Sure, I will do. I started this work on a feature branch using virt.c as
the basis and ported it to upstream recently, so the recent changes in
virt.c are not integrated here.

Thanks,
Fei.

> 
> Thanks,
> 
> Daniel
> 
>> +
>> +    firmware_end_addr = riscv_find_and_load_firmware(machine,
>> firmware_name,
>> +                                                     start_addr, NULL);
>> +
>> +    pflash_blk0 = pflash_cfi01_get_blk(s->flash[0]);
>> +    if (pflash_blk0) {
>> +        if (machine->firmware && !strcmp(machine->firmware, "none")) {
>> +            /*
>> +             * Pflash was supplied but bios is none and not KVM guest,
>> +             * let's overwrite the address we jump to after reset to
>> +             * the base of the flash.
>> +             */
>> +            start_addr = rvsp_ref_memmap[RVSP_FLASH].base;
>> +        } else {
>> +            /*
>> +             * Pflash was supplied but either KVM guest or bios is
>> not none.
>> +             * In this case, base of the flash would contain S-mode
>> payload.
>> +             */
>> +            riscv_setup_firmware_boot(machine);
>> +            kernel_entry = rvsp_ref_memmap[RVSP_FLASH].base;
>> +        }
>> +    }
>> +
>> +    if (machine->kernel_filename && !kernel_entry) {
>> +        kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>> +                                                        
>> firmware_end_addr);
>> +
>> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
>> +                                         kernel_start_addr, true, NULL);
>> +    }
>> +
>> +    fdt_load_addr = riscv_compute_fdt_addr(memmap[RVSP_DRAM].base,
>> +                                           memmap[RVSP_DRAM].size,
>> +                                           machine);
>> +    riscv_load_fdt(fdt_load_addr, machine->fdt);
>> +
>> +    /* load the reset vector */
>> +    riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
>> +                              rvsp_ref_memmap[RVSP_MROM].base,
>> +                              rvsp_ref_memmap[RVSP_MROM].size,
>> kernel_entry,
>> +                              fdt_load_addr);
>> +
>> +}
>> +
>> +static void rvsp_ref_machine_init(MachineState *machine)
>> +{
>> +    const MemMapEntry *memmap = rvsp_ref_memmap;
>> +    RVSPMachineState *s = RVSP_REF_MACHINE(machine);
>> +    MemoryRegion *system_memory = get_system_memory();
>> +    MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
>> +    char *soc_name;
>> +    DeviceState *mmio_irqchip, *pcie_irqchip;
>> +    int i, base_hartid, hart_count;
>> +    int socket_count = riscv_socket_count(machine);
>> +
>> +    /* Check socket count limit */
>> +    if (RVSP_SOCKETS_MAX < socket_count) {
>> +        error_report("number of sockets/nodes should be less than %d",
>> +            RVSP_SOCKETS_MAX);
>> +        exit(1);
>> +    }
>> +
>> +    if (!tcg_enabled()) {
>> +        error_report("'aclint' is only available with TCG
>> acceleration");
>> +        exit(1);
>> +    }
>> +
>> +    /* Initialize sockets */
>> +    mmio_irqchip = pcie_irqchip = NULL;
>> +    for (i = 0; i < socket_count; i++) {
>> +        if (!riscv_socket_check_hartids(machine, i)) {
>> +            error_report("discontinuous hartids in socket%d", i);
>> +            exit(1);
>> +        }
>> +
>> +        base_hartid = riscv_socket_first_hartid(machine, i);
>> +        if (base_hartid < 0) {
>> +            error_report("can't find hartid base for socket%d", i);
>> +            exit(1);
>> +        }
>> +
>> +        hart_count = riscv_socket_hart_count(machine, i);
>> +        if (hart_count < 0) {
>> +            error_report("can't find hart count for socket%d", i);
>> +            exit(1);
>> +        }
>> +
>> +        soc_name = g_strdup_printf("soc%d", i);
>> +        object_initialize_child(OBJECT(machine), soc_name, &s->soc[i],
>> +                                TYPE_RISCV_HART_ARRAY);
>> +        g_free(soc_name);
>> +        object_property_set_str(OBJECT(&s->soc[i]), "cpu-type",
>> +                                machine->cpu_type, &error_abort);
>> +        object_property_set_int(OBJECT(&s->soc[i]), "hartid-base",
>> +                                base_hartid, &error_abort);
>> +        object_property_set_int(OBJECT(&s->soc[i]), "num-harts",
>> +                                hart_count, &error_abort);
>> +        sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_fatal);
>> +
>> +        /* Per-socket ACLINT MTIMER */
>> +        riscv_aclint_mtimer_create(memmap[RVSP_ACLINT].base +
>> +                i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
>> +            RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
>> +            base_hartid, hart_count,
>> +            RISCV_ACLINT_DEFAULT_MTIMECMP,
>> +            RISCV_ACLINT_DEFAULT_MTIME,
>> +            RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
>> +
>> +        /* Per-socket interrupt controller */
>> +        s->irqchip[i] = rvsp_ref_create_aia(s->aia_guests,
>> +                                            memmap, i, base_hartid,
>> +                                            hart_count);
>> +
>> +        /* Try to use different IRQCHIP instance based device type */
>> +        if (i == 0) {
>> +            mmio_irqchip = s->irqchip[i];
>> +            pcie_irqchip = s->irqchip[i];
>> +        }
>> +        if (i == 1) {
>> +            pcie_irqchip = s->irqchip[i];
>> +        }
>> +    }
>> +
>> +    s->memmap = rvsp_ref_memmap;
>> +
>> +    /* register system main memory (actual RAM) */
>> +    memory_region_add_subregion(system_memory, memmap[RVSP_DRAM].base,
>> +        machine->ram);
>> +
>> +    /* boot rom */
>> +    memory_region_init_rom(mask_rom, NULL, "riscv_rvsp_ref_board.mrom",
>> +                           memmap[RVSP_MROM].size, &error_fatal);
>> +    memory_region_add_subregion(system_memory, memmap[RVSP_MROM].base,
>> +                                mask_rom);
>> +
>> +    /* SiFive Test MMIO device */
>> +    sifive_test_create(memmap[RVSP_TEST].base);
>> +
>> +    gpex_pcie_init(system_memory, pcie_irqchip, s);
>> +
>> +    serial_mm_init(system_memory, memmap[RVSP_UART0].base,
>> +        0, qdev_get_gpio_in(mmio_irqchip, RVSP_UART0_IRQ), 399193,
>> +        serial_hd(0), DEVICE_LITTLE_ENDIAN);
>> +
>> +    sysbus_create_simple("goldfish_rtc", memmap[RVSP_RTC].base,
>> +        qdev_get_gpio_in(mmio_irqchip, RVSP_RTC_IRQ));
>> +
>> +    for (i = 0; i < ARRAY_SIZE(s->flash); i++) {
>> +        /* Map legacy -drive if=pflash to machine properties */
>> +        pflash_cfi01_legacy_drive(s->flash[i],
>> +                                  drive_get(IF_PFLASH, 0, i));
>> +    }
>> +    rvsp_flash_map(s, system_memory);
>> +
>> +    s->machine_done.notify = rvsp_ref_machine_done;
>> +    qemu_add_machine_init_done_notifier(&s->machine_done);
>> +}
>> +
>> +static void rvsp_ref_machine_instance_init(Object *obj)
>> +{
>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>> +
>> +    rvsp_flash_create(s);
>> +}
>> +
>> +static char *rvsp_ref_get_aia_guests(Object *obj, Error **errp)
>> +{
>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>> +    char val[32];
>> +
>> +    sprintf(val, "%d", s->aia_guests);
>> +    return g_strdup(val);
>> +}
>> +
>> +static void rvsp_ref_set_aia_guests(Object *obj, const char *val,
>> Error **errp)
>> +{
>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>> +
>> +    s->aia_guests = atoi(val);
>> +    if (s->aia_guests < 0 || s->aia_guests > RVSP_IRQCHIP_MAX_GUESTS) {
>> +        error_setg(errp, "Invalid number of AIA IMSIC guests");
>> +        error_append_hint(errp, "Valid values be between 0 and %d.\n",
>> +                          RVSP_IRQCHIP_MAX_GUESTS);
>> +    }
>> +}
>> +
>> +static void rvsp_ref_machine_class_init(ObjectClass *oc, void *data)
>> +{
>> +    char str[128];
>> +    MachineClass *mc = MACHINE_CLASS(oc);
>> +
>> +    mc->desc = "RISC-V Server SoC Reference board";
>> +    mc->init = rvsp_ref_machine_init;
>> +    mc->max_cpus = RVSP_CPUS_MAX;
>> +    mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
>> +    mc->pci_allow_0_address = true;
>> +    mc->default_nic = "e1000e";
>> +    mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
>> +    mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
>> +    mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
>> +    mc->numa_mem_supported = true;
>> +    /* platform instead of architectural choice */
>> +    mc->cpu_cluster_has_numa_boundary = true;
>> +    mc->default_ram_id = "riscv_rvsp_ref_board.ram";
>> +
>> +    object_class_property_add_str(oc, "aia-guests",
>> +                                  rvsp_ref_get_aia_guests,
>> +                                  rvsp_ref_set_aia_guests);
>> +    sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid
>> value "
>> +                 "should be between 0 and %d.",
>> RVSP_IRQCHIP_MAX_GUESTS);
>> +    object_class_property_set_description(oc, "aia-guests", str);
>> +}
>> +
>> +static const TypeInfo rvsp_ref_typeinfo = {
>> +    .name       = TYPE_RVSP_REF_MACHINE,
>> +    .parent     = TYPE_MACHINE,
>> +    .class_init = rvsp_ref_machine_class_init,
>> +    .instance_init = rvsp_ref_machine_instance_init,
>> +    .instance_size = sizeof(RVSPMachineState),
>> +};
>> +
>> +static void rvsp_ref_init_register_types(void)
>> +{
>> +    type_register_static(&rvsp_ref_typeinfo);
>> +}
>> +
>> +type_init(rvsp_ref_init_register_types)
Daniel Henrique Barboza March 5, 2024, 9:15 a.m. UTC | #3
On 3/5/24 02:56, Wu, Fei wrote:
> On 3/5/2024 3:35 AM, Daniel Henrique Barboza wrote:
>>
>>
>> On 3/4/24 07:25, Fei Wu wrote:
>>> The RISC-V Server Platform specification[1] defines a standardized set
>>> of hardware and software capabilities, that portable system software,
>>> such as OS and hypervisors can rely on being present in a RISC-V server
>>> platform.
>>>
>>> A corresponding Qemu RISC-V server platform reference (rvsp-ref for
>>> short) machine type is added to provide a environment for firmware/OS
>>> development and testing. The main features included in rvsp-ref are:
>>>
>>>    - Based on riscv virt machine type
>>>    - A new memory map as close as virt machine as possible
>>>    - A new virt CPU type rvsp-ref-cpu for server platform compliance
>>>    - AIA
>>>    - PCIe AHCI
>>>    - PCIe NIC
>>>    - No virtio device
>>>    - No fw_cfg device
>>>    - No ACPI table provided
>>>    - Only minimal device tree nodes
>>>
>>> [1] https://github.com/riscv-non-isa/riscv-server-platform
>>>
>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>> ---
>>>    configs/devices/riscv64-softmmu/default.mak |    1 +
>>>    hw/riscv/Kconfig                            |   13 +
>>>    hw/riscv/meson.build                        |    1 +
>>>    hw/riscv/server_platform_ref.c              | 1244 +++++++++++++++++++
>>>    4 files changed, 1259 insertions(+)
>>>    create mode 100644 hw/riscv/server_platform_ref.c
>>>
>>> diff --git a/configs/devices/riscv64-softmmu/default.mak
>>> b/configs/devices/riscv64-softmmu/default.mak
>>> index 3f68059448..a1d98e49ef 100644
>>> --- a/configs/devices/riscv64-softmmu/default.mak
>>> +++ b/configs/devices/riscv64-softmmu/default.mak
>>> @@ -10,5 +10,6 @@ CONFIG_SPIKE=y
>>>    CONFIG_SIFIVE_E=y
>>>    CONFIG_SIFIVE_U=y
>>>    CONFIG_RISCV_VIRT=y
>>> +CONFIG_SERVER_PLATFORM_REF=y
>>>    CONFIG_MICROCHIP_PFSOC=y
>>>    CONFIG_SHAKTI_C=y
>>> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
>>> index 5d644eb7b1..debac5a7f5 100644
>>> --- a/hw/riscv/Kconfig
>>> +++ b/hw/riscv/Kconfig
>>> @@ -48,6 +48,19 @@ config RISCV_VIRT
>>>        select ACPI
>>>        select ACPI_PCI
>>>    +config SERVER_PLATFORM_REF
>>> +    bool
>>> +    select RISCV_NUMA
>>> +    select GOLDFISH_RTC
>>> +    select PCI
>>> +    select PCI_EXPRESS_GENERIC_BRIDGE
>>> +    select PFLASH_CFI01
>>> +    select SERIAL
>>> +    select RISCV_ACLINT
>>> +    select RISCV_APLIC
>>> +    select RISCV_IMSIC
>>> +    select SIFIVE_TEST
>>> +
>>>    config SHAKTI_C
>>>        bool
>>>        select RISCV_ACLINT
>>> diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
>>> index 2f7ee81be3..bb3aff91ea 100644
>>> --- a/hw/riscv/meson.build
>>> +++ b/hw/riscv/meson.build
>>> @@ -4,6 +4,7 @@ riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true:
>>> files('numa.c'))
>>>    riscv_ss.add(files('riscv_hart.c'))
>>>    riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c'))
>>>    riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c'))
>>> +riscv_ss.add(when: 'CONFIG_SERVER_PLATFORM_REF', if_true:
>>> files('server_platform_ref.c'))
>>>    riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c'))
>>>    riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
>>>    riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
>>> diff --git a/hw/riscv/server_platform_ref.c
>>> b/hw/riscv/server_platform_ref.c
>>> new file mode 100644
>>> index 0000000000..ae90c4b27a
>>> --- /dev/null
>>> +++ b/hw/riscv/server_platform_ref.c
>>> @@ -0,0 +1,1244 @@
>>> +/*
>>> + * QEMU RISC-V Server Platfrom (RVSP) Reference Board
>>> + *
>>> + * Copyright (c) 2024 Intel, Inc.
>>> + *
>>> + * This board is compliant RISC-V Server platform specification and
>>> leveraging
>>> + * a lot of riscv virt code.
>>> + *
>>> + * This program is free software; you can redistribute it and/or
>>> modify it
>>> + * under the terms and conditions of the GNU General Public License,
>>> + * version 2 or later, as published by the Free Software Foundation.
>>> + *
>>> + * This program is distributed in the hope it will be useful, but
>>> WITHOUT
>>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>>> License for
>>> + * more details.
>>> + *
>>> + * You should have received a copy of the GNU General Public License
>>> along with
>>> + * this program.  If not, see <http://www.gnu.org/licenses/>.
>>> + */
>>> +
>>> +#include "qemu/osdep.h"
>>> +#include "qemu/units.h"
>>> +#include "qemu/error-report.h"
>>> +#include "qemu/guest-random.h"
>>> +#include "qapi/error.h"
>>> +#include "qapi/qapi-visit-common.h"
>>> +#include "hw/boards.h"
>>> +#include "hw/loader.h"
>>> +#include "hw/sysbus.h"
>>> +#include "hw/qdev-properties.h"
>>> +#include "hw/char/serial.h"
>>> +#include "hw/block/flash.h"
>>> +#include "hw/ide/pci.h"
>>> +#include "hw/ide/ahci-pci.h"
>>> +#include "hw/pci/pci.h"
>>> +#include "hw/pci-host/gpex.h"
>>> +#include "hw/core/sysbus-fdt.h"
>>> +#include "hw/riscv/riscv_hart.h"
>>> +#include "hw/riscv/boot.h"
>>> +#include "hw/riscv/numa.h"
>>> +#include "hw/intc/riscv_aclint.h"
>>> +#include "hw/intc/riscv_aplic.h"
>>> +#include "hw/intc/riscv_imsic.h"
>>> +#include "hw/misc/sifive_test.h"
>>> +#include "chardev/char.h"
>>> +#include "sysemu/device_tree.h"
>>> +#include "sysemu/sysemu.h"
>>> +#include "sysemu/tcg.h"
>>> +#include "target/riscv/cpu.h"
>>> +#include "target/riscv/pmu.h"
>>> +#include "net/net.h"
>>> +
>>> +#define RVSP_CPUS_MAX_BITS             9
>>> +#define RVSP_CPUS_MAX                  (1 << RVSP_CPUS_MAX_BITS)
>>> +#define RVSP_SOCKETS_MAX_BITS          2
>>> +#define RVSP_SOCKETS_MAX               (1 << RVSP_SOCKETS_MAX_BITS)
>>> +
>>> +#define RVSP_IRQCHIP_NUM_MSIS 255
>>> +#define RVSP_IRQCHIP_NUM_SOURCES 96
>>> +#define RVSP_IRQCHIP_NUM_PRIO_BITS 3
>>> +#define RVSP_IRQCHIP_MAX_GUESTS_BITS 3
>>> +#define RVSP_IRQCHIP_MAX_GUESTS ((1U << RVSP_IRQCHIP_MAX_GUESTS_BITS)
>>> - 1U)
>>> +
>>> +#define FDT_PCI_ADDR_CELLS    3
>>> +#define FDT_PCI_INT_CELLS     1
>>> +#define FDT_APLIC_INT_CELLS   2
>>> +#define FDT_IMSIC_INT_CELLS   0
>>> +#define FDT_MAX_INT_CELLS     2
>>> +#define FDT_MAX_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS
>>> + \
>>> +                                 1 + FDT_MAX_INT_CELLS)
>>> +#define FDT_APLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS +
>>> FDT_PCI_INT_CELLS + \
>>> +                                 1 + FDT_APLIC_INT_CELLS)
>>> +
>>> +#define NUM_SATA_PORTS  6
>>> +
>>> +#define TYPE_RVSP_REF_MACHINE MACHINE_TYPE_NAME("rvsp-ref")
>>> +OBJECT_DECLARE_SIMPLE_TYPE(RVSPMachineState, RVSP_REF_MACHINE)
>>> +
>>> +struct RVSPMachineState {
>>> +    /*< private >*/
>>> +    MachineState parent;
>>> +
>>> +    /*< public >*/
>>> +    Notifier machine_done;
>>> +    RISCVHartArrayState soc[RVSP_SOCKETS_MAX];
>>> +    DeviceState *irqchip[RVSP_SOCKETS_MAX];
>>> +    PFlashCFI01 *flash[2];
>>> +
>>> +    int fdt_size;
>>> +    int aia_guests;
>>> +    const MemMapEntry *memmap;
>>> +};
>>> +
>>> +enum {
>>> +    RVSP_DEBUG,
>>> +    RVSP_MROM,
>>> +    RVSP_TEST,
>>> +    RVSP_RTC,
>>> +    RVSP_ACLINT,
>>> +    RVSP_APLIC_M,
>>> +    RVSP_APLIC_S,
>>> +    RVSP_UART0,
>>> +    RVSP_IMSIC_M,
>>> +    RVSP_IMSIC_S,
>>> +    RVSP_FLASH,
>>> +    RVSP_DRAM,
>>> +    RVSP_PCIE_MMIO,
>>> +    RVSP_PCIE_PIO,
>>> +    RVSP_PCIE_ECAM,
>>> +    RVSP_PCIE_MMIO_HIGH
>>> +};
>>> +
>>> +enum {
>>> +    RVSP_UART0_IRQ = 10,
>>> +    RVSP_RTC_IRQ = 11,
>>> +    RVSP_PCIE_IRQ = 0x20, /* 32 to 35 */
>>> +};
>>> +
>>> +/*
>>> + * The server soc reference machine physical address space used by
>>> some of the
>>> + * devices namely ACLINT, APLIC and IMSIC depend on number of
>>> Sockets, number
>>> + * of CPUs, and number of IMSIC guest files.
>>> + *
>>> + * Various limits defined by RVSP_SOCKETS_MAX_BITS,
>>> RVSP_CPUS_MAX_BITS, and
>>> + * RVSP_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization of
>>> server soc
>>> + * reference machine physical address space.
>>> + */
>>> +
>>> +#define RVSP_IMSIC_GROUP_MAX_SIZE      (1U <<
>>> IMSIC_MMIO_GROUP_MIN_SHIFT)
>>> +#if RVSP_IMSIC_GROUP_MAX_SIZE < \
>>> +    IMSIC_GROUP_SIZE(RVSP_CPUS_MAX_BITS, RVSP_IRQCHIP_MAX_GUESTS_BITS)
>>> +#error "Can't accomodate single IMSIC group in address space"
>>> +#endif
>>> +
>>> +#define RVSP_IMSIC_MAX_SIZE            (RVSP_SOCKETS_MAX * \
>>> +                                        RVSP_IMSIC_GROUP_MAX_SIZE)
>>> +#if 0x4000000 < RVSP_IMSIC_MAX_SIZE
>>> +#error "Can't accomodate all IMSIC groups in address space"
>>> +#endif
>>> +
>>> +static const MemMapEntry rvsp_ref_memmap[] = {
>>> +    [RVSP_DEBUG] =          {        0x0,         0x100 },
>>> +    [RVSP_MROM] =           {     0x1000,        0xf000 },
>>> +    [RVSP_TEST] =           {   0x100000,        0x1000 },
>>> +    [RVSP_RTC] =            {   0x101000,        0x1000 },
>>> +    [RVSP_ACLINT] =         {  0x2000000,       0x10000 },
>>> +    [RVSP_PCIE_PIO] =       {  0x3000000,       0x10000 },
>>> +    [RVSP_APLIC_M] =        {  0xc000000, APLIC_SIZE(RVSP_CPUS_MAX) },
>>> +    [RVSP_APLIC_S] =        {  0xd000000, APLIC_SIZE(RVSP_CPUS_MAX) },
>>> +    [RVSP_UART0] =          { 0x10000000,         0x100 },
>>> +    [RVSP_FLASH] =          { 0x20000000,     0x4000000 },
>>> +    [RVSP_IMSIC_M] =        { 0x24000000, RVSP_IMSIC_MAX_SIZE },
>>> +    [RVSP_IMSIC_S] =        { 0x28000000, RVSP_IMSIC_MAX_SIZE },
>>> +    [RVSP_PCIE_ECAM] =      { 0x30000000,    0x10000000 },
>>> +    [RVSP_PCIE_MMIO] =      { 0x40000000,    0x40000000 },
>>> +    [RVSP_DRAM] =           { 0x80000000, 0xff80000000ull },
>>> +    [RVSP_PCIE_MMIO_HIGH] = { 0x10000000000ull, 0x10000000000ull },
>>> +};
>>> +
>>> +#define RVSP_FLASH_SECTOR_SIZE (256 * KiB)
>>> +
>>> +static PFlashCFI01 *rvsp_flash_create1(RVSPMachineState *s,
>>> +                                       const char *name,
>>> +                                       const char *alias_prop_name)
>>> +{
>>> +    /*
>>> +     * Create a single flash device.  We use the same parameters as
>>> +     * the flash devices on the ARM virt board.
>>> +     */
>>> +    DeviceState *dev = qdev_new(TYPE_PFLASH_CFI01);
>>> +
>>> +    qdev_prop_set_uint64(dev, "sector-length", RVSP_FLASH_SECTOR_SIZE);
>>> +    qdev_prop_set_uint8(dev, "width", 4);
>>> +    qdev_prop_set_uint8(dev, "device-width", 2);
>>> +    qdev_prop_set_bit(dev, "big-endian", false);
>>> +    qdev_prop_set_uint16(dev, "id0", 0x89);
>>> +    qdev_prop_set_uint16(dev, "id1", 0x18);
>>> +    qdev_prop_set_uint16(dev, "id2", 0x00);
>>> +    qdev_prop_set_uint16(dev, "id3", 0x00);
>>> +    qdev_prop_set_string(dev, "name", name);
>>> +
>>> +    object_property_add_child(OBJECT(s), name, OBJECT(dev));
>>> +    object_property_add_alias(OBJECT(s), alias_prop_name,
>>> +                              OBJECT(dev), "drive");
>>> +
>>> +    return PFLASH_CFI01(dev);
>>> +}
>>> +
>>> +static void rvsp_flash_create(RVSPMachineState *s)
>>> +{
>>> +    s->flash[0] = rvsp_flash_create1(s, "rvsp.flash0", "pflash0");
>>> +    s->flash[1] = rvsp_flash_create1(s, "rvsp.flash1", "pflash1");
>>> +}
>>
>> I suggest explicitly creating both flash devices in
>> rvsp_ref_machine_instance_init():
>>
> OK.
> 
>>> +static void rvsp_ref_machine_instance_init(Object *obj)
>>> +{
>>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>>> + +    s->flash[0] = rvsp_flash_create1(s, "rvsp.flash0", "pflash0");
>>> +    s->flash[1] = rvsp_flash_create1(s, "rvsp.flash1", "pflash1");
>>> +}
>>
>> You can then rename rvsp_flash_create1() to rvsp_flash_create().
>>
> OK.
> 
>>
>>> +
>>> +static void rvsp_flash_map1(PFlashCFI01 *flash,
>>> +                            hwaddr base, hwaddr size,
>>> +                            MemoryRegion *sysmem)
>>> +{
>>> +    DeviceState *dev = DEVICE(flash);
>>> +
>>> +    assert(QEMU_IS_ALIGNED(size, RVSP_FLASH_SECTOR_SIZE));
>>> +    assert(size / RVSP_FLASH_SECTOR_SIZE <= UINT32_MAX);
>>> +    qdev_prop_set_uint32(dev, "num-blocks", size /
>>> RVSP_FLASH_SECTOR_SIZE);
>>> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
>>> +
>>> +    memory_region_add_subregion(sysmem, base,
>>> +
>>> sysbus_mmio_get_region(SYS_BUS_DEVICE(dev),
>>> +                                                       0));
>>> +}
>>> +
>>> +static void rvsp_flash_map(RVSPMachineState *s,
>>> +                           MemoryRegion *sysmem)
>>> +{
>>> +    hwaddr flashsize = rvsp_ref_memmap[RVSP_FLASH].size / 2;
>>> +    hwaddr flashbase = rvsp_ref_memmap[RVSP_FLASH].base;
>>> +
>>> +    rvsp_flash_map1(s->flash[0], flashbase, flashsize, sysmem);
>>> +    rvsp_flash_map1(s->flash[1], flashbase + flashsize, flashsize,
>>> sysmem);
>>> +}
>>
>> I suggest rename rvsp_flash_map() to rvsp_create_flash_maps() and rename
>> the
>> "1" functions to rvsp_flash_map().
>>
> OK.
> 
>>> +
>>> +static void create_pcie_irq_map(RVSPMachineState *s, void *fdt, char
>>> *nodename,
>>> +                                uint32_t irqchip_phandle)
>>> +{
>>> +    int pin, dev;
>>> +    uint32_t irq_map_stride = 0;
>>> +    uint32_t full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS *
>>> +                          FDT_MAX_INT_MAP_WIDTH] = {};
>>> +    uint32_t *irq_map = full_irq_map;
>>> +
>>> +    /*
>>> +     * This code creates a standard swizzle of interrupts such that
>>> +     * each device's first interrupt is based on it's PCI_SLOT number.
>>> +     * (See pci_swizzle_map_irq_fn())
>>> +     *
>>> +     * We only need one entry per interrupt in the table (not one per
>>> +     * possible slot) seeing the interrupt-map-mask will allow the table
>>> +     * to wrap to any number of devices.
>>> +     */
>>> +    for (dev = 0; dev < GPEX_NUM_IRQS; dev++) {
>>> +        int devfn = dev * 0x8;
>>> +
>>> +        for (pin = 0; pin < GPEX_NUM_IRQS; pin++) {
>>> +            int irq_nr = RVSP_PCIE_IRQ +
>>> +                         ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS);
>>> +            int i = 0;
>>> +
>>> +            /* Fill PCI address cells */
>>> +            irq_map[i] = cpu_to_be32(devfn << 8);
>>> +            i += FDT_PCI_ADDR_CELLS;
>>> +
>>> +            /* Fill PCI Interrupt cells */
>>> +            irq_map[i] = cpu_to_be32(pin + 1);
>>> +            i += FDT_PCI_INT_CELLS;
>>> +
>>> +            /* Fill interrupt controller phandle and cells */
>>> +            irq_map[i++] = cpu_to_be32(irqchip_phandle);
>>> +            irq_map[i++] = cpu_to_be32(irq_nr);
>>> +            irq_map[i++] = cpu_to_be32(0x4);
>>> +
>>> +            if (!irq_map_stride) {
>>> +                irq_map_stride = i;
>>> +            }
>>> +            irq_map += irq_map_stride;
>>> +        }
>>> +    }
>>> +
>>> +    qemu_fdt_setprop(fdt, nodename, "interrupt-map", full_irq_map,
>>> +                     GPEX_NUM_IRQS * GPEX_NUM_IRQS *
>>> +                     irq_map_stride * sizeof(uint32_t));
>>> +
>>> +    qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask",
>>> +                           0x1800, 0, 0, 0x7);
>>> +}
>>> +
>>> +static void create_fdt_socket_cpus(RVSPMachineState *s, int socket,
>>> +                                   char *clust_name, uint32_t *phandle,
>>> +                                   uint32_t *intc_phandles)
>>> +{
>>> +    int cpu;
>>> +    uint32_t cpu_phandle;
>>> +    MachineState *ms = MACHINE(s);
>>> +    char *name, *cpu_name, *core_name, *intc_name, *sv_name;
>>
>> We did some changes in the FDT functions in hw/riscv/virt.c where we
>> used a lot
>> of "g_autofree" calls to avoid having to call g_free() every time. This
>> was done
>> in the context of fixing a mem leak.
>>
>> I suggest doing the same in all the FDT functions you're creating for this
>> new machine.
>>
>> Also, it seems like some FDT functions we're using here is a copy/paste
>> of what
>> the 'virt' machine is doing, e.g. create_fdt_socket_memory(). I think
>> it's worth
>> thinking about moving these common functions somewhere else (e.g.
>> hw/riscv/fdt_helper.c)
>> to allow us to share code between the machines.
>>
> Originally I want to seperate rvsp-ref from virt as much as possible, so
> that rvsp-ref can be relatively stable and virt can be moving fast, the
> changes in virt don't have side effects on rvsp-ref. Let me check if any
> code is better to share.

In theory we can't change the FDT generation of the 'virt' machine since existing
software is now counting on it, but I see your point. I'll let you decide what is
worth sharing or not or whether we need to share anything at all.

All this said, I still advise using g_autofree in the strings and uint32_t pointers,
like 'virt' is doing, to avoid pesty mem leaks when moving/copying code around and
a g_free() is gone missing in the process.


Thanks,

Daniel

> 
>>> +    bool is_32_bit = riscv_is_32bit(&s->soc[0]);
>>> +    uint8_t satp_mode_max;
>>> +
>>> +    for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
>>> +        RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu];
>>> +
>>> +        cpu_phandle = (*phandle)++;
>>> +
>>> +        cpu_name = g_strdup_printf("/cpus/cpu@%d",
>>> +            s->soc[socket].hartid_base + cpu);
>>> +        qemu_fdt_add_subnode(ms->fdt, cpu_name);
>>> +
>>> +        if (cpu_ptr->cfg.satp_mode.supported != 0) {
>>> +            satp_mode_max =
>>> satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map);
>>> +            sv_name = g_strdup_printf("riscv,%s",
>>> +                                      satp_mode_str(satp_mode_max,
>>> is_32_bit));
>>> +            qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type",
>>> sv_name);
>>> +            g_free(sv_name);
>>> +        }
>>> +
>>> +        name = riscv_isa_string(cpu_ptr);
>>> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name);
>>> +        g_free(name);
>>> +
>>> +        if (cpu_ptr->cfg.ext_zicbom) {
>>> +            qemu_fdt_setprop_cell(ms->fdt, cpu_name,
>>> "riscv,cbom-block-size",
>>> +                                  cpu_ptr->cfg.cbom_blocksize);
>>> +        }
>>> +
>>> +        if (cpu_ptr->cfg.ext_zicboz) {
>>> +            qemu_fdt_setprop_cell(ms->fdt, cpu_name,
>>> "riscv,cboz-block-size",
>>> +                                  cpu_ptr->cfg.cboz_blocksize);
>>> +        }
>>> +
>>> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "compatible",
>>> "riscv");
>>> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "status", "okay");
>>> +        qemu_fdt_setprop_cell(ms->fdt, cpu_name, "reg",
>>> +            s->soc[socket].hartid_base + cpu);
>>> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "device_type",
>>> "cpu");
>>> +        riscv_socket_fdt_write_id(ms, cpu_name, socket);
>>> +        qemu_fdt_setprop_cell(ms->fdt, cpu_name, "phandle",
>>> cpu_phandle);
>>> +
>>> +        intc_phandles[cpu] = (*phandle)++;
>>> +
>>> +        intc_name = g_strdup_printf("%s/interrupt-controller",
>>> cpu_name);
>>> +        qemu_fdt_add_subnode(ms->fdt, intc_name);
>>> +        qemu_fdt_setprop_cell(ms->fdt, intc_name, "phandle",
>>> +            intc_phandles[cpu]);
>>> +        qemu_fdt_setprop_string(ms->fdt, intc_name, "compatible",
>>> +            "riscv,cpu-intc");
>>> +        qemu_fdt_setprop(ms->fdt, intc_name, "interrupt-controller",
>>> NULL, 0);
>>> +        qemu_fdt_setprop_cell(ms->fdt, intc_name, "#interrupt-cells",
>>> 1);
>>> +
>>> +        core_name = g_strdup_printf("%s/core%d", clust_name, cpu);
>>> +        qemu_fdt_add_subnode(ms->fdt, core_name);
>>> +        qemu_fdt_setprop_cell(ms->fdt, core_name, "cpu", cpu_phandle);
>>> +
>>> +        g_free(core_name);
>>> +        g_free(intc_name);
>>> +        g_free(cpu_name);
>>> +    }
>>> +}
>>> +
>>> +static void create_fdt_socket_memory(RVSPMachineState *s,
>>> +                                     const MemMapEntry *memmap, int
>>> socket)
>>> +{
>>> +    char *mem_name;
>>> +    uint64_t addr, size;
>>> +    MachineState *ms = MACHINE(s);
>>> +
>>> +    addr = memmap[RVSP_DRAM].base + riscv_socket_mem_offset(ms, socket);
>>> +    size = riscv_socket_mem_size(ms, socket);
>>> +    mem_name = g_strdup_printf("/memory@%lx", (long)addr);
>>> +    qemu_fdt_add_subnode(ms->fdt, mem_name);
>>> +    qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg",
>>> +        addr >> 32, addr, size >> 32, size);
>>> +    qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory");
>>> +    riscv_socket_fdt_write_id(ms, mem_name, socket);
>>> +    g_free(mem_name);
>>> +}
>>> +
>>> +static void create_fdt_socket_aclint(RVSPMachineState *s,
>>> +                                     const MemMapEntry *memmap, int
>>> socket,
>>> +                                     uint32_t *intc_phandles)
>>> +{
>>> +    int cpu;
>>> +    char *name;
>>> +    unsigned long addr, size;
>>> +    uint32_t aclint_cells_size;
>>> +    uint32_t *aclint_mtimer_cells;
>>> +    MachineState *ms = MACHINE(s);
>>> +
>>> +    aclint_mtimer_cells = g_new0(uint32_t, s->soc[socket].num_harts *
>>> 2);
>>> +
>>> +    for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
>>> +        aclint_mtimer_cells[cpu * 2 + 0] =
>>> cpu_to_be32(intc_phandles[cpu]);
>>> +        aclint_mtimer_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_TIMER);
>>> +    }
>>> +    aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2;
>>> +
>>> +    addr = memmap[RVSP_ACLINT].base +
>>> +           (RISCV_ACLINT_DEFAULT_MTIMER_SIZE * socket);
>>> +    size = RISCV_ACLINT_DEFAULT_MTIMER_SIZE;
>>> +
>>> +    name = g_strdup_printf("/soc/mtimer@%lx", addr);
>>> +    qemu_fdt_add_subnode(ms->fdt, name);
>>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
>>> +        "riscv,aclint-mtimer");
>>> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
>>> +        0x0, addr + RISCV_ACLINT_DEFAULT_MTIME,
>>> +        0x0, size - RISCV_ACLINT_DEFAULT_MTIME,
>>> +        0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP,
>>> +        0x0, RISCV_ACLINT_DEFAULT_MTIME);
>>> +    qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
>>> +        aclint_mtimer_cells, aclint_cells_size);
>>> +    riscv_socket_fdt_write_id(ms, name, socket);
>>> +    g_free(name);
>>> +
>>> +    g_free(aclint_mtimer_cells);
>>> +}
>>> +
>>> +static uint32_t imsic_num_bits(uint32_t count)
>>> +{
>>> +    uint32_t ret = 0;
>>> +
>>> +    while (BIT(ret) < count) {
>>> +        ret++;
>>> +    }
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +static void create_fdt_one_imsic(RVSPMachineState *s, hwaddr base_addr,
>>> +                                 uint32_t *intc_phandles, uint32_t
>>> msi_phandle,
>>> +                                 bool m_mode, uint32_t imsic_guest_bits)
>>> +{
>>> +    int cpu, socket;
>>> +    char *imsic_name;
>>> +    MachineState *ms = MACHINE(s);
>>> +    int socket_count = riscv_socket_count(ms);
>>> +    uint32_t imsic_max_hart_per_socket;
>>> +    uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size;
>>> +
>>> +    imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
>>> +    imsic_regs = g_new0(uint32_t, socket_count * 4);
>>> +
>>> +    for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
>>> +        imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
>>> +        imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT :
>>> IRQ_S_EXT);
>>> +    }
>>> +
>>> +    imsic_max_hart_per_socket = 0;
>>> +    for (socket = 0; socket < socket_count; socket++) {
>>> +        imsic_addr = base_addr + socket * RVSP_IMSIC_GROUP_MAX_SIZE;
>>> +        imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) *
>>> +                     s->soc[socket].num_harts;
>>> +        imsic_regs[socket * 4 + 0] = 0;
>>> +        imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
>>> +        imsic_regs[socket * 4 + 2] = 0;
>>> +        imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
>>> +        if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
>>> +            imsic_max_hart_per_socket = s->soc[socket].num_harts;
>>> +        }
>>> +    }
>>> +
>>> +    imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned
>>> long)base_addr);
>>> +    qemu_fdt_add_subnode(ms->fdt, imsic_name);
>>> +    qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible",
>>> "riscv,imsics");
>>> +    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
>>> +                          FDT_IMSIC_INT_CELLS);
>>> +    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller",
>>> NULL, 0);
>>> +    qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0);
>>> +    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
>>> +                     imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
>>> +    qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
>>> +                     socket_count * sizeof(uint32_t) * 4);
>>> +    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
>>> +                     RVSP_IRQCHIP_NUM_MSIS);
>>> +
>>> +    if (imsic_guest_bits) {
>>> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name,
>>> "riscv,guest-index-bits",
>>> +                              imsic_guest_bits);
>>> +    }
>>> +
>>> +    if (socket_count > 1) {
>>> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name,
>>> "riscv,hart-index-bits",
>>> +
>>> imsic_num_bits(imsic_max_hart_per_socket));
>>> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name,
>>> "riscv,group-index-bits",
>>> +                              imsic_num_bits(socket_count));
>>> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name,
>>> "riscv,group-index-shift",
>>> +                              IMSIC_MMIO_GROUP_MIN_SHIFT);
>>> +    }
>>> +    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle);
>>> +
>>> +    g_free(imsic_name);
>>> +    g_free(imsic_regs);
>>> +    g_free(imsic_cells);
>>> +}
>>> +
>>> +static void create_fdt_imsic(RVSPMachineState *s, const MemMapEntry
>>> *memmap,
>>> +                             uint32_t *phandle, uint32_t *intc_phandles,
>>> +                             uint32_t *msi_m_phandle, uint32_t
>>> *msi_s_phandle)
>>> +{
>>> +    *msi_m_phandle = (*phandle)++;
>>> +    *msi_s_phandle = (*phandle)++;
>>> +
>>> +    /* M-level IMSIC node */
>>> +    create_fdt_one_imsic(s, memmap[RVSP_IMSIC_M].base, intc_phandles,
>>> +                         *msi_m_phandle, true, 0);
>>> +
>>> +    /* S-level IMSIC node */
>>> +    create_fdt_one_imsic(s, memmap[RVSP_IMSIC_S].base, intc_phandles,
>>> +                         *msi_s_phandle, false,
>>> +                         imsic_num_bits(s->aia_guests + 1));
>>> +
>>> +}
>>> +
>>> +static void create_fdt_one_aplic(RVSPMachineState *s, int socket,
>>> +                                 unsigned long aplic_addr, uint32_t
>>> aplic_size,
>>> +                                 uint32_t msi_phandle,
>>> +                                 uint32_t *intc_phandles,
>>> +                                 uint32_t aplic_phandle,
>>> +                                 uint32_t aplic_child_phandle,
>>> +                                 bool m_mode, int num_harts)
>>> +{
>>> +    int cpu;
>>> +    char *aplic_name;
>>> +    uint32_t *aplic_cells;
>>> +    MachineState *ms = MACHINE(s);
>>> +
>>> +    aplic_cells = g_new0(uint32_t, num_harts * 2);
>>> +
>>> +    for (cpu = 0; cpu < num_harts; cpu++) {
>>> +        aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
>>> +        aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT :
>>> IRQ_S_EXT);
>>> +    }
>>> +
>>> +    aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
>>> +    qemu_fdt_add_subnode(ms->fdt, aplic_name);
>>> +    qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible",
>>> "riscv,aplic");
>>> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name,
>>> +                          "#interrupt-cells", FDT_APLIC_INT_CELLS);
>>> +    qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller",
>>> NULL, 0);
>>> +
>>> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent",
>>> msi_phandle);
>>> +
>>> +    qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
>>> +                           0x0, aplic_addr, 0x0, aplic_size);
>>> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
>>> +                          RVSP_IRQCHIP_NUM_SOURCES);
>>> +
>>> +    if (aplic_child_phandle) {
>>> +        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
>>> +                              aplic_child_phandle);
>>> +        qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
>>> +                               aplic_child_phandle, 0x1,
>>> +                               RVSP_IRQCHIP_NUM_SOURCES);
>>> +    }
>>> +
>>> +    riscv_socket_fdt_write_id(ms, aplic_name, socket);
>>> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle",
>>> aplic_phandle);
>>> +
>>> +    g_free(aplic_name);
>>> +    g_free(aplic_cells);
>>> +}
>>> +
>>> +static void create_fdt_socket_aplic(RVSPMachineState *s,
>>> +                                    const MemMapEntry *memmap, int
>>> socket,
>>> +                                    uint32_t msi_m_phandle,
>>> +                                    uint32_t msi_s_phandle,
>>> +                                    uint32_t *phandle,
>>> +                                    uint32_t *intc_phandles,
>>> +                                    uint32_t *aplic_phandles,
>>> +                                    int num_harts)
>>> +{
>>> +    char *aplic_name;
>>> +    unsigned long aplic_addr;
>>> +    uint32_t aplic_m_phandle, aplic_s_phandle;
>>> +
>>> +    aplic_m_phandle = (*phandle)++;
>>> +    aplic_s_phandle = (*phandle)++;
>>> +
>>> +    /* M-level APLIC node */
>>> +    aplic_addr = memmap[RVSP_APLIC_M].base +
>>> +                 (memmap[RVSP_APLIC_M].size * socket);
>>> +    create_fdt_one_aplic(s, socket, aplic_addr,
>>> memmap[RVSP_APLIC_M].size,
>>> +                         msi_m_phandle, intc_phandles,
>>> +                         aplic_m_phandle, aplic_s_phandle,
>>> +                         true, num_harts);
>>> +
>>> +    /* S-level APLIC node */
>>> +    aplic_addr = memmap[RVSP_APLIC_S].base +
>>> +                 (memmap[RVSP_APLIC_S].size * socket);
>>> +    create_fdt_one_aplic(s, socket, aplic_addr,
>>> memmap[RVSP_APLIC_S].size,
>>> +                         msi_s_phandle, intc_phandles,
>>> +                         aplic_s_phandle, 0,
>>> +                         false, num_harts);
>>> +
>>> +    aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
>>> +
>>> +    g_free(aplic_name);
>>> +
>>> +    aplic_phandles[socket] = aplic_s_phandle;
>>> +}
>>> +
>>> +static void create_fdt_pmu(RVSPMachineState *s)
>>> +{
>>> +    char *pmu_name;
>>> +    MachineState *ms = MACHINE(s);
>>> +    RISCVCPU hart = s->soc[0].harts[0];
>>> +
>>> +    pmu_name = g_strdup_printf("/pmu");
>>> +    qemu_fdt_add_subnode(ms->fdt, pmu_name);
>>> +    qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible",
>>> "riscv,pmu");
>>> +    riscv_pmu_generate_fdt_node(ms->fdt, hart.pmu_avail_ctrs, pmu_name);
>>> +
>>> +    g_free(pmu_name);
>>> +}
>>> +
>>> +static void create_fdt_sockets(RVSPMachineState *s, const MemMapEntry
>>> *memmap,
>>> +                               uint32_t *phandle,
>>> +                               uint32_t *irq_mmio_phandle,
>>> +                               uint32_t *irq_pcie_phandle,
>>> +                               uint32_t *msi_pcie_phandle)
>>> +{
>>> +    char *clust_name;
>>> +    int socket, phandle_pos;
>>> +    MachineState *ms = MACHINE(s);
>>> +    uint32_t msi_m_phandle = 0, msi_s_phandle = 0;
>>> +    uint32_t *intc_phandles, xplic_phandles[MAX_NODES];
>>> +    int socket_count = riscv_socket_count(ms);
>>> +
>>> +    qemu_fdt_add_subnode(ms->fdt, "/cpus");
>>> +    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "timebase-frequency",
>>> +                          RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ);
>>> +    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0);
>>> +    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", 0x1);
>>> +    qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map");
>>> +
>>> +    intc_phandles = g_new0(uint32_t, ms->smp.cpus);
>>> +
>>> +    phandle_pos = ms->smp.cpus;
>>> +    for (socket = (socket_count - 1); socket >= 0; socket--) {
>>> +        phandle_pos -= s->soc[socket].num_harts;
>>> +
>>> +        clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket);
>>> +        qemu_fdt_add_subnode(ms->fdt, clust_name);
>>> +
>>> +        create_fdt_socket_cpus(s, socket, clust_name, phandle,
>>> +                               &intc_phandles[phandle_pos]);
>>> +
>>> +        create_fdt_socket_memory(s, memmap, socket);
>>> +
>>> +        g_free(clust_name);
>>> +
>>> +        create_fdt_socket_aclint(s, memmap, socket,
>>> +            &intc_phandles[phandle_pos]);
>>> +    }
>>> +
>>> +    create_fdt_imsic(s, memmap, phandle, intc_phandles,
>>> +        &msi_m_phandle, &msi_s_phandle);
>>> +    *msi_pcie_phandle = msi_s_phandle;
>>> +
>>> +    phandle_pos = ms->smp.cpus;
>>> +    for (socket = (socket_count - 1); socket >= 0; socket--) {
>>> +        phandle_pos -= s->soc[socket].num_harts;
>>> +
>>> +        create_fdt_socket_aplic(s, memmap, socket,
>>> +                                msi_m_phandle, msi_s_phandle, phandle,
>>> +                                &intc_phandles[phandle_pos],
>>> +                                xplic_phandles,
>>> +                                s->soc[socket].num_harts);
>>> +    }
>>> +
>>> +    g_free(intc_phandles);
>>> +
>>> +    for (socket = 0; socket < socket_count; socket++) {
>>> +        if (socket == 0) {
>>> +            *irq_mmio_phandle = xplic_phandles[socket];
>>> +            *irq_pcie_phandle = xplic_phandles[socket];
>>> +        }
>>> +        if (socket == 1) {
>>> +            *irq_pcie_phandle = xplic_phandles[socket];
>>> +        }
>>> +    }
>>> +
>>> +    riscv_socket_fdt_write_distance_matrix(ms);
>>> +}
>>> +
>>> +static void create_fdt_pcie(RVSPMachineState *s, const MemMapEntry
>>> *memmap,
>>> +                            uint32_t irq_pcie_phandle,
>>> +                            uint32_t msi_pcie_phandle)
>>> +{
>>> +    char *name;
>>> +    MachineState *ms = MACHINE(s);
>>> +
>>> +    name = g_strdup_printf("/soc/pci@%lx",
>>> +        (long) memmap[RVSP_PCIE_ECAM].base);
>>> +    qemu_fdt_add_subnode(ms->fdt, name);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "#address-cells",
>>> +        FDT_PCI_ADDR_CELLS);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "#interrupt-cells",
>>> +        FDT_PCI_INT_CELLS);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "#size-cells", 0x2);
>>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
>>> +        "pci-host-ecam-generic");
>>> +    qemu_fdt_setprop_string(ms->fdt, name, "device_type", "pci");
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "linux,pci-domain", 0);
>>> +    qemu_fdt_setprop_cells(ms->fdt, name, "bus-range", 0,
>>> +        memmap[RVSP_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1);
>>> +    qemu_fdt_setprop(ms->fdt, name, "dma-coherent", NULL, 0);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent",
>>> msi_pcie_phandle);
>>> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0,
>>> +        memmap[RVSP_PCIE_ECAM].base, 0, memmap[RVSP_PCIE_ECAM].size);
>>> +    qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges",
>>> +        1, FDT_PCI_RANGE_IOPORT, 2, 0,
>>> +        2, memmap[RVSP_PCIE_PIO].base, 2, memmap[RVSP_PCIE_PIO].size,
>>> +        1, FDT_PCI_RANGE_MMIO,
>>> +        2, memmap[RVSP_PCIE_MMIO].base,
>>> +        2, memmap[RVSP_PCIE_MMIO].base, 2, memmap[RVSP_PCIE_MMIO].size,
>>> +        1, FDT_PCI_RANGE_MMIO_64BIT,
>>> +        2, memmap[RVSP_PCIE_MMIO_HIGH].base,
>>> +        2, memmap[RVSP_PCIE_MMIO_HIGH].base, 2,
>>> +           memmap[RVSP_PCIE_MMIO_HIGH].size);
>>> +
>>> +    create_pcie_irq_map(s, ms->fdt, name, irq_pcie_phandle);
>>> +    g_free(name);
>>> +}
>>> +
>>> +static void create_fdt_reset(RVSPMachineState *s, const MemMapEntry
>>> *memmap,
>>> +                             uint32_t *phandle)
>>> +{
>>> +    char *name;
>>> +    uint32_t test_phandle;
>>> +    MachineState *ms = MACHINE(s);
>>> +
>>> +    test_phandle = (*phandle)++;
>>> +    name = g_strdup_printf("/soc/test@%lx",
>>> +        (long)memmap[RVSP_TEST].base);
>>> +    qemu_fdt_add_subnode(ms->fdt, name);
>>> +    {
>>> +        static const char * const compat[3] = {
>>> +            "sifive,test1", "sifive,test0", "syscon"
>>> +        };
>>> +        qemu_fdt_setprop_string_array(ms->fdt, name, "compatible",
>>> +                                      (char **)&compat,
>>> ARRAY_SIZE(compat));
>>> +    }
>>> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
>>> +        0x0, memmap[RVSP_TEST].base, 0x0, memmap[RVSP_TEST].size);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle);
>>> +    test_phandle = qemu_fdt_get_phandle(ms->fdt, name);
>>> +    g_free(name);
>>> +
>>> +    name = g_strdup_printf("/reboot");
>>> +    qemu_fdt_add_subnode(ms->fdt, name);
>>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
>>> "syscon-reboot");
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "regmap", test_phandle);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "offset", 0x0);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "value", FINISHER_RESET);
>>> +    g_free(name);
>>> +
>>> +    name = g_strdup_printf("/poweroff");
>>> +    qemu_fdt_add_subnode(ms->fdt, name);
>>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
>>> "syscon-poweroff");
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "regmap", test_phandle);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "offset", 0x0);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "value", FINISHER_PASS);
>>> +    g_free(name);
>>> +}
>>> +
>>> +static void create_fdt_uart(RVSPMachineState *s, const MemMapEntry
>>> *memmap,
>>> +                            uint32_t irq_mmio_phandle)
>>> +{
>>> +    char *name;
>>> +    MachineState *ms = MACHINE(s);
>>> +
>>> +    name = g_strdup_printf("/soc/serial@%lx",
>>> (long)memmap[RVSP_UART0].base);
>>> +    qemu_fdt_add_subnode(ms->fdt, name);
>>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a");
>>> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
>>> +        0x0, memmap[RVSP_UART0].base,
>>> +        0x0, memmap[RVSP_UART0].size);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
>>> irq_mmio_phandle);
>>> +    qemu_fdt_setprop_cells(ms->fdt, name, "interrupts",
>>> RVSP_UART0_IRQ, 0x4);
>>> +
>>> +    qemu_fdt_add_subnode(ms->fdt, "/chosen");
>>> +    qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", name);
>>> +    g_free(name);
>>> +}
>>> +
>>> +static void create_fdt_rtc(RVSPMachineState *s, const MemMapEntry
>>> *memmap,
>>> +                           uint32_t irq_mmio_phandle)
>>> +{
>>> +    char *name;
>>> +    MachineState *ms = MACHINE(s);
>>> +
>>> +    name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[RVSP_RTC].base);
>>> +    qemu_fdt_add_subnode(ms->fdt, name);
>>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
>>> +        "google,goldfish-rtc");
>>> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
>>> +        0x0, memmap[RVSP_RTC].base, 0x0, memmap[RVSP_RTC].size);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
>>> +        irq_mmio_phandle);
>>> +    qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", RVSP_RTC_IRQ,
>>> 0x4);
>>> +    g_free(name);
>>> +}
>>> +
>>> +static void create_fdt_flash(RVSPMachineState *s, const MemMapEntry
>>> *memmap)
>>> +{
>>> +    char *name;
>>> +    MachineState *ms = MACHINE(s);
>>> +    hwaddr flashsize = rvsp_ref_memmap[RVSP_FLASH].size / 2;
>>> +    hwaddr flashbase = rvsp_ref_memmap[RVSP_FLASH].base;
>>> +
>>> +    name = g_strdup_printf("/flash@%" PRIx64, flashbase);
>>> +    qemu_fdt_add_subnode(ms->fdt, name);
>>> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "cfi-flash");
>>> +    qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
>>> +                                 2, flashbase, 2, flashsize,
>>> +                                 2, flashbase + flashsize, 2,
>>> flashsize);
>>> +    qemu_fdt_setprop_cell(ms->fdt, name, "bank-width", 4);
>>> +    g_free(name);
>>> +}
>>> +
>>> +static void create_fdt(RVSPMachineState *s, const MemMapEntry *memmap)
>>> +{
>>> +    MachineState *ms = MACHINE(s);
>>> +    uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1;
>>> +    uint32_t irq_pcie_phandle = 1;
>>> +    uint8_t rng_seed[32];
>>> +
>>> +    ms->fdt = create_device_tree(&s->fdt_size);
>>> +    if (!ms->fdt) {
>>> +        error_report("create_device_tree() failed");
>>> +        exit(1);
>>> +    }
>>> +
>>> +    qemu_fdt_setprop_string(ms->fdt, "/", "model",
>>> "riscv-rvsp-ref,qemu");
>>> +    qemu_fdt_setprop_string(ms->fdt, "/", "compatible",
>>> "riscv-rvsp-ref");
>>> +    qemu_fdt_setprop_cell(ms->fdt, "/", "#size-cells", 0x2);
>>> +    qemu_fdt_setprop_cell(ms->fdt, "/", "#address-cells", 0x2);
>>> +
>>> +    qemu_fdt_add_subnode(ms->fdt, "/soc");
>>> +    qemu_fdt_setprop(ms->fdt, "/soc", "ranges", NULL, 0);
>>> +    qemu_fdt_setprop_string(ms->fdt, "/soc", "compatible",
>>> "simple-bus");
>>> +    qemu_fdt_setprop_cell(ms->fdt, "/soc", "#size-cells", 0x2);
>>> +    qemu_fdt_setprop_cell(ms->fdt, "/soc", "#address-cells", 0x2);
>>> +
>>> +    create_fdt_sockets(s, memmap, &phandle, &irq_mmio_phandle,
>>> +                       &irq_pcie_phandle, &msi_pcie_phandle);
>>> +
>>> +    create_fdt_pcie(s, memmap, irq_pcie_phandle, msi_pcie_phandle);
>>> +
>>> +    create_fdt_reset(s, memmap, &phandle);
>>> +
>>> +    create_fdt_uart(s, memmap, irq_mmio_phandle);
>>> +
>>> +    create_fdt_rtc(s, memmap, irq_mmio_phandle);
>>> +
>>> +    create_fdt_flash(s, memmap);
>>> +    create_fdt_pmu(s);
>>> +
>>> +    /* Pass seed to RNG */
>>> +    qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
>>> +    qemu_fdt_setprop(ms->fdt, "/chosen", "rng-seed",
>>> +                     rng_seed, sizeof(rng_seed));
>>> +}
>>> +
>>> +static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
>>> +                                          DeviceState *irqchip,
>>> +                                          RVSPMachineState *s)
>>> +{
>>> +    DeviceState *dev;
>>> +    PCIHostState *pci;
>>> +    PCIDevice *pdev_ahci;
>>> +    AHCIPCIState *ich9;
>>> +    DriveInfo *hd[NUM_SATA_PORTS];
>>> +    MemoryRegion *ecam_alias, *ecam_reg;
>>> +    MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg;
>>> +    hwaddr ecam_base = rvsp_ref_memmap[RVSP_PCIE_ECAM].base;
>>> +    hwaddr ecam_size = rvsp_ref_memmap[RVSP_PCIE_ECAM].size;
>>> +    hwaddr mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO].base;
>>> +    hwaddr mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO].size;
>>> +    hwaddr high_mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].base;
>>> +    hwaddr high_mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].size;
>>> +    hwaddr pio_base = rvsp_ref_memmap[RVSP_PCIE_PIO].base;
>>> +    hwaddr pio_size = rvsp_ref_memmap[RVSP_PCIE_PIO].size;
>>> +    MachineClass *mc = MACHINE_GET_CLASS(s);
>>> +    qemu_irq irq;
>>> +    int i;
>>> +
>>> +    dev = qdev_new(TYPE_GPEX_HOST);
>>> +
>>> +    /* Set GPEX object properties for the rvsp ref machine */
>>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE,
>>> +                            ecam_base, NULL);
>>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE,
>>> +                            ecam_size, NULL);
>>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
>>> +                             PCI_HOST_BELOW_4G_MMIO_BASE,
>>> +                             mmio_base, NULL);
>>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)),
>>> PCI_HOST_BELOW_4G_MMIO_SIZE,
>>> +                            mmio_size, NULL);
>>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
>>> +                             PCI_HOST_ABOVE_4G_MMIO_BASE,
>>> +                             high_mmio_base, NULL);
>>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)),
>>> PCI_HOST_ABOVE_4G_MMIO_SIZE,
>>> +                            high_mmio_size, NULL);
>>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE,
>>> +                            pio_base, NULL);
>>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE,
>>> +                            pio_size, NULL);
>>> +
>>> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
>>> +
>>> +    ecam_alias = g_new0(MemoryRegion, 1);
>>> +    ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
>>> +    memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam",
>>> +                             ecam_reg, 0, ecam_size);
>>> +    memory_region_add_subregion(get_system_memory(), ecam_base,
>>> ecam_alias);
>>> +
>>> +    mmio_alias = g_new0(MemoryRegion, 1);
>>> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
>>> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
>>> +                             mmio_reg, mmio_base, mmio_size);
>>> +    memory_region_add_subregion(get_system_memory(), mmio_base,
>>> mmio_alias);
>>> +
>>> +    /* Map high MMIO space */
>>> +    high_mmio_alias = g_new0(MemoryRegion, 1);
>>> +    memory_region_init_alias(high_mmio_alias, OBJECT(dev),
>>> "pcie-mmio-high",
>>> +                             mmio_reg, high_mmio_base, high_mmio_size);
>>> +    memory_region_add_subregion(get_system_memory(), high_mmio_base,
>>> +                                high_mmio_alias);
>>> +
>>> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
>>> +
>>> +    for (i = 0; i < GPEX_NUM_IRQS; i++) {
>>> +        irq = qdev_get_gpio_in(irqchip, RVSP_PCIE_IRQ + i);
>>> +
>>> +        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
>>> +        gpex_set_irq_num(GPEX_HOST(dev), i, RVSP_PCIE_IRQ + i);
>>> +    }
>>> +
>>> +    pci = PCI_HOST_BRIDGE(dev);
>>> +    pci_init_nic_devices(pci->bus, mc->default_nic);
>>> +    /* IDE disk setup.  */
>>> +    pdev_ahci = pci_create_simple(pci->bus, -1, TYPE_ICH9_AHCI);
>>> +    ich9 = ICH9_AHCI(pdev_ahci);
>>> +    g_assert(ARRAY_SIZE(hd) == ich9->ahci.ports);
>>> +    ide_drive_get(hd, ich9->ahci.ports);
>>> +    ahci_ide_create_devs(&ich9->ahci, hd);
>>> +
>>> +    GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus;
>>> +    return dev;
>>> +}
>>> +
>>> +static DeviceState *rvsp_ref_create_aia(int aia_guests,
>>> +                                        const MemMapEntry *memmap,
>>> int socket,
>>> +                                        int base_hartid, int hart_count)
>>> +{
>>> +    int i;
>>> +    hwaddr addr;
>>> +    uint32_t guest_bits;
>>> +    DeviceState *aplic_s = NULL;
>>> +    DeviceState *aplic_m = NULL;
>>> +    bool msimode = true;
>>> +
>>> +    /* Per-socket M-level IMSICs */
>>> +    addr = memmap[RVSP_IMSIC_M].base +
>>> +           socket * RVSP_IMSIC_GROUP_MAX_SIZE;
>>> +    for (i = 0; i < hart_count; i++) {
>>> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
>>> +                           base_hartid + i, true, 1,
>>> +                           RVSP_IRQCHIP_NUM_MSIS);
>>> +    }
>>> +
>>> +    /* Per-socket S-level IMSICs */
>>> +    guest_bits = imsic_num_bits(aia_guests + 1);
>>> +    addr = memmap[RVSP_IMSIC_S].base + socket *
>>> RVSP_IMSIC_GROUP_MAX_SIZE;
>>> +    for (i = 0; i < hart_count; i++) {
>>> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(guest_bits),
>>> +                           base_hartid + i, false, 1 + aia_guests,
>>> +                           RVSP_IRQCHIP_NUM_MSIS);
>>> +    }
>>> +
>>> +    /* Per-socket M-level APLIC */
>>> +    aplic_m = riscv_aplic_create(memmap[RVSP_APLIC_M].base +
>>> +                                 socket * memmap[RVSP_APLIC_M].size,
>>> +                                 memmap[RVSP_APLIC_M].size,
>>> +                                 (msimode) ? 0 : base_hartid,
>>> +                                 (msimode) ? 0 : hart_count,
>>> +                                 RVSP_IRQCHIP_NUM_SOURCES,
>>> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
>>> +                                 msimode, true, NULL);
>>> +
>>> +    /* Per-socket S-level APLIC */
>>> +    aplic_s = riscv_aplic_create(memmap[RVSP_APLIC_S].base +
>>> +                                 socket * memmap[RVSP_APLIC_S].size,
>>> +                                 memmap[RVSP_APLIC_S].size,
>>> +                                 (msimode) ? 0 : base_hartid,
>>> +                                 (msimode) ? 0 : hart_count,
>>> +                                 RVSP_IRQCHIP_NUM_SOURCES,
>>> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
>>> +                                 msimode, false, aplic_m);
>>> +
>>> +    (void)aplic_s;
>>> +    return aplic_m;
>>> +}
>>> +
>>> +static void rvsp_ref_machine_done(Notifier *notifier, void *data)
>>> +{
>>> +    RVSPMachineState *s = container_of(notifier, RVSPMachineState,
>>> +                                       machine_done);
>>> +    const MemMapEntry *memmap = rvsp_ref_memmap;
>>> +    MachineState *machine = MACHINE(s);
>>> +    target_ulong start_addr = memmap[RVSP_DRAM].base;
>>> +    target_ulong firmware_end_addr, kernel_start_addr;
>>> +    const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
>>> +    uint64_t fdt_load_addr;
>>> +    uint64_t kernel_entry = 0;
>>> +    BlockBackend *pflash_blk0;
>>> +
>>> +    /* load/create device tree */
>>> +    if (machine->dtb) {
>>> +        machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
>>> +        if (!machine->fdt) {
>>> +            error_report("load_device_tree() failed");
>>> +            exit(1);
>>> +        }
>>> +    } else {
>>> +        create_fdt(s, memmap);
>>> +    }
>>
>> We fixed a bug in the 'virt' machine  that had to do with the
>> guest-loader device
>> not being properly configured. It had to do with the FDT not being
>> properly generated.
>> See 7a87ba8956e59 and https://gitlab.com/qemu-project/qemu/-/issues/1925
>> for more info.
>>
>> Long story short, the 'virt' machine introduced a 'finalize_fdt' step
>> where some pieces
>> of FDT is created during machine_done(), and some are created during
>> init() via create_fdt().
>> I think it's a good idea to consider doing that in this new machine to
>> avoid this kind of
>> problem.
>>
> Sure, I will do. I started this work on a feature branch using virt.c as
> the basis and ported it to upstream recently, so the recent changes in
> virt.c are not integrated here.
> 
> Thanks,
> Fei.
> 
>>
>> Thanks,
>>
>> Daniel
>>
>>> +
>>> +    firmware_end_addr = riscv_find_and_load_firmware(machine,
>>> firmware_name,
>>> +                                                     start_addr, NULL);
>>> +
>>> +    pflash_blk0 = pflash_cfi01_get_blk(s->flash[0]);
>>> +    if (pflash_blk0) {
>>> +        if (machine->firmware && !strcmp(machine->firmware, "none")) {
>>> +            /*
>>> +             * Pflash was supplied but bios is none and not KVM guest,
>>> +             * let's overwrite the address we jump to after reset to
>>> +             * the base of the flash.
>>> +             */
>>> +            start_addr = rvsp_ref_memmap[RVSP_FLASH].base;
>>> +        } else {
>>> +            /*
>>> +             * Pflash was supplied but either KVM guest or bios is
>>> not none.
>>> +             * In this case, base of the flash would contain S-mode
>>> payload.
>>> +             */
>>> +            riscv_setup_firmware_boot(machine);
>>> +            kernel_entry = rvsp_ref_memmap[RVSP_FLASH].base;
>>> +        }
>>> +    }
>>> +
>>> +    if (machine->kernel_filename && !kernel_entry) {
>>> +        kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>>> +
>>> firmware_end_addr);
>>> +
>>> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
>>> +                                         kernel_start_addr, true, NULL);
>>> +    }
>>> +
>>> +    fdt_load_addr = riscv_compute_fdt_addr(memmap[RVSP_DRAM].base,
>>> +                                           memmap[RVSP_DRAM].size,
>>> +                                           machine);
>>> +    riscv_load_fdt(fdt_load_addr, machine->fdt);
>>> +
>>> +    /* load the reset vector */
>>> +    riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
>>> +                              rvsp_ref_memmap[RVSP_MROM].base,
>>> +                              rvsp_ref_memmap[RVSP_MROM].size,
>>> kernel_entry,
>>> +                              fdt_load_addr);
>>> +
>>> +}
>>> +
>>> +static void rvsp_ref_machine_init(MachineState *machine)
>>> +{
>>> +    const MemMapEntry *memmap = rvsp_ref_memmap;
>>> +    RVSPMachineState *s = RVSP_REF_MACHINE(machine);
>>> +    MemoryRegion *system_memory = get_system_memory();
>>> +    MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
>>> +    char *soc_name;
>>> +    DeviceState *mmio_irqchip, *pcie_irqchip;
>>> +    int i, base_hartid, hart_count;
>>> +    int socket_count = riscv_socket_count(machine);
>>> +
>>> +    /* Check socket count limit */
>>> +    if (RVSP_SOCKETS_MAX < socket_count) {
>>> +        error_report("number of sockets/nodes should be less than %d",
>>> +            RVSP_SOCKETS_MAX);
>>> +        exit(1);
>>> +    }
>>> +
>>> +    if (!tcg_enabled()) {
>>> +        error_report("'aclint' is only available with TCG
>>> acceleration");
>>> +        exit(1);
>>> +    }
>>> +
>>> +    /* Initialize sockets */
>>> +    mmio_irqchip = pcie_irqchip = NULL;
>>> +    for (i = 0; i < socket_count; i++) {
>>> +        if (!riscv_socket_check_hartids(machine, i)) {
>>> +            error_report("discontinuous hartids in socket%d", i);
>>> +            exit(1);
>>> +        }
>>> +
>>> +        base_hartid = riscv_socket_first_hartid(machine, i);
>>> +        if (base_hartid < 0) {
>>> +            error_report("can't find hartid base for socket%d", i);
>>> +            exit(1);
>>> +        }
>>> +
>>> +        hart_count = riscv_socket_hart_count(machine, i);
>>> +        if (hart_count < 0) {
>>> +            error_report("can't find hart count for socket%d", i);
>>> +            exit(1);
>>> +        }
>>> +
>>> +        soc_name = g_strdup_printf("soc%d", i);
>>> +        object_initialize_child(OBJECT(machine), soc_name, &s->soc[i],
>>> +                                TYPE_RISCV_HART_ARRAY);
>>> +        g_free(soc_name);
>>> +        object_property_set_str(OBJECT(&s->soc[i]), "cpu-type",
>>> +                                machine->cpu_type, &error_abort);
>>> +        object_property_set_int(OBJECT(&s->soc[i]), "hartid-base",
>>> +                                base_hartid, &error_abort);
>>> +        object_property_set_int(OBJECT(&s->soc[i]), "num-harts",
>>> +                                hart_count, &error_abort);
>>> +        sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_fatal);
>>> +
>>> +        /* Per-socket ACLINT MTIMER */
>>> +        riscv_aclint_mtimer_create(memmap[RVSP_ACLINT].base +
>>> +                i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
>>> +            RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
>>> +            base_hartid, hart_count,
>>> +            RISCV_ACLINT_DEFAULT_MTIMECMP,
>>> +            RISCV_ACLINT_DEFAULT_MTIME,
>>> +            RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
>>> +
>>> +        /* Per-socket interrupt controller */
>>> +        s->irqchip[i] = rvsp_ref_create_aia(s->aia_guests,
>>> +                                            memmap, i, base_hartid,
>>> +                                            hart_count);
>>> +
>>> +        /* Try to use different IRQCHIP instance based device type */
>>> +        if (i == 0) {
>>> +            mmio_irqchip = s->irqchip[i];
>>> +            pcie_irqchip = s->irqchip[i];
>>> +        }
>>> +        if (i == 1) {
>>> +            pcie_irqchip = s->irqchip[i];
>>> +        }
>>> +    }
>>> +
>>> +    s->memmap = rvsp_ref_memmap;
>>> +
>>> +    /* register system main memory (actual RAM) */
>>> +    memory_region_add_subregion(system_memory, memmap[RVSP_DRAM].base,
>>> +        machine->ram);
>>> +
>>> +    /* boot rom */
>>> +    memory_region_init_rom(mask_rom, NULL, "riscv_rvsp_ref_board.mrom",
>>> +                           memmap[RVSP_MROM].size, &error_fatal);
>>> +    memory_region_add_subregion(system_memory, memmap[RVSP_MROM].base,
>>> +                                mask_rom);
>>> +
>>> +    /* SiFive Test MMIO device */
>>> +    sifive_test_create(memmap[RVSP_TEST].base);
>>> +
>>> +    gpex_pcie_init(system_memory, pcie_irqchip, s);
>>> +
>>> +    serial_mm_init(system_memory, memmap[RVSP_UART0].base,
>>> +        0, qdev_get_gpio_in(mmio_irqchip, RVSP_UART0_IRQ), 399193,
>>> +        serial_hd(0), DEVICE_LITTLE_ENDIAN);
>>> +
>>> +    sysbus_create_simple("goldfish_rtc", memmap[RVSP_RTC].base,
>>> +        qdev_get_gpio_in(mmio_irqchip, RVSP_RTC_IRQ));
>>> +
>>> +    for (i = 0; i < ARRAY_SIZE(s->flash); i++) {
>>> +        /* Map legacy -drive if=pflash to machine properties */
>>> +        pflash_cfi01_legacy_drive(s->flash[i],
>>> +                                  drive_get(IF_PFLASH, 0, i));
>>> +    }
>>> +    rvsp_flash_map(s, system_memory);
>>> +
>>> +    s->machine_done.notify = rvsp_ref_machine_done;
>>> +    qemu_add_machine_init_done_notifier(&s->machine_done);
>>> +}
>>> +
>>> +static void rvsp_ref_machine_instance_init(Object *obj)
>>> +{
>>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>>> +
>>> +    rvsp_flash_create(s);
>>> +}
>>> +
>>> +static char *rvsp_ref_get_aia_guests(Object *obj, Error **errp)
>>> +{
>>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>>> +    char val[32];
>>> +
>>> +    sprintf(val, "%d", s->aia_guests);
>>> +    return g_strdup(val);
>>> +}
>>> +
>>> +static void rvsp_ref_set_aia_guests(Object *obj, const char *val,
>>> Error **errp)
>>> +{
>>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>>> +
>>> +    s->aia_guests = atoi(val);
>>> +    if (s->aia_guests < 0 || s->aia_guests > RVSP_IRQCHIP_MAX_GUESTS) {
>>> +        error_setg(errp, "Invalid number of AIA IMSIC guests");
>>> +        error_append_hint(errp, "Valid values be between 0 and %d.\n",
>>> +                          RVSP_IRQCHIP_MAX_GUESTS);
>>> +    }
>>> +}
>>> +
>>> +static void rvsp_ref_machine_class_init(ObjectClass *oc, void *data)
>>> +{
>>> +    char str[128];
>>> +    MachineClass *mc = MACHINE_CLASS(oc);
>>> +
>>> +    mc->desc = "RISC-V Server SoC Reference board";
>>> +    mc->init = rvsp_ref_machine_init;
>>> +    mc->max_cpus = RVSP_CPUS_MAX;
>>> +    mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
>>> +    mc->pci_allow_0_address = true;
>>> +    mc->default_nic = "e1000e";
>>> +    mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
>>> +    mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
>>> +    mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
>>> +    mc->numa_mem_supported = true;
>>> +    /* platform instead of architectural choice */
>>> +    mc->cpu_cluster_has_numa_boundary = true;
>>> +    mc->default_ram_id = "riscv_rvsp_ref_board.ram";
>>> +
>>> +    object_class_property_add_str(oc, "aia-guests",
>>> +                                  rvsp_ref_get_aia_guests,
>>> +                                  rvsp_ref_set_aia_guests);
>>> +    sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid
>>> value "
>>> +                 "should be between 0 and %d.",
>>> RVSP_IRQCHIP_MAX_GUESTS);
>>> +    object_class_property_set_description(oc, "aia-guests", str);
>>> +}
>>> +
>>> +static const TypeInfo rvsp_ref_typeinfo = {
>>> +    .name       = TYPE_RVSP_REF_MACHINE,
>>> +    .parent     = TYPE_MACHINE,
>>> +    .class_init = rvsp_ref_machine_class_init,
>>> +    .instance_init = rvsp_ref_machine_instance_init,
>>> +    .instance_size = sizeof(RVSPMachineState),
>>> +};
>>> +
>>> +static void rvsp_ref_init_register_types(void)
>>> +{
>>> +    type_register_static(&rvsp_ref_typeinfo);
>>> +}
>>> +
>>> +type_init(rvsp_ref_init_register_types)
>
Alistair Francis March 6, 2024, 12:19 a.m. UTC | #4
On Mon, Mar 4, 2024 at 8:28 PM Fei Wu <fei2.wu@intel.com> wrote:
>
> The RISC-V Server Platform specification[1] defines a standardized set
> of hardware and software capabilities, that portable system software,
> such as OS and hypervisors can rely on being present in a RISC-V server
> platform.
>
> A corresponding Qemu RISC-V server platform reference (rvsp-ref for
> short) machine type is added to provide a environment for firmware/OS
> development and testing. The main features included in rvsp-ref are:
>
>  - Based on riscv virt machine type
>  - A new memory map as close as virt machine as possible
>  - A new virt CPU type rvsp-ref-cpu for server platform compliance
>  - AIA
>  - PCIe AHCI
>  - PCIe NIC
>  - No virtio device
>  - No fw_cfg device
>  - No ACPI table provided
>  - Only minimal device tree nodes
>
> [1] https://github.com/riscv-non-isa/riscv-server-platform

+ Atish

>
> Signed-off-by: Fei Wu <fei2.wu@intel.com>
> ---
>  configs/devices/riscv64-softmmu/default.mak |    1 +
>  hw/riscv/Kconfig                            |   13 +
>  hw/riscv/meson.build                        |    1 +
>  hw/riscv/server_platform_ref.c              | 1244 +++++++++++++++++++
>  4 files changed, 1259 insertions(+)
>  create mode 100644 hw/riscv/server_platform_ref.c
>
> diff --git a/configs/devices/riscv64-softmmu/default.mak b/configs/devices/riscv64-softmmu/default.mak
> index 3f68059448..a1d98e49ef 100644
> --- a/configs/devices/riscv64-softmmu/default.mak
> +++ b/configs/devices/riscv64-softmmu/default.mak
> @@ -10,5 +10,6 @@ CONFIG_SPIKE=y
>  CONFIG_SIFIVE_E=y
>  CONFIG_SIFIVE_U=y
>  CONFIG_RISCV_VIRT=y
> +CONFIG_SERVER_PLATFORM_REF=y
>  CONFIG_MICROCHIP_PFSOC=y
>  CONFIG_SHAKTI_C=y
> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
> index 5d644eb7b1..debac5a7f5 100644
> --- a/hw/riscv/Kconfig
> +++ b/hw/riscv/Kconfig
> @@ -48,6 +48,19 @@ config RISCV_VIRT
>      select ACPI
>      select ACPI_PCI
>
> +config SERVER_PLATFORM_REF
> +    bool
> +    select RISCV_NUMA
> +    select GOLDFISH_RTC
> +    select PCI
> +    select PCI_EXPRESS_GENERIC_BRIDGE
> +    select PFLASH_CFI01
> +    select SERIAL
> +    select RISCV_ACLINT
> +    select RISCV_APLIC
> +    select RISCV_IMSIC
> +    select SIFIVE_TEST

Do we really need SiFive Test in the server platform?

Same with the goldfish RTC?

> +
>  config SHAKTI_C
>      bool
>      select RISCV_ACLINT
> diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
> index 2f7ee81be3..bb3aff91ea 100644
> --- a/hw/riscv/meson.build
> +++ b/hw/riscv/meson.build
> @@ -4,6 +4,7 @@ riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: files('numa.c'))
>  riscv_ss.add(files('riscv_hart.c'))
>  riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c'))
>  riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c'))
> +riscv_ss.add(when: 'CONFIG_SERVER_PLATFORM_REF', if_true: files('server_platform_ref.c'))
>  riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c'))
>  riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
>  riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
> diff --git a/hw/riscv/server_platform_ref.c b/hw/riscv/server_platform_ref.c
> new file mode 100644
> index 0000000000..ae90c4b27a
> --- /dev/null
> +++ b/hw/riscv/server_platform_ref.c
> @@ -0,0 +1,1244 @@
> +/*
> + * QEMU RISC-V Server Platfrom (RVSP) Reference Board

Platform

> + *
> + * Copyright (c) 2024 Intel, Inc.
> + *
> + * This board is compliant RISC-V Server platform specification and leveraging
> + * a lot of riscv virt code.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/units.h"
> +#include "qemu/error-report.h"
> +#include "qemu/guest-random.h"
> +#include "qapi/error.h"
> +#include "qapi/qapi-visit-common.h"
> +#include "hw/boards.h"
> +#include "hw/loader.h"
> +#include "hw/sysbus.h"
> +#include "hw/qdev-properties.h"
> +#include "hw/char/serial.h"
> +#include "hw/block/flash.h"
> +#include "hw/ide/pci.h"
> +#include "hw/ide/ahci-pci.h"
> +#include "hw/pci/pci.h"
> +#include "hw/pci-host/gpex.h"
> +#include "hw/core/sysbus-fdt.h"
> +#include "hw/riscv/riscv_hart.h"
> +#include "hw/riscv/boot.h"
> +#include "hw/riscv/numa.h"
> +#include "hw/intc/riscv_aclint.h"
> +#include "hw/intc/riscv_aplic.h"
> +#include "hw/intc/riscv_imsic.h"
> +#include "hw/misc/sifive_test.h"
> +#include "chardev/char.h"
> +#include "sysemu/device_tree.h"
> +#include "sysemu/sysemu.h"
> +#include "sysemu/tcg.h"
> +#include "target/riscv/cpu.h"
> +#include "target/riscv/pmu.h"
> +#include "net/net.h"
> +
> +#define RVSP_CPUS_MAX_BITS             9
> +#define RVSP_CPUS_MAX                  (1 << RVSP_CPUS_MAX_BITS)
> +#define RVSP_SOCKETS_MAX_BITS          2
> +#define RVSP_SOCKETS_MAX               (1 << RVSP_SOCKETS_MAX_BITS)
> +
> +#define RVSP_IRQCHIP_NUM_MSIS 255
> +#define RVSP_IRQCHIP_NUM_SOURCES 96
> +#define RVSP_IRQCHIP_NUM_PRIO_BITS 3
> +#define RVSP_IRQCHIP_MAX_GUESTS_BITS 3
> +#define RVSP_IRQCHIP_MAX_GUESTS ((1U << RVSP_IRQCHIP_MAX_GUESTS_BITS) - 1U)
> +
> +#define FDT_PCI_ADDR_CELLS    3
> +#define FDT_PCI_INT_CELLS     1
> +#define FDT_APLIC_INT_CELLS   2
> +#define FDT_IMSIC_INT_CELLS   0
> +#define FDT_MAX_INT_CELLS     2
> +#define FDT_MAX_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
> +                                 1 + FDT_MAX_INT_CELLS)
> +#define FDT_APLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
> +                                 1 + FDT_APLIC_INT_CELLS)
> +
> +#define NUM_SATA_PORTS  6
> +
> +#define TYPE_RVSP_REF_MACHINE MACHINE_TYPE_NAME("rvsp-ref")
> +OBJECT_DECLARE_SIMPLE_TYPE(RVSPMachineState, RVSP_REF_MACHINE)
> +
> +struct RVSPMachineState {
> +    /*< private >*/
> +    MachineState parent;
> +
> +    /*< public >*/
> +    Notifier machine_done;
> +    RISCVHartArrayState soc[RVSP_SOCKETS_MAX];
> +    DeviceState *irqchip[RVSP_SOCKETS_MAX];
> +    PFlashCFI01 *flash[2];
> +
> +    int fdt_size;
> +    int aia_guests;
> +    const MemMapEntry *memmap;
> +};
> +
> +enum {
> +    RVSP_DEBUG,
> +    RVSP_MROM,
> +    RVSP_TEST,
> +    RVSP_RTC,
> +    RVSP_ACLINT,
> +    RVSP_APLIC_M,
> +    RVSP_APLIC_S,
> +    RVSP_UART0,
> +    RVSP_IMSIC_M,
> +    RVSP_IMSIC_S,
> +    RVSP_FLASH,
> +    RVSP_DRAM,
> +    RVSP_PCIE_MMIO,
> +    RVSP_PCIE_PIO,
> +    RVSP_PCIE_ECAM,
> +    RVSP_PCIE_MMIO_HIGH
> +};
> +
> +enum {
> +    RVSP_UART0_IRQ = 10,
> +    RVSP_RTC_IRQ = 11,
> +    RVSP_PCIE_IRQ = 0x20, /* 32 to 35 */
> +};
> +
> +/*
> + * The server soc reference machine physical address space used by some of the
> + * devices namely ACLINT, APLIC and IMSIC depend on number of Sockets, number
> + * of CPUs, and number of IMSIC guest files.
> + *
> + * Various limits defined by RVSP_SOCKETS_MAX_BITS, RVSP_CPUS_MAX_BITS, and
> + * RVSP_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization of server soc
> + * reference machine physical address space.
> + */
> +
> +#define RVSP_IMSIC_GROUP_MAX_SIZE      (1U << IMSIC_MMIO_GROUP_MIN_SHIFT)
> +#if RVSP_IMSIC_GROUP_MAX_SIZE < \
> +    IMSIC_GROUP_SIZE(RVSP_CPUS_MAX_BITS, RVSP_IRQCHIP_MAX_GUESTS_BITS)
> +#error "Can't accomodate single IMSIC group in address space"
> +#endif
> +
> +#define RVSP_IMSIC_MAX_SIZE            (RVSP_SOCKETS_MAX * \
> +                                        RVSP_IMSIC_GROUP_MAX_SIZE)
> +#if 0x4000000 < RVSP_IMSIC_MAX_SIZE
> +#error "Can't accomodate all IMSIC groups in address space"
> +#endif
> +
> +static const MemMapEntry rvsp_ref_memmap[] = {
> +    [RVSP_DEBUG] =          {        0x0,         0x100 },
> +    [RVSP_MROM] =           {     0x1000,        0xf000 },
> +    [RVSP_TEST] =           {   0x100000,        0x1000 },
> +    [RVSP_RTC] =            {   0x101000,        0x1000 },
> +    [RVSP_ACLINT] =         {  0x2000000,       0x10000 },
> +    [RVSP_PCIE_PIO] =       {  0x3000000,       0x10000 },
> +    [RVSP_APLIC_M] =        {  0xc000000, APLIC_SIZE(RVSP_CPUS_MAX) },
> +    [RVSP_APLIC_S] =        {  0xd000000, APLIC_SIZE(RVSP_CPUS_MAX) },
> +    [RVSP_UART0] =          { 0x10000000,         0x100 },
> +    [RVSP_FLASH] =          { 0x20000000,     0x4000000 },
> +    [RVSP_IMSIC_M] =        { 0x24000000, RVSP_IMSIC_MAX_SIZE },
> +    [RVSP_IMSIC_S] =        { 0x28000000, RVSP_IMSIC_MAX_SIZE },
> +    [RVSP_PCIE_ECAM] =      { 0x30000000,    0x10000000 },
> +    [RVSP_PCIE_MMIO] =      { 0x40000000,    0x40000000 },
> +    [RVSP_DRAM] =           { 0x80000000, 0xff80000000ull },
> +    [RVSP_PCIE_MMIO_HIGH] = { 0x10000000000ull, 0x10000000000ull },
> +};
> +
> +#define RVSP_FLASH_SECTOR_SIZE (256 * KiB)
> +
> +static PFlashCFI01 *rvsp_flash_create1(RVSPMachineState *s,
> +                                       const char *name,
> +                                       const char *alias_prop_name)
> +{
> +    /*
> +     * Create a single flash device.  We use the same parameters as
> +     * the flash devices on the ARM virt board.
> +     */
> +    DeviceState *dev = qdev_new(TYPE_PFLASH_CFI01);
> +
> +    qdev_prop_set_uint64(dev, "sector-length", RVSP_FLASH_SECTOR_SIZE);
> +    qdev_prop_set_uint8(dev, "width", 4);
> +    qdev_prop_set_uint8(dev, "device-width", 2);
> +    qdev_prop_set_bit(dev, "big-endian", false);
> +    qdev_prop_set_uint16(dev, "id0", 0x89);
> +    qdev_prop_set_uint16(dev, "id1", 0x18);
> +    qdev_prop_set_uint16(dev, "id2", 0x00);
> +    qdev_prop_set_uint16(dev, "id3", 0x00);
> +    qdev_prop_set_string(dev, "name", name);
> +
> +    object_property_add_child(OBJECT(s), name, OBJECT(dev));
> +    object_property_add_alias(OBJECT(s), alias_prop_name,
> +                              OBJECT(dev), "drive");
> +
> +    return PFLASH_CFI01(dev);
> +}
> +
> +static void rvsp_flash_create(RVSPMachineState *s)
> +{
> +    s->flash[0] = rvsp_flash_create1(s, "rvsp.flash0", "pflash0");
> +    s->flash[1] = rvsp_flash_create1(s, "rvsp.flash1", "pflash1");
> +}
> +
> +static void rvsp_flash_map1(PFlashCFI01 *flash,
> +                            hwaddr base, hwaddr size,
> +                            MemoryRegion *sysmem)
> +{
> +    DeviceState *dev = DEVICE(flash);
> +
> +    assert(QEMU_IS_ALIGNED(size, RVSP_FLASH_SECTOR_SIZE));
> +    assert(size / RVSP_FLASH_SECTOR_SIZE <= UINT32_MAX);
> +    qdev_prop_set_uint32(dev, "num-blocks", size / RVSP_FLASH_SECTOR_SIZE);
> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
> +
> +    memory_region_add_subregion(sysmem, base,
> +                                sysbus_mmio_get_region(SYS_BUS_DEVICE(dev),
> +                                                       0));
> +}
> +
> +static void rvsp_flash_map(RVSPMachineState *s,
> +                           MemoryRegion *sysmem)
> +{
> +    hwaddr flashsize = rvsp_ref_memmap[RVSP_FLASH].size / 2;
> +    hwaddr flashbase = rvsp_ref_memmap[RVSP_FLASH].base;
> +
> +    rvsp_flash_map1(s->flash[0], flashbase, flashsize, sysmem);
> +    rvsp_flash_map1(s->flash[1], flashbase + flashsize, flashsize, sysmem);
> +}
> +
> +static void create_pcie_irq_map(RVSPMachineState *s, void *fdt, char *nodename,
> +                                uint32_t irqchip_phandle)
> +{
> +    int pin, dev;
> +    uint32_t irq_map_stride = 0;
> +    uint32_t full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS *
> +                          FDT_MAX_INT_MAP_WIDTH] = {};
> +    uint32_t *irq_map = full_irq_map;
> +
> +    /*
> +     * This code creates a standard swizzle of interrupts such that
> +     * each device's first interrupt is based on it's PCI_SLOT number.
> +     * (See pci_swizzle_map_irq_fn())
> +     *
> +     * We only need one entry per interrupt in the table (not one per
> +     * possible slot) seeing the interrupt-map-mask will allow the table
> +     * to wrap to any number of devices.
> +     */
> +    for (dev = 0; dev < GPEX_NUM_IRQS; dev++) {
> +        int devfn = dev * 0x8;
> +
> +        for (pin = 0; pin < GPEX_NUM_IRQS; pin++) {
> +            int irq_nr = RVSP_PCIE_IRQ +
> +                         ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS);
> +            int i = 0;
> +
> +            /* Fill PCI address cells */
> +            irq_map[i] = cpu_to_be32(devfn << 8);
> +            i += FDT_PCI_ADDR_CELLS;
> +
> +            /* Fill PCI Interrupt cells */
> +            irq_map[i] = cpu_to_be32(pin + 1);
> +            i += FDT_PCI_INT_CELLS;
> +
> +            /* Fill interrupt controller phandle and cells */
> +            irq_map[i++] = cpu_to_be32(irqchip_phandle);
> +            irq_map[i++] = cpu_to_be32(irq_nr);
> +            irq_map[i++] = cpu_to_be32(0x4);
> +
> +            if (!irq_map_stride) {
> +                irq_map_stride = i;
> +            }
> +            irq_map += irq_map_stride;
> +        }
> +    }
> +
> +    qemu_fdt_setprop(fdt, nodename, "interrupt-map", full_irq_map,
> +                     GPEX_NUM_IRQS * GPEX_NUM_IRQS *
> +                     irq_map_stride * sizeof(uint32_t));
> +
> +    qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask",
> +                           0x1800, 0, 0, 0x7);
> +}
> +
> +static void create_fdt_socket_cpus(RVSPMachineState *s, int socket,
> +                                   char *clust_name, uint32_t *phandle,
> +                                   uint32_t *intc_phandles)
> +{
> +    int cpu;
> +    uint32_t cpu_phandle;
> +    MachineState *ms = MACHINE(s);
> +    char *name, *cpu_name, *core_name, *intc_name, *sv_name;
> +    bool is_32_bit = riscv_is_32bit(&s->soc[0]);
> +    uint8_t satp_mode_max;
> +
> +    for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
> +        RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu];
> +
> +        cpu_phandle = (*phandle)++;
> +
> +        cpu_name = g_strdup_printf("/cpus/cpu@%d",
> +            s->soc[socket].hartid_base + cpu);
> +        qemu_fdt_add_subnode(ms->fdt, cpu_name);
> +
> +        if (cpu_ptr->cfg.satp_mode.supported != 0) {
> +            satp_mode_max = satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map);
> +            sv_name = g_strdup_printf("riscv,%s",
> +                                      satp_mode_str(satp_mode_max, is_32_bit));
> +            qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name);
> +            g_free(sv_name);
> +        }
> +
> +        name = riscv_isa_string(cpu_ptr);
> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name);
> +        g_free(name);
> +
> +        if (cpu_ptr->cfg.ext_zicbom) {
> +            qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cbom-block-size",
> +                                  cpu_ptr->cfg.cbom_blocksize);
> +        }
> +
> +        if (cpu_ptr->cfg.ext_zicboz) {
> +            qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cboz-block-size",
> +                                  cpu_ptr->cfg.cboz_blocksize);
> +        }
> +
> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "compatible", "riscv");
> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "status", "okay");
> +        qemu_fdt_setprop_cell(ms->fdt, cpu_name, "reg",
> +            s->soc[socket].hartid_base + cpu);
> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "device_type", "cpu");
> +        riscv_socket_fdt_write_id(ms, cpu_name, socket);
> +        qemu_fdt_setprop_cell(ms->fdt, cpu_name, "phandle", cpu_phandle);
> +
> +        intc_phandles[cpu] = (*phandle)++;
> +
> +        intc_name = g_strdup_printf("%s/interrupt-controller", cpu_name);
> +        qemu_fdt_add_subnode(ms->fdt, intc_name);
> +        qemu_fdt_setprop_cell(ms->fdt, intc_name, "phandle",
> +            intc_phandles[cpu]);
> +        qemu_fdt_setprop_string(ms->fdt, intc_name, "compatible",
> +            "riscv,cpu-intc");
> +        qemu_fdt_setprop(ms->fdt, intc_name, "interrupt-controller", NULL, 0);
> +        qemu_fdt_setprop_cell(ms->fdt, intc_name, "#interrupt-cells", 1);
> +
> +        core_name = g_strdup_printf("%s/core%d", clust_name, cpu);
> +        qemu_fdt_add_subnode(ms->fdt, core_name);
> +        qemu_fdt_setprop_cell(ms->fdt, core_name, "cpu", cpu_phandle);
> +
> +        g_free(core_name);
> +        g_free(intc_name);
> +        g_free(cpu_name);
> +    }
> +}
> +
> +static void create_fdt_socket_memory(RVSPMachineState *s,
> +                                     const MemMapEntry *memmap, int socket)
> +{
> +    char *mem_name;
> +    uint64_t addr, size;
> +    MachineState *ms = MACHINE(s);
> +
> +    addr = memmap[RVSP_DRAM].base + riscv_socket_mem_offset(ms, socket);
> +    size = riscv_socket_mem_size(ms, socket);
> +    mem_name = g_strdup_printf("/memory@%lx", (long)addr);
> +    qemu_fdt_add_subnode(ms->fdt, mem_name);
> +    qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg",
> +        addr >> 32, addr, size >> 32, size);
> +    qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory");
> +    riscv_socket_fdt_write_id(ms, mem_name, socket);
> +    g_free(mem_name);
> +}
> +
> +static void create_fdt_socket_aclint(RVSPMachineState *s,
> +                                     const MemMapEntry *memmap, int socket,
> +                                     uint32_t *intc_phandles)
> +{
> +    int cpu;
> +    char *name;
> +    unsigned long addr, size;
> +    uint32_t aclint_cells_size;
> +    uint32_t *aclint_mtimer_cells;
> +    MachineState *ms = MACHINE(s);
> +
> +    aclint_mtimer_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
> +
> +    for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
> +        aclint_mtimer_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
> +        aclint_mtimer_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_TIMER);
> +    }
> +    aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2;
> +
> +    addr = memmap[RVSP_ACLINT].base +
> +           (RISCV_ACLINT_DEFAULT_MTIMER_SIZE * socket);
> +    size = RISCV_ACLINT_DEFAULT_MTIMER_SIZE;
> +
> +    name = g_strdup_printf("/soc/mtimer@%lx", addr);
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
> +        "riscv,aclint-mtimer");
> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
> +        0x0, addr + RISCV_ACLINT_DEFAULT_MTIME,
> +        0x0, size - RISCV_ACLINT_DEFAULT_MTIME,
> +        0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP,
> +        0x0, RISCV_ACLINT_DEFAULT_MTIME);
> +    qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
> +        aclint_mtimer_cells, aclint_cells_size);
> +    riscv_socket_fdt_write_id(ms, name, socket);
> +    g_free(name);
> +
> +    g_free(aclint_mtimer_cells);
> +}
> +
> +static uint32_t imsic_num_bits(uint32_t count)
> +{
> +    uint32_t ret = 0;
> +
> +    while (BIT(ret) < count) {
> +        ret++;
> +    }
> +
> +    return ret;
> +}
> +
> +static void create_fdt_one_imsic(RVSPMachineState *s, hwaddr base_addr,
> +                                 uint32_t *intc_phandles, uint32_t msi_phandle,
> +                                 bool m_mode, uint32_t imsic_guest_bits)
> +{
> +    int cpu, socket;
> +    char *imsic_name;
> +    MachineState *ms = MACHINE(s);
> +    int socket_count = riscv_socket_count(ms);
> +    uint32_t imsic_max_hart_per_socket;
> +    uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size;
> +
> +    imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
> +    imsic_regs = g_new0(uint32_t, socket_count * 4);
> +
> +    for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
> +        imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
> +        imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
> +    }
> +
> +    imsic_max_hart_per_socket = 0;
> +    for (socket = 0; socket < socket_count; socket++) {
> +        imsic_addr = base_addr + socket * RVSP_IMSIC_GROUP_MAX_SIZE;
> +        imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) *
> +                     s->soc[socket].num_harts;
> +        imsic_regs[socket * 4 + 0] = 0;
> +        imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
> +        imsic_regs[socket * 4 + 2] = 0;
> +        imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
> +        if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
> +            imsic_max_hart_per_socket = s->soc[socket].num_harts;
> +        }
> +    }
> +
> +    imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr);
> +    qemu_fdt_add_subnode(ms->fdt, imsic_name);
> +    qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics");
> +    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
> +                          FDT_IMSIC_INT_CELLS);
> +    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0);
> +    qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0);
> +    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
> +                     imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
> +    qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
> +                     socket_count * sizeof(uint32_t) * 4);
> +    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
> +                     RVSP_IRQCHIP_NUM_MSIS);
> +
> +    if (imsic_guest_bits) {
> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,guest-index-bits",
> +                              imsic_guest_bits);
> +    }
> +
> +    if (socket_count > 1) {
> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits",
> +                              imsic_num_bits(imsic_max_hart_per_socket));
> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits",
> +                              imsic_num_bits(socket_count));
> +        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift",
> +                              IMSIC_MMIO_GROUP_MIN_SHIFT);
> +    }
> +    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle);
> +
> +    g_free(imsic_name);
> +    g_free(imsic_regs);
> +    g_free(imsic_cells);
> +}
> +
> +static void create_fdt_imsic(RVSPMachineState *s, const MemMapEntry *memmap,
> +                             uint32_t *phandle, uint32_t *intc_phandles,
> +                             uint32_t *msi_m_phandle, uint32_t *msi_s_phandle)
> +{
> +    *msi_m_phandle = (*phandle)++;
> +    *msi_s_phandle = (*phandle)++;
> +
> +    /* M-level IMSIC node */
> +    create_fdt_one_imsic(s, memmap[RVSP_IMSIC_M].base, intc_phandles,
> +                         *msi_m_phandle, true, 0);
> +
> +    /* S-level IMSIC node */
> +    create_fdt_one_imsic(s, memmap[RVSP_IMSIC_S].base, intc_phandles,
> +                         *msi_s_phandle, false,
> +                         imsic_num_bits(s->aia_guests + 1));
> +
> +}
> +
> +static void create_fdt_one_aplic(RVSPMachineState *s, int socket,
> +                                 unsigned long aplic_addr, uint32_t aplic_size,
> +                                 uint32_t msi_phandle,
> +                                 uint32_t *intc_phandles,
> +                                 uint32_t aplic_phandle,
> +                                 uint32_t aplic_child_phandle,
> +                                 bool m_mode, int num_harts)
> +{
> +    int cpu;
> +    char *aplic_name;
> +    uint32_t *aplic_cells;
> +    MachineState *ms = MACHINE(s);
> +
> +    aplic_cells = g_new0(uint32_t, num_harts * 2);
> +
> +    for (cpu = 0; cpu < num_harts; cpu++) {
> +        aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
> +        aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
> +    }
> +
> +    aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
> +    qemu_fdt_add_subnode(ms->fdt, aplic_name);
> +    qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic");
> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name,
> +                          "#interrupt-cells", FDT_APLIC_INT_CELLS);
> +    qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0);
> +
> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle);
> +
> +    qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
> +                           0x0, aplic_addr, 0x0, aplic_size);
> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
> +                          RVSP_IRQCHIP_NUM_SOURCES);
> +
> +    if (aplic_child_phandle) {
> +        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
> +                              aplic_child_phandle);
> +        qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
> +                               aplic_child_phandle, 0x1,
> +                               RVSP_IRQCHIP_NUM_SOURCES);
> +    }
> +
> +    riscv_socket_fdt_write_id(ms, aplic_name, socket);
> +    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_phandle);
> +
> +    g_free(aplic_name);
> +    g_free(aplic_cells);
> +}
> +
> +static void create_fdt_socket_aplic(RVSPMachineState *s,
> +                                    const MemMapEntry *memmap, int socket,
> +                                    uint32_t msi_m_phandle,
> +                                    uint32_t msi_s_phandle,
> +                                    uint32_t *phandle,
> +                                    uint32_t *intc_phandles,
> +                                    uint32_t *aplic_phandles,
> +                                    int num_harts)
> +{
> +    char *aplic_name;
> +    unsigned long aplic_addr;
> +    uint32_t aplic_m_phandle, aplic_s_phandle;
> +
> +    aplic_m_phandle = (*phandle)++;
> +    aplic_s_phandle = (*phandle)++;
> +
> +    /* M-level APLIC node */
> +    aplic_addr = memmap[RVSP_APLIC_M].base +
> +                 (memmap[RVSP_APLIC_M].size * socket);
> +    create_fdt_one_aplic(s, socket, aplic_addr, memmap[RVSP_APLIC_M].size,
> +                         msi_m_phandle, intc_phandles,
> +                         aplic_m_phandle, aplic_s_phandle,
> +                         true, num_harts);
> +
> +    /* S-level APLIC node */
> +    aplic_addr = memmap[RVSP_APLIC_S].base +
> +                 (memmap[RVSP_APLIC_S].size * socket);
> +    create_fdt_one_aplic(s, socket, aplic_addr, memmap[RVSP_APLIC_S].size,
> +                         msi_s_phandle, intc_phandles,
> +                         aplic_s_phandle, 0,
> +                         false, num_harts);
> +
> +    aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
> +
> +    g_free(aplic_name);
> +
> +    aplic_phandles[socket] = aplic_s_phandle;
> +}
> +
> +static void create_fdt_pmu(RVSPMachineState *s)
> +{
> +    char *pmu_name;
> +    MachineState *ms = MACHINE(s);
> +    RISCVCPU hart = s->soc[0].harts[0];
> +
> +    pmu_name = g_strdup_printf("/pmu");
> +    qemu_fdt_add_subnode(ms->fdt, pmu_name);
> +    qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible", "riscv,pmu");
> +    riscv_pmu_generate_fdt_node(ms->fdt, hart.pmu_avail_ctrs, pmu_name);
> +
> +    g_free(pmu_name);
> +}
> +
> +static void create_fdt_sockets(RVSPMachineState *s, const MemMapEntry *memmap,
> +                               uint32_t *phandle,
> +                               uint32_t *irq_mmio_phandle,
> +                               uint32_t *irq_pcie_phandle,
> +                               uint32_t *msi_pcie_phandle)
> +{
> +    char *clust_name;
> +    int socket, phandle_pos;
> +    MachineState *ms = MACHINE(s);
> +    uint32_t msi_m_phandle = 0, msi_s_phandle = 0;
> +    uint32_t *intc_phandles, xplic_phandles[MAX_NODES];
> +    int socket_count = riscv_socket_count(ms);
> +
> +    qemu_fdt_add_subnode(ms->fdt, "/cpus");
> +    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "timebase-frequency",
> +                          RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ);
> +    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0);
> +    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", 0x1);
> +    qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map");
> +
> +    intc_phandles = g_new0(uint32_t, ms->smp.cpus);
> +
> +    phandle_pos = ms->smp.cpus;
> +    for (socket = (socket_count - 1); socket >= 0; socket--) {
> +        phandle_pos -= s->soc[socket].num_harts;
> +
> +        clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket);
> +        qemu_fdt_add_subnode(ms->fdt, clust_name);
> +
> +        create_fdt_socket_cpus(s, socket, clust_name, phandle,
> +                               &intc_phandles[phandle_pos]);
> +
> +        create_fdt_socket_memory(s, memmap, socket);
> +
> +        g_free(clust_name);
> +
> +        create_fdt_socket_aclint(s, memmap, socket,
> +            &intc_phandles[phandle_pos]);
> +    }
> +
> +    create_fdt_imsic(s, memmap, phandle, intc_phandles,
> +        &msi_m_phandle, &msi_s_phandle);
> +    *msi_pcie_phandle = msi_s_phandle;
> +
> +    phandle_pos = ms->smp.cpus;
> +    for (socket = (socket_count - 1); socket >= 0; socket--) {
> +        phandle_pos -= s->soc[socket].num_harts;
> +
> +        create_fdt_socket_aplic(s, memmap, socket,
> +                                msi_m_phandle, msi_s_phandle, phandle,
> +                                &intc_phandles[phandle_pos],
> +                                xplic_phandles,
> +                                s->soc[socket].num_harts);
> +    }
> +
> +    g_free(intc_phandles);
> +
> +    for (socket = 0; socket < socket_count; socket++) {
> +        if (socket == 0) {
> +            *irq_mmio_phandle = xplic_phandles[socket];
> +            *irq_pcie_phandle = xplic_phandles[socket];
> +        }
> +        if (socket == 1) {
> +            *irq_pcie_phandle = xplic_phandles[socket];
> +        }
> +    }
> +
> +    riscv_socket_fdt_write_distance_matrix(ms);
> +}
> +
> +static void create_fdt_pcie(RVSPMachineState *s, const MemMapEntry *memmap,
> +                            uint32_t irq_pcie_phandle,
> +                            uint32_t msi_pcie_phandle)
> +{
> +    char *name;
> +    MachineState *ms = MACHINE(s);
> +
> +    name = g_strdup_printf("/soc/pci@%lx",
> +        (long) memmap[RVSP_PCIE_ECAM].base);
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "#address-cells",
> +        FDT_PCI_ADDR_CELLS);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "#interrupt-cells",
> +        FDT_PCI_INT_CELLS);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "#size-cells", 0x2);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
> +        "pci-host-ecam-generic");
> +    qemu_fdt_setprop_string(ms->fdt, name, "device_type", "pci");
> +    qemu_fdt_setprop_cell(ms->fdt, name, "linux,pci-domain", 0);
> +    qemu_fdt_setprop_cells(ms->fdt, name, "bus-range", 0,
> +        memmap[RVSP_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1);
> +    qemu_fdt_setprop(ms->fdt, name, "dma-coherent", NULL, 0);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle);
> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0,
> +        memmap[RVSP_PCIE_ECAM].base, 0, memmap[RVSP_PCIE_ECAM].size);
> +    qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges",
> +        1, FDT_PCI_RANGE_IOPORT, 2, 0,
> +        2, memmap[RVSP_PCIE_PIO].base, 2, memmap[RVSP_PCIE_PIO].size,
> +        1, FDT_PCI_RANGE_MMIO,
> +        2, memmap[RVSP_PCIE_MMIO].base,
> +        2, memmap[RVSP_PCIE_MMIO].base, 2, memmap[RVSP_PCIE_MMIO].size,
> +        1, FDT_PCI_RANGE_MMIO_64BIT,
> +        2, memmap[RVSP_PCIE_MMIO_HIGH].base,
> +        2, memmap[RVSP_PCIE_MMIO_HIGH].base, 2,
> +           memmap[RVSP_PCIE_MMIO_HIGH].size);
> +
> +    create_pcie_irq_map(s, ms->fdt, name, irq_pcie_phandle);
> +    g_free(name);
> +}
> +
> +static void create_fdt_reset(RVSPMachineState *s, const MemMapEntry *memmap,
> +                             uint32_t *phandle)
> +{
> +    char *name;
> +    uint32_t test_phandle;
> +    MachineState *ms = MACHINE(s);
> +
> +    test_phandle = (*phandle)++;
> +    name = g_strdup_printf("/soc/test@%lx",
> +        (long)memmap[RVSP_TEST].base);
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    {
> +        static const char * const compat[3] = {
> +            "sifive,test1", "sifive,test0", "syscon"
> +        };
> +        qemu_fdt_setprop_string_array(ms->fdt, name, "compatible",
> +                                      (char **)&compat, ARRAY_SIZE(compat));
> +    }
> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
> +        0x0, memmap[RVSP_TEST].base, 0x0, memmap[RVSP_TEST].size);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle);
> +    test_phandle = qemu_fdt_get_phandle(ms->fdt, name);
> +    g_free(name);
> +
> +    name = g_strdup_printf("/reboot");
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "syscon-reboot");
> +    qemu_fdt_setprop_cell(ms->fdt, name, "regmap", test_phandle);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "offset", 0x0);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "value", FINISHER_RESET);
> +    g_free(name);
> +
> +    name = g_strdup_printf("/poweroff");
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "syscon-poweroff");
> +    qemu_fdt_setprop_cell(ms->fdt, name, "regmap", test_phandle);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "offset", 0x0);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "value", FINISHER_PASS);
> +    g_free(name);
> +}
> +
> +static void create_fdt_uart(RVSPMachineState *s, const MemMapEntry *memmap,
> +                            uint32_t irq_mmio_phandle)
> +{
> +    char *name;
> +    MachineState *ms = MACHINE(s);
> +
> +    name = g_strdup_printf("/soc/serial@%lx", (long)memmap[RVSP_UART0].base);
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a");
> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
> +        0x0, memmap[RVSP_UART0].base,
> +        0x0, memmap[RVSP_UART0].size);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle);
> +    qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", RVSP_UART0_IRQ, 0x4);
> +
> +    qemu_fdt_add_subnode(ms->fdt, "/chosen");
> +    qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", name);
> +    g_free(name);
> +}
> +
> +static void create_fdt_rtc(RVSPMachineState *s, const MemMapEntry *memmap,
> +                           uint32_t irq_mmio_phandle)
> +{
> +    char *name;
> +    MachineState *ms = MACHINE(s);
> +
> +    name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[RVSP_RTC].base);
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
> +        "google,goldfish-rtc");
> +    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
> +        0x0, memmap[RVSP_RTC].base, 0x0, memmap[RVSP_RTC].size);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
> +        irq_mmio_phandle);
> +    qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", RVSP_RTC_IRQ, 0x4);
> +    g_free(name);
> +}
> +
> +static void create_fdt_flash(RVSPMachineState *s, const MemMapEntry *memmap)
> +{
> +    char *name;
> +    MachineState *ms = MACHINE(s);
> +    hwaddr flashsize = rvsp_ref_memmap[RVSP_FLASH].size / 2;
> +    hwaddr flashbase = rvsp_ref_memmap[RVSP_FLASH].base;
> +
> +    name = g_strdup_printf("/flash@%" PRIx64, flashbase);
> +    qemu_fdt_add_subnode(ms->fdt, name);
> +    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "cfi-flash");
> +    qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
> +                                 2, flashbase, 2, flashsize,
> +                                 2, flashbase + flashsize, 2, flashsize);
> +    qemu_fdt_setprop_cell(ms->fdt, name, "bank-width", 4);
> +    g_free(name);
> +}
> +
> +static void create_fdt(RVSPMachineState *s, const MemMapEntry *memmap)
> +{
> +    MachineState *ms = MACHINE(s);
> +    uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1;
> +    uint32_t irq_pcie_phandle = 1;
> +    uint8_t rng_seed[32];
> +
> +    ms->fdt = create_device_tree(&s->fdt_size);
> +    if (!ms->fdt) {
> +        error_report("create_device_tree() failed");
> +        exit(1);
> +    }
> +
> +    qemu_fdt_setprop_string(ms->fdt, "/", "model", "riscv-rvsp-ref,qemu");
> +    qemu_fdt_setprop_string(ms->fdt, "/", "compatible", "riscv-rvsp-ref");
> +    qemu_fdt_setprop_cell(ms->fdt, "/", "#size-cells", 0x2);
> +    qemu_fdt_setprop_cell(ms->fdt, "/", "#address-cells", 0x2);
> +
> +    qemu_fdt_add_subnode(ms->fdt, "/soc");
> +    qemu_fdt_setprop(ms->fdt, "/soc", "ranges", NULL, 0);
> +    qemu_fdt_setprop_string(ms->fdt, "/soc", "compatible", "simple-bus");
> +    qemu_fdt_setprop_cell(ms->fdt, "/soc", "#size-cells", 0x2);
> +    qemu_fdt_setprop_cell(ms->fdt, "/soc", "#address-cells", 0x2);
> +
> +    create_fdt_sockets(s, memmap, &phandle, &irq_mmio_phandle,
> +                       &irq_pcie_phandle, &msi_pcie_phandle);
> +
> +    create_fdt_pcie(s, memmap, irq_pcie_phandle, msi_pcie_phandle);
> +
> +    create_fdt_reset(s, memmap, &phandle);
> +
> +    create_fdt_uart(s, memmap, irq_mmio_phandle);
> +
> +    create_fdt_rtc(s, memmap, irq_mmio_phandle);
> +
> +    create_fdt_flash(s, memmap);
> +    create_fdt_pmu(s);
> +
> +    /* Pass seed to RNG */
> +    qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
> +    qemu_fdt_setprop(ms->fdt, "/chosen", "rng-seed",
> +                     rng_seed, sizeof(rng_seed));
> +}
> +
> +static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
> +                                          DeviceState *irqchip,
> +                                          RVSPMachineState *s)
> +{
> +    DeviceState *dev;
> +    PCIHostState *pci;
> +    PCIDevice *pdev_ahci;
> +    AHCIPCIState *ich9;
> +    DriveInfo *hd[NUM_SATA_PORTS];
> +    MemoryRegion *ecam_alias, *ecam_reg;
> +    MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg;
> +    hwaddr ecam_base = rvsp_ref_memmap[RVSP_PCIE_ECAM].base;
> +    hwaddr ecam_size = rvsp_ref_memmap[RVSP_PCIE_ECAM].size;
> +    hwaddr mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO].base;
> +    hwaddr mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO].size;
> +    hwaddr high_mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].base;
> +    hwaddr high_mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].size;
> +    hwaddr pio_base = rvsp_ref_memmap[RVSP_PCIE_PIO].base;
> +    hwaddr pio_size = rvsp_ref_memmap[RVSP_PCIE_PIO].size;
> +    MachineClass *mc = MACHINE_GET_CLASS(s);
> +    qemu_irq irq;
> +    int i;
> +
> +    dev = qdev_new(TYPE_GPEX_HOST);
> +
> +    /* Set GPEX object properties for the rvsp ref machine */
> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE,
> +                            ecam_base, NULL);
> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE,
> +                            ecam_size, NULL);
> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
> +                             PCI_HOST_BELOW_4G_MMIO_BASE,
> +                             mmio_base, NULL);
> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_BELOW_4G_MMIO_SIZE,
> +                            mmio_size, NULL);
> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
> +                             PCI_HOST_ABOVE_4G_MMIO_BASE,
> +                             high_mmio_base, NULL);
> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ABOVE_4G_MMIO_SIZE,
> +                            high_mmio_size, NULL);
> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE,
> +                            pio_base, NULL);
> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE,
> +                            pio_size, NULL);
> +
> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
> +
> +    ecam_alias = g_new0(MemoryRegion, 1);
> +    ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
> +    memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam",
> +                             ecam_reg, 0, ecam_size);
> +    memory_region_add_subregion(get_system_memory(), ecam_base, ecam_alias);
> +
> +    mmio_alias = g_new0(MemoryRegion, 1);
> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
> +                             mmio_reg, mmio_base, mmio_size);
> +    memory_region_add_subregion(get_system_memory(), mmio_base, mmio_alias);
> +
> +    /* Map high MMIO space */
> +    high_mmio_alias = g_new0(MemoryRegion, 1);
> +    memory_region_init_alias(high_mmio_alias, OBJECT(dev), "pcie-mmio-high",
> +                             mmio_reg, high_mmio_base, high_mmio_size);
> +    memory_region_add_subregion(get_system_memory(), high_mmio_base,
> +                                high_mmio_alias);
> +
> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
> +
> +    for (i = 0; i < GPEX_NUM_IRQS; i++) {
> +        irq = qdev_get_gpio_in(irqchip, RVSP_PCIE_IRQ + i);
> +
> +        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
> +        gpex_set_irq_num(GPEX_HOST(dev), i, RVSP_PCIE_IRQ + i);
> +    }
> +
> +    pci = PCI_HOST_BRIDGE(dev);
> +    pci_init_nic_devices(pci->bus, mc->default_nic);
> +    /* IDE disk setup.  */
> +    pdev_ahci = pci_create_simple(pci->bus, -1, TYPE_ICH9_AHCI);
> +    ich9 = ICH9_AHCI(pdev_ahci);
> +    g_assert(ARRAY_SIZE(hd) == ich9->ahci.ports);
> +    ide_drive_get(hd, ich9->ahci.ports);
> +    ahci_ide_create_devs(&ich9->ahci, hd);
> +
> +    GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus;
> +    return dev;
> +}

We should share as much of this code as possible with the virt machine.

Alistair

> +
> +static DeviceState *rvsp_ref_create_aia(int aia_guests,
> +                                        const MemMapEntry *memmap, int socket,
> +                                        int base_hartid, int hart_count)
> +{
> +    int i;
> +    hwaddr addr;
> +    uint32_t guest_bits;
> +    DeviceState *aplic_s = NULL;
> +    DeviceState *aplic_m = NULL;
> +    bool msimode = true;
> +
> +    /* Per-socket M-level IMSICs */
> +    addr = memmap[RVSP_IMSIC_M].base +
> +           socket * RVSP_IMSIC_GROUP_MAX_SIZE;
> +    for (i = 0; i < hart_count; i++) {
> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
> +                           base_hartid + i, true, 1,
> +                           RVSP_IRQCHIP_NUM_MSIS);
> +    }
> +
> +    /* Per-socket S-level IMSICs */
> +    guest_bits = imsic_num_bits(aia_guests + 1);
> +    addr = memmap[RVSP_IMSIC_S].base + socket * RVSP_IMSIC_GROUP_MAX_SIZE;
> +    for (i = 0; i < hart_count; i++) {
> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(guest_bits),
> +                           base_hartid + i, false, 1 + aia_guests,
> +                           RVSP_IRQCHIP_NUM_MSIS);
> +    }
> +
> +    /* Per-socket M-level APLIC */
> +    aplic_m = riscv_aplic_create(memmap[RVSP_APLIC_M].base +
> +                                 socket * memmap[RVSP_APLIC_M].size,
> +                                 memmap[RVSP_APLIC_M].size,
> +                                 (msimode) ? 0 : base_hartid,
> +                                 (msimode) ? 0 : hart_count,
> +                                 RVSP_IRQCHIP_NUM_SOURCES,
> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
> +                                 msimode, true, NULL);
> +
> +    /* Per-socket S-level APLIC */
> +    aplic_s = riscv_aplic_create(memmap[RVSP_APLIC_S].base +
> +                                 socket * memmap[RVSP_APLIC_S].size,
> +                                 memmap[RVSP_APLIC_S].size,
> +                                 (msimode) ? 0 : base_hartid,
> +                                 (msimode) ? 0 : hart_count,
> +                                 RVSP_IRQCHIP_NUM_SOURCES,
> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
> +                                 msimode, false, aplic_m);
> +
> +    (void)aplic_s;
> +    return aplic_m;
> +}
> +
> +static void rvsp_ref_machine_done(Notifier *notifier, void *data)
> +{
> +    RVSPMachineState *s = container_of(notifier, RVSPMachineState,
> +                                       machine_done);
> +    const MemMapEntry *memmap = rvsp_ref_memmap;
> +    MachineState *machine = MACHINE(s);
> +    target_ulong start_addr = memmap[RVSP_DRAM].base;
> +    target_ulong firmware_end_addr, kernel_start_addr;
> +    const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
> +    uint64_t fdt_load_addr;
> +    uint64_t kernel_entry = 0;
> +    BlockBackend *pflash_blk0;
> +
> +    /* load/create device tree */
> +    if (machine->dtb) {
> +        machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
> +        if (!machine->fdt) {
> +            error_report("load_device_tree() failed");
> +            exit(1);
> +        }
> +    } else {
> +        create_fdt(s, memmap);
> +    }
> +
> +    firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
> +                                                     start_addr, NULL);
> +
> +    pflash_blk0 = pflash_cfi01_get_blk(s->flash[0]);
> +    if (pflash_blk0) {
> +        if (machine->firmware && !strcmp(machine->firmware, "none")) {
> +            /*
> +             * Pflash was supplied but bios is none and not KVM guest,
> +             * let's overwrite the address we jump to after reset to
> +             * the base of the flash.
> +             */
> +            start_addr = rvsp_ref_memmap[RVSP_FLASH].base;
> +        } else {
> +            /*
> +             * Pflash was supplied but either KVM guest or bios is not none.
> +             * In this case, base of the flash would contain S-mode payload.
> +             */
> +            riscv_setup_firmware_boot(machine);
> +            kernel_entry = rvsp_ref_memmap[RVSP_FLASH].base;
> +        }
> +    }
> +
> +    if (machine->kernel_filename && !kernel_entry) {
> +        kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
> +                                                         firmware_end_addr);
> +
> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
> +                                         kernel_start_addr, true, NULL);
> +    }
> +
> +    fdt_load_addr = riscv_compute_fdt_addr(memmap[RVSP_DRAM].base,
> +                                           memmap[RVSP_DRAM].size,
> +                                           machine);
> +    riscv_load_fdt(fdt_load_addr, machine->fdt);
> +
> +    /* load the reset vector */
> +    riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
> +                              rvsp_ref_memmap[RVSP_MROM].base,
> +                              rvsp_ref_memmap[RVSP_MROM].size, kernel_entry,
> +                              fdt_load_addr);
> +
> +}
> +
> +static void rvsp_ref_machine_init(MachineState *machine)
> +{
> +    const MemMapEntry *memmap = rvsp_ref_memmap;
> +    RVSPMachineState *s = RVSP_REF_MACHINE(machine);
> +    MemoryRegion *system_memory = get_system_memory();
> +    MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
> +    char *soc_name;
> +    DeviceState *mmio_irqchip, *pcie_irqchip;
> +    int i, base_hartid, hart_count;
> +    int socket_count = riscv_socket_count(machine);
> +
> +    /* Check socket count limit */
> +    if (RVSP_SOCKETS_MAX < socket_count) {
> +        error_report("number of sockets/nodes should be less than %d",
> +            RVSP_SOCKETS_MAX);
> +        exit(1);
> +    }
> +
> +    if (!tcg_enabled()) {
> +        error_report("'aclint' is only available with TCG acceleration");
> +        exit(1);
> +    }
> +
> +    /* Initialize sockets */
> +    mmio_irqchip = pcie_irqchip = NULL;
> +    for (i = 0; i < socket_count; i++) {
> +        if (!riscv_socket_check_hartids(machine, i)) {
> +            error_report("discontinuous hartids in socket%d", i);
> +            exit(1);
> +        }
> +
> +        base_hartid = riscv_socket_first_hartid(machine, i);
> +        if (base_hartid < 0) {
> +            error_report("can't find hartid base for socket%d", i);
> +            exit(1);
> +        }
> +
> +        hart_count = riscv_socket_hart_count(machine, i);
> +        if (hart_count < 0) {
> +            error_report("can't find hart count for socket%d", i);
> +            exit(1);
> +        }
> +
> +        soc_name = g_strdup_printf("soc%d", i);
> +        object_initialize_child(OBJECT(machine), soc_name, &s->soc[i],
> +                                TYPE_RISCV_HART_ARRAY);
> +        g_free(soc_name);
> +        object_property_set_str(OBJECT(&s->soc[i]), "cpu-type",
> +                                machine->cpu_type, &error_abort);
> +        object_property_set_int(OBJECT(&s->soc[i]), "hartid-base",
> +                                base_hartid, &error_abort);
> +        object_property_set_int(OBJECT(&s->soc[i]), "num-harts",
> +                                hart_count, &error_abort);
> +        sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_fatal);
> +
> +        /* Per-socket ACLINT MTIMER */
> +        riscv_aclint_mtimer_create(memmap[RVSP_ACLINT].base +
> +                i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
> +            RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
> +            base_hartid, hart_count,
> +            RISCV_ACLINT_DEFAULT_MTIMECMP,
> +            RISCV_ACLINT_DEFAULT_MTIME,
> +            RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
> +
> +        /* Per-socket interrupt controller */
> +        s->irqchip[i] = rvsp_ref_create_aia(s->aia_guests,
> +                                            memmap, i, base_hartid,
> +                                            hart_count);
> +
> +        /* Try to use different IRQCHIP instance based device type */
> +        if (i == 0) {
> +            mmio_irqchip = s->irqchip[i];
> +            pcie_irqchip = s->irqchip[i];
> +        }
> +        if (i == 1) {
> +            pcie_irqchip = s->irqchip[i];
> +        }
> +    }
> +
> +    s->memmap = rvsp_ref_memmap;
> +
> +    /* register system main memory (actual RAM) */
> +    memory_region_add_subregion(system_memory, memmap[RVSP_DRAM].base,
> +        machine->ram);
> +
> +    /* boot rom */
> +    memory_region_init_rom(mask_rom, NULL, "riscv_rvsp_ref_board.mrom",
> +                           memmap[RVSP_MROM].size, &error_fatal);
> +    memory_region_add_subregion(system_memory, memmap[RVSP_MROM].base,
> +                                mask_rom);
> +
> +    /* SiFive Test MMIO device */
> +    sifive_test_create(memmap[RVSP_TEST].base);
> +
> +    gpex_pcie_init(system_memory, pcie_irqchip, s);
> +
> +    serial_mm_init(system_memory, memmap[RVSP_UART0].base,
> +        0, qdev_get_gpio_in(mmio_irqchip, RVSP_UART0_IRQ), 399193,
> +        serial_hd(0), DEVICE_LITTLE_ENDIAN);
> +
> +    sysbus_create_simple("goldfish_rtc", memmap[RVSP_RTC].base,
> +        qdev_get_gpio_in(mmio_irqchip, RVSP_RTC_IRQ));
> +
> +    for (i = 0; i < ARRAY_SIZE(s->flash); i++) {
> +        /* Map legacy -drive if=pflash to machine properties */
> +        pflash_cfi01_legacy_drive(s->flash[i],
> +                                  drive_get(IF_PFLASH, 0, i));
> +    }
> +    rvsp_flash_map(s, system_memory);
> +
> +    s->machine_done.notify = rvsp_ref_machine_done;
> +    qemu_add_machine_init_done_notifier(&s->machine_done);
> +}
> +
> +static void rvsp_ref_machine_instance_init(Object *obj)
> +{
> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> +
> +    rvsp_flash_create(s);
> +}
> +
> +static char *rvsp_ref_get_aia_guests(Object *obj, Error **errp)
> +{
> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> +    char val[32];
> +
> +    sprintf(val, "%d", s->aia_guests);
> +    return g_strdup(val);
> +}
> +
> +static void rvsp_ref_set_aia_guests(Object *obj, const char *val, Error **errp)
> +{
> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> +
> +    s->aia_guests = atoi(val);
> +    if (s->aia_guests < 0 || s->aia_guests > RVSP_IRQCHIP_MAX_GUESTS) {
> +        error_setg(errp, "Invalid number of AIA IMSIC guests");
> +        error_append_hint(errp, "Valid values be between 0 and %d.\n",
> +                          RVSP_IRQCHIP_MAX_GUESTS);
> +    }
> +}
> +
> +static void rvsp_ref_machine_class_init(ObjectClass *oc, void *data)
> +{
> +    char str[128];
> +    MachineClass *mc = MACHINE_CLASS(oc);
> +
> +    mc->desc = "RISC-V Server SoC Reference board";
> +    mc->init = rvsp_ref_machine_init;
> +    mc->max_cpus = RVSP_CPUS_MAX;
> +    mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
> +    mc->pci_allow_0_address = true;
> +    mc->default_nic = "e1000e";
> +    mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
> +    mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
> +    mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
> +    mc->numa_mem_supported = true;
> +    /* platform instead of architectural choice */
> +    mc->cpu_cluster_has_numa_boundary = true;
> +    mc->default_ram_id = "riscv_rvsp_ref_board.ram";
> +
> +    object_class_property_add_str(oc, "aia-guests",
> +                                  rvsp_ref_get_aia_guests,
> +                                  rvsp_ref_set_aia_guests);
> +    sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid value "
> +                 "should be between 0 and %d.", RVSP_IRQCHIP_MAX_GUESTS);
> +    object_class_property_set_description(oc, "aia-guests", str);
> +}
> +
> +static const TypeInfo rvsp_ref_typeinfo = {
> +    .name       = TYPE_RVSP_REF_MACHINE,
> +    .parent     = TYPE_MACHINE,
> +    .class_init = rvsp_ref_machine_class_init,
> +    .instance_init = rvsp_ref_machine_instance_init,
> +    .instance_size = sizeof(RVSPMachineState),
> +};
> +
> +static void rvsp_ref_init_register_types(void)
> +{
> +    type_register_static(&rvsp_ref_typeinfo);
> +}
> +
> +type_init(rvsp_ref_init_register_types)
> --
> 2.34.1
>
>
Conor Dooley March 6, 2024, 12:27 a.m. UTC | #5
On Mon, Mar 04, 2024 at 06:25:39PM +0800, Fei Wu wrote:

> +        name = riscv_isa_string(cpu_ptr);
> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name);
> +        g_free(name);

Please use riscv_isa_write_fdt() here so that you populate more than the
deprecated property here please.

Thanks,
Conor.
Wu, Fei March 6, 2024, 2:41 a.m. UTC | #6
Hi Conor,

On 3/6/2024 8:27 AM, Conor Dooley wrote:
> On Mon, Mar 04, 2024 at 06:25:39PM +0800, Fei Wu wrote:
> 
>> +        name = riscv_isa_string(cpu_ptr);
>> +        qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name);
>> +        g_free(name);
> 
> Please use riscv_isa_write_fdt() here so that you populate more than the
> deprecated property here please.
> 
Sure, I saw it's been done in virt.c recently, I will catch up all the
improvements from virt.c to this board.

Thanks,
Fei.

> Thanks,
> Conor.
Wu, Fei March 6, 2024, 12:55 p.m. UTC | #7
On 3/6/2024 8:19 AM, Alistair Francis wrote:
> On Mon, Mar 4, 2024 at 8:28 PM Fei Wu <fei2.wu@intel.com> wrote:
>>
>> The RISC-V Server Platform specification[1] defines a standardized set
>> of hardware and software capabilities, that portable system software,
>> such as OS and hypervisors can rely on being present in a RISC-V server
>> platform.
>>
>> A corresponding Qemu RISC-V server platform reference (rvsp-ref for
>> short) machine type is added to provide a environment for firmware/OS
>> development and testing. The main features included in rvsp-ref are:
>>
>>  - Based on riscv virt machine type
>>  - A new memory map as close as virt machine as possible
>>  - A new virt CPU type rvsp-ref-cpu for server platform compliance
>>  - AIA
>>  - PCIe AHCI
>>  - PCIe NIC
>>  - No virtio device
>>  - No fw_cfg device
>>  - No ACPI table provided
>>  - Only minimal device tree nodes
>>
>> [1] https://github.com/riscv-non-isa/riscv-server-platform
> 
> + Atish
> 
>>
>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>> ---
>>  configs/devices/riscv64-softmmu/default.mak |    1 +
>>  hw/riscv/Kconfig                            |   13 +
>>  hw/riscv/meson.build                        |    1 +
>>  hw/riscv/server_platform_ref.c              | 1244 +++++++++++++++++++
>>  4 files changed, 1259 insertions(+)
>>  create mode 100644 hw/riscv/server_platform_ref.c
>>
>> diff --git a/configs/devices/riscv64-softmmu/default.mak b/configs/devices/riscv64-softmmu/default.mak
>> index 3f68059448..a1d98e49ef 100644
>> --- a/configs/devices/riscv64-softmmu/default.mak
>> +++ b/configs/devices/riscv64-softmmu/default.mak
>> @@ -10,5 +10,6 @@ CONFIG_SPIKE=y
>>  CONFIG_SIFIVE_E=y
>>  CONFIG_SIFIVE_U=y
>>  CONFIG_RISCV_VIRT=y
>> +CONFIG_SERVER_PLATFORM_REF=y
>>  CONFIG_MICROCHIP_PFSOC=y
>>  CONFIG_SHAKTI_C=y
>> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
>> index 5d644eb7b1..debac5a7f5 100644
>> --- a/hw/riscv/Kconfig
>> +++ b/hw/riscv/Kconfig
>> @@ -48,6 +48,19 @@ config RISCV_VIRT
>>      select ACPI
>>      select ACPI_PCI
>>
>> +config SERVER_PLATFORM_REF
>> +    bool
>> +    select RISCV_NUMA
>> +    select GOLDFISH_RTC
>> +    select PCI
>> +    select PCI_EXPRESS_GENERIC_BRIDGE
>> +    select PFLASH_CFI01
>> +    select SERIAL
>> +    select RISCV_ACLINT
>> +    select RISCV_APLIC
>> +    select RISCV_IMSIC
>> +    select SIFIVE_TEST
> 
> Do we really need SiFive Test in the server platform?
> 
It's used to reset the system, is there any better choice?

Probably I can remove the "sifive,test1 sifive,test0" from the
compatible list in fdt, and only keep "syscon", I see opensbi has
already removed that support in commit c2e602707.

> Same with the goldfish RTC?
> 
Although the spec doesn't make RTC mandatory, it should be a common
practice having a RTC on server, so I add a RTC here no matter it's
goldfish or not.

>> +
>>  config SHAKTI_C
>>      bool
>>      select RISCV_ACLINT
>> diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
>> index 2f7ee81be3..bb3aff91ea 100644
>> --- a/hw/riscv/meson.build
>> +++ b/hw/riscv/meson.build
>> @@ -4,6 +4,7 @@ riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: files('numa.c'))
>>  riscv_ss.add(files('riscv_hart.c'))
>>  riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c'))
>>  riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c'))
>> +riscv_ss.add(when: 'CONFIG_SERVER_PLATFORM_REF', if_true: files('server_platform_ref.c'))
>>  riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c'))
>>  riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
>>  riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
>> diff --git a/hw/riscv/server_platform_ref.c b/hw/riscv/server_platform_ref.c
>> new file mode 100644
>> index 0000000000..ae90c4b27a
>> --- /dev/null
>> +++ b/hw/riscv/server_platform_ref.c
>> @@ -0,0 +1,1244 @@
>> +/*
>> + * QEMU RISC-V Server Platfrom (RVSP) Reference Board
> 
> Platform
> 
OK.

>> +static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
>> +                                          DeviceState *irqchip,
>> +                                          RVSPMachineState *s)
>> +{
>> +    DeviceState *dev;
>> +    PCIHostState *pci;
>> +    PCIDevice *pdev_ahci;
>> +    AHCIPCIState *ich9;
>> +    DriveInfo *hd[NUM_SATA_PORTS];
>> +    MemoryRegion *ecam_alias, *ecam_reg;
>> +    MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg;
>> +    hwaddr ecam_base = rvsp_ref_memmap[RVSP_PCIE_ECAM].base;
>> +    hwaddr ecam_size = rvsp_ref_memmap[RVSP_PCIE_ECAM].size;
>> +    hwaddr mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO].base;
>> +    hwaddr mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO].size;
>> +    hwaddr high_mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].base;
>> +    hwaddr high_mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].size;
>> +    hwaddr pio_base = rvsp_ref_memmap[RVSP_PCIE_PIO].base;
>> +    hwaddr pio_size = rvsp_ref_memmap[RVSP_PCIE_PIO].size;
>> +    MachineClass *mc = MACHINE_GET_CLASS(s);
>> +    qemu_irq irq;
>> +    int i;
>> +
>> +    dev = qdev_new(TYPE_GPEX_HOST);
>> +
>> +    /* Set GPEX object properties for the rvsp ref machine */
>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE,
>> +                            ecam_base, NULL);
>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE,
>> +                            ecam_size, NULL);
>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
>> +                             PCI_HOST_BELOW_4G_MMIO_BASE,
>> +                             mmio_base, NULL);
>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_BELOW_4G_MMIO_SIZE,
>> +                            mmio_size, NULL);
>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
>> +                             PCI_HOST_ABOVE_4G_MMIO_BASE,
>> +                             high_mmio_base, NULL);
>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ABOVE_4G_MMIO_SIZE,
>> +                            high_mmio_size, NULL);
>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE,
>> +                            pio_base, NULL);
>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE,
>> +                            pio_size, NULL);
>> +
>> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
>> +
>> +    ecam_alias = g_new0(MemoryRegion, 1);
>> +    ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
>> +    memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam",
>> +                             ecam_reg, 0, ecam_size);
>> +    memory_region_add_subregion(get_system_memory(), ecam_base, ecam_alias);
>> +
>> +    mmio_alias = g_new0(MemoryRegion, 1);
>> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
>> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
>> +                             mmio_reg, mmio_base, mmio_size);
>> +    memory_region_add_subregion(get_system_memory(), mmio_base, mmio_alias);
>> +
>> +    /* Map high MMIO space */
>> +    high_mmio_alias = g_new0(MemoryRegion, 1);
>> +    memory_region_init_alias(high_mmio_alias, OBJECT(dev), "pcie-mmio-high",
>> +                             mmio_reg, high_mmio_base, high_mmio_size);
>> +    memory_region_add_subregion(get_system_memory(), high_mmio_base,
>> +                                high_mmio_alias);
>> +
>> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
>> +
>> +    for (i = 0; i < GPEX_NUM_IRQS; i++) {
>> +        irq = qdev_get_gpio_in(irqchip, RVSP_PCIE_IRQ + i);
>> +
>> +        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
>> +        gpex_set_irq_num(GPEX_HOST(dev), i, RVSP_PCIE_IRQ + i);
>> +    }
>> +
>> +    pci = PCI_HOST_BRIDGE(dev);
>> +    pci_init_nic_devices(pci->bus, mc->default_nic);
>> +    /* IDE disk setup.  */
>> +    pdev_ahci = pci_create_simple(pci->bus, -1, TYPE_ICH9_AHCI);
>> +    ich9 = ICH9_AHCI(pdev_ahci);
>> +    g_assert(ARRAY_SIZE(hd) == ich9->ahci.ports);
>> +    ide_drive_get(hd, ich9->ahci.ports);
>> +    ahci_ide_create_devs(&ich9->ahci, hd);
>> +
>> +    GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus;
>> +    return dev;
>> +}
> 
> We should share as much of this code as possible with the virt machine.
> 
This function references multiple rvsp specific variables, some
refactorings are needed to group all the information of
ecam/mmio/high_mmio/pio/irq and pass them to virt and rvsp respectively.
I see arm virt/sbsa-ref are seperated clearly and no sharing between
them. It has the benefit there is no side effect between them. Should we
use the same policy?

Thanks,
Fei.

> Alistair
> 
>> +
>> +static DeviceState *rvsp_ref_create_aia(int aia_guests,
>> +                                        const MemMapEntry *memmap, int socket,
>> +                                        int base_hartid, int hart_count)
>> +{
>> +    int i;
>> +    hwaddr addr;
>> +    uint32_t guest_bits;
>> +    DeviceState *aplic_s = NULL;
>> +    DeviceState *aplic_m = NULL;
>> +    bool msimode = true;
>> +
>> +    /* Per-socket M-level IMSICs */
>> +    addr = memmap[RVSP_IMSIC_M].base +
>> +           socket * RVSP_IMSIC_GROUP_MAX_SIZE;
>> +    for (i = 0; i < hart_count; i++) {
>> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
>> +                           base_hartid + i, true, 1,
>> +                           RVSP_IRQCHIP_NUM_MSIS);
>> +    }
>> +
>> +    /* Per-socket S-level IMSICs */
>> +    guest_bits = imsic_num_bits(aia_guests + 1);
>> +    addr = memmap[RVSP_IMSIC_S].base + socket * RVSP_IMSIC_GROUP_MAX_SIZE;
>> +    for (i = 0; i < hart_count; i++) {
>> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(guest_bits),
>> +                           base_hartid + i, false, 1 + aia_guests,
>> +                           RVSP_IRQCHIP_NUM_MSIS);
>> +    }
>> +
>> +    /* Per-socket M-level APLIC */
>> +    aplic_m = riscv_aplic_create(memmap[RVSP_APLIC_M].base +
>> +                                 socket * memmap[RVSP_APLIC_M].size,
>> +                                 memmap[RVSP_APLIC_M].size,
>> +                                 (msimode) ? 0 : base_hartid,
>> +                                 (msimode) ? 0 : hart_count,
>> +                                 RVSP_IRQCHIP_NUM_SOURCES,
>> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
>> +                                 msimode, true, NULL);
>> +
>> +    /* Per-socket S-level APLIC */
>> +    aplic_s = riscv_aplic_create(memmap[RVSP_APLIC_S].base +
>> +                                 socket * memmap[RVSP_APLIC_S].size,
>> +                                 memmap[RVSP_APLIC_S].size,
>> +                                 (msimode) ? 0 : base_hartid,
>> +                                 (msimode) ? 0 : hart_count,
>> +                                 RVSP_IRQCHIP_NUM_SOURCES,
>> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
>> +                                 msimode, false, aplic_m);
>> +
>> +    (void)aplic_s;
>> +    return aplic_m;
>> +}
>> +
>> +static void rvsp_ref_machine_done(Notifier *notifier, void *data)
>> +{
>> +    RVSPMachineState *s = container_of(notifier, RVSPMachineState,
>> +                                       machine_done);
>> +    const MemMapEntry *memmap = rvsp_ref_memmap;
>> +    MachineState *machine = MACHINE(s);
>> +    target_ulong start_addr = memmap[RVSP_DRAM].base;
>> +    target_ulong firmware_end_addr, kernel_start_addr;
>> +    const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
>> +    uint64_t fdt_load_addr;
>> +    uint64_t kernel_entry = 0;
>> +    BlockBackend *pflash_blk0;
>> +
>> +    /* load/create device tree */
>> +    if (machine->dtb) {
>> +        machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
>> +        if (!machine->fdt) {
>> +            error_report("load_device_tree() failed");
>> +            exit(1);
>> +        }
>> +    } else {
>> +        create_fdt(s, memmap);
>> +    }
>> +
>> +    firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
>> +                                                     start_addr, NULL);
>> +
>> +    pflash_blk0 = pflash_cfi01_get_blk(s->flash[0]);
>> +    if (pflash_blk0) {
>> +        if (machine->firmware && !strcmp(machine->firmware, "none")) {
>> +            /*
>> +             * Pflash was supplied but bios is none and not KVM guest,
>> +             * let's overwrite the address we jump to after reset to
>> +             * the base of the flash.
>> +             */
>> +            start_addr = rvsp_ref_memmap[RVSP_FLASH].base;
>> +        } else {
>> +            /*
>> +             * Pflash was supplied but either KVM guest or bios is not none.
>> +             * In this case, base of the flash would contain S-mode payload.
>> +             */
>> +            riscv_setup_firmware_boot(machine);
>> +            kernel_entry = rvsp_ref_memmap[RVSP_FLASH].base;
>> +        }
>> +    }
>> +
>> +    if (machine->kernel_filename && !kernel_entry) {
>> +        kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>> +                                                         firmware_end_addr);
>> +
>> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
>> +                                         kernel_start_addr, true, NULL);
>> +    }
>> +
>> +    fdt_load_addr = riscv_compute_fdt_addr(memmap[RVSP_DRAM].base,
>> +                                           memmap[RVSP_DRAM].size,
>> +                                           machine);
>> +    riscv_load_fdt(fdt_load_addr, machine->fdt);
>> +
>> +    /* load the reset vector */
>> +    riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
>> +                              rvsp_ref_memmap[RVSP_MROM].base,
>> +                              rvsp_ref_memmap[RVSP_MROM].size, kernel_entry,
>> +                              fdt_load_addr);
>> +
>> +}
>> +
>> +static void rvsp_ref_machine_init(MachineState *machine)
>> +{
>> +    const MemMapEntry *memmap = rvsp_ref_memmap;
>> +    RVSPMachineState *s = RVSP_REF_MACHINE(machine);
>> +    MemoryRegion *system_memory = get_system_memory();
>> +    MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
>> +    char *soc_name;
>> +    DeviceState *mmio_irqchip, *pcie_irqchip;
>> +    int i, base_hartid, hart_count;
>> +    int socket_count = riscv_socket_count(machine);
>> +
>> +    /* Check socket count limit */
>> +    if (RVSP_SOCKETS_MAX < socket_count) {
>> +        error_report("number of sockets/nodes should be less than %d",
>> +            RVSP_SOCKETS_MAX);
>> +        exit(1);
>> +    }
>> +
>> +    if (!tcg_enabled()) {
>> +        error_report("'aclint' is only available with TCG acceleration");
>> +        exit(1);
>> +    }
>> +
>> +    /* Initialize sockets */
>> +    mmio_irqchip = pcie_irqchip = NULL;
>> +    for (i = 0; i < socket_count; i++) {
>> +        if (!riscv_socket_check_hartids(machine, i)) {
>> +            error_report("discontinuous hartids in socket%d", i);
>> +            exit(1);
>> +        }
>> +
>> +        base_hartid = riscv_socket_first_hartid(machine, i);
>> +        if (base_hartid < 0) {
>> +            error_report("can't find hartid base for socket%d", i);
>> +            exit(1);
>> +        }
>> +
>> +        hart_count = riscv_socket_hart_count(machine, i);
>> +        if (hart_count < 0) {
>> +            error_report("can't find hart count for socket%d", i);
>> +            exit(1);
>> +        }
>> +
>> +        soc_name = g_strdup_printf("soc%d", i);
>> +        object_initialize_child(OBJECT(machine), soc_name, &s->soc[i],
>> +                                TYPE_RISCV_HART_ARRAY);
>> +        g_free(soc_name);
>> +        object_property_set_str(OBJECT(&s->soc[i]), "cpu-type",
>> +                                machine->cpu_type, &error_abort);
>> +        object_property_set_int(OBJECT(&s->soc[i]), "hartid-base",
>> +                                base_hartid, &error_abort);
>> +        object_property_set_int(OBJECT(&s->soc[i]), "num-harts",
>> +                                hart_count, &error_abort);
>> +        sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_fatal);
>> +
>> +        /* Per-socket ACLINT MTIMER */
>> +        riscv_aclint_mtimer_create(memmap[RVSP_ACLINT].base +
>> +                i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
>> +            RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
>> +            base_hartid, hart_count,
>> +            RISCV_ACLINT_DEFAULT_MTIMECMP,
>> +            RISCV_ACLINT_DEFAULT_MTIME,
>> +            RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
>> +
>> +        /* Per-socket interrupt controller */
>> +        s->irqchip[i] = rvsp_ref_create_aia(s->aia_guests,
>> +                                            memmap, i, base_hartid,
>> +                                            hart_count);
>> +
>> +        /* Try to use different IRQCHIP instance based device type */
>> +        if (i == 0) {
>> +            mmio_irqchip = s->irqchip[i];
>> +            pcie_irqchip = s->irqchip[i];
>> +        }
>> +        if (i == 1) {
>> +            pcie_irqchip = s->irqchip[i];
>> +        }
>> +    }
>> +
>> +    s->memmap = rvsp_ref_memmap;
>> +
>> +    /* register system main memory (actual RAM) */
>> +    memory_region_add_subregion(system_memory, memmap[RVSP_DRAM].base,
>> +        machine->ram);
>> +
>> +    /* boot rom */
>> +    memory_region_init_rom(mask_rom, NULL, "riscv_rvsp_ref_board.mrom",
>> +                           memmap[RVSP_MROM].size, &error_fatal);
>> +    memory_region_add_subregion(system_memory, memmap[RVSP_MROM].base,
>> +                                mask_rom);
>> +
>> +    /* SiFive Test MMIO device */
>> +    sifive_test_create(memmap[RVSP_TEST].base);
>> +
>> +    gpex_pcie_init(system_memory, pcie_irqchip, s);
>> +
>> +    serial_mm_init(system_memory, memmap[RVSP_UART0].base,
>> +        0, qdev_get_gpio_in(mmio_irqchip, RVSP_UART0_IRQ), 399193,
>> +        serial_hd(0), DEVICE_LITTLE_ENDIAN);
>> +
>> +    sysbus_create_simple("goldfish_rtc", memmap[RVSP_RTC].base,
>> +        qdev_get_gpio_in(mmio_irqchip, RVSP_RTC_IRQ));
>> +
>> +    for (i = 0; i < ARRAY_SIZE(s->flash); i++) {
>> +        /* Map legacy -drive if=pflash to machine properties */
>> +        pflash_cfi01_legacy_drive(s->flash[i],
>> +                                  drive_get(IF_PFLASH, 0, i));
>> +    }
>> +    rvsp_flash_map(s, system_memory);
>> +
>> +    s->machine_done.notify = rvsp_ref_machine_done;
>> +    qemu_add_machine_init_done_notifier(&s->machine_done);
>> +}
>> +
>> +static void rvsp_ref_machine_instance_init(Object *obj)
>> +{
>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>> +
>> +    rvsp_flash_create(s);
>> +}
>> +
>> +static char *rvsp_ref_get_aia_guests(Object *obj, Error **errp)
>> +{
>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>> +    char val[32];
>> +
>> +    sprintf(val, "%d", s->aia_guests);
>> +    return g_strdup(val);
>> +}
>> +
>> +static void rvsp_ref_set_aia_guests(Object *obj, const char *val, Error **errp)
>> +{
>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>> +
>> +    s->aia_guests = atoi(val);
>> +    if (s->aia_guests < 0 || s->aia_guests > RVSP_IRQCHIP_MAX_GUESTS) {
>> +        error_setg(errp, "Invalid number of AIA IMSIC guests");
>> +        error_append_hint(errp, "Valid values be between 0 and %d.\n",
>> +                          RVSP_IRQCHIP_MAX_GUESTS);
>> +    }
>> +}
>> +
>> +static void rvsp_ref_machine_class_init(ObjectClass *oc, void *data)
>> +{
>> +    char str[128];
>> +    MachineClass *mc = MACHINE_CLASS(oc);
>> +
>> +    mc->desc = "RISC-V Server SoC Reference board";
>> +    mc->init = rvsp_ref_machine_init;
>> +    mc->max_cpus = RVSP_CPUS_MAX;
>> +    mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
>> +    mc->pci_allow_0_address = true;
>> +    mc->default_nic = "e1000e";
>> +    mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
>> +    mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
>> +    mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
>> +    mc->numa_mem_supported = true;
>> +    /* platform instead of architectural choice */
>> +    mc->cpu_cluster_has_numa_boundary = true;
>> +    mc->default_ram_id = "riscv_rvsp_ref_board.ram";
>> +
>> +    object_class_property_add_str(oc, "aia-guests",
>> +                                  rvsp_ref_get_aia_guests,
>> +                                  rvsp_ref_set_aia_guests);
>> +    sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid value "
>> +                 "should be between 0 and %d.", RVSP_IRQCHIP_MAX_GUESTS);
>> +    object_class_property_set_description(oc, "aia-guests", str);
>> +}
>> +
>> +static const TypeInfo rvsp_ref_typeinfo = {
>> +    .name       = TYPE_RVSP_REF_MACHINE,
>> +    .parent     = TYPE_MACHINE,
>> +    .class_init = rvsp_ref_machine_class_init,
>> +    .instance_init = rvsp_ref_machine_instance_init,
>> +    .instance_size = sizeof(RVSPMachineState),
>> +};
>> +
>> +static void rvsp_ref_init_register_types(void)
>> +{
>> +    type_register_static(&rvsp_ref_typeinfo);
>> +}
>> +
>> +type_init(rvsp_ref_init_register_types)
>> --
>> 2.34.1
>>
>>
Atish Patra March 6, 2024, 7:13 p.m. UTC | #8
On Wed, Mar 6, 2024 at 4:56 AM Wu, Fei <fei2.wu@intel.com> wrote:
>
> On 3/6/2024 8:19 AM, Alistair Francis wrote:
> > On Mon, Mar 4, 2024 at 8:28 PM Fei Wu <fei2.wu@intel.com> wrote:
> >>
> >> The RISC-V Server Platform specification[1] defines a standardized set
> >> of hardware and software capabilities, that portable system software,
> >> such as OS and hypervisors can rely on being present in a RISC-V server
> >> platform.
> >>
> >> A corresponding Qemu RISC-V server platform reference (rvsp-ref for
> >> short) machine type is added to provide a environment for firmware/OS
> >> development and testing. The main features included in rvsp-ref are:
> >>
> >>  - Based on riscv virt machine type
> >>  - A new memory map as close as virt machine as possible
> >>  - A new virt CPU type rvsp-ref-cpu for server platform compliance
> >>  - AIA
> >>  - PCIe AHCI
> >>  - PCIe NIC
> >>  - No virtio device
> >>  - No fw_cfg device
> >>  - No ACPI table provided
> >>  - Only minimal device tree nodes
> >>
> >> [1] https://github.com/riscv-non-isa/riscv-server-platform
> >
> > + Atish
> >
> >>
> >> Signed-off-by: Fei Wu <fei2.wu@intel.com>
> >> ---
> >>  configs/devices/riscv64-softmmu/default.mak |    1 +
> >>  hw/riscv/Kconfig                            |   13 +
> >>  hw/riscv/meson.build                        |    1 +
> >>  hw/riscv/server_platform_ref.c              | 1244 +++++++++++++++++++
> >>  4 files changed, 1259 insertions(+)
> >>  create mode 100644 hw/riscv/server_platform_ref.c
> >>
> >> diff --git a/configs/devices/riscv64-softmmu/default.mak b/configs/devices/riscv64-softmmu/default.mak
> >> index 3f68059448..a1d98e49ef 100644
> >> --- a/configs/devices/riscv64-softmmu/default.mak
> >> +++ b/configs/devices/riscv64-softmmu/default.mak
> >> @@ -10,5 +10,6 @@ CONFIG_SPIKE=y
> >>  CONFIG_SIFIVE_E=y
> >>  CONFIG_SIFIVE_U=y
> >>  CONFIG_RISCV_VIRT=y
> >> +CONFIG_SERVER_PLATFORM_REF=y
> >>  CONFIG_MICROCHIP_PFSOC=y
> >>  CONFIG_SHAKTI_C=y
> >> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
> >> index 5d644eb7b1..debac5a7f5 100644
> >> --- a/hw/riscv/Kconfig
> >> +++ b/hw/riscv/Kconfig
> >> @@ -48,6 +48,19 @@ config RISCV_VIRT
> >>      select ACPI
> >>      select ACPI_PCI
> >>
> >> +config SERVER_PLATFORM_REF
> >> +    bool
> >> +    select RISCV_NUMA
> >> +    select GOLDFISH_RTC
> >> +    select PCI
> >> +    select PCI_EXPRESS_GENERIC_BRIDGE
> >> +    select PFLASH_CFI01
> >> +    select SERIAL
> >> +    select RISCV_ACLINT
> >> +    select RISCV_APLIC
> >> +    select RISCV_IMSIC
> >> +    select SIFIVE_TEST
> >
> > Do we really need SiFive Test in the server platform?
> >
> It's used to reset the system, is there any better choice?
>
> Probably I can remove the "sifive,test1 sifive,test0" from the
> compatible list in fdt, and only keep "syscon", I see opensbi has
> already removed that support in commit c2e602707.
>
> > Same with the goldfish RTC?
> >
> Although the spec doesn't make RTC mandatory, it should be a common
> practice having a RTC on server, so I add a RTC here no matter it's
> goldfish or not.
>

The platform spec says
HPER_070 : A battery-backed RTC or analogous timekeeping mechanism
MUST be implemented.

Can we consider goldfish RTC in this category ?

But I want to discuss a larger point as the server platform/SoC spec
defines a bunch of optional requirement.
Does this platform intend to be a platform that is a superset of all
those options or allow optionality in
the platform as well ?

> >> +
> >>  config SHAKTI_C
> >>      bool
> >>      select RISCV_ACLINT
> >> diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
> >> index 2f7ee81be3..bb3aff91ea 100644
> >> --- a/hw/riscv/meson.build
> >> +++ b/hw/riscv/meson.build
> >> @@ -4,6 +4,7 @@ riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: files('numa.c'))
> >>  riscv_ss.add(files('riscv_hart.c'))
> >>  riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c'))
> >>  riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c'))
> >> +riscv_ss.add(when: 'CONFIG_SERVER_PLATFORM_REF', if_true: files('server_platform_ref.c'))
> >>  riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c'))
> >>  riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
> >>  riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
> >> diff --git a/hw/riscv/server_platform_ref.c b/hw/riscv/server_platform_ref.c
> >> new file mode 100644
> >> index 0000000000..ae90c4b27a
> >> --- /dev/null
> >> +++ b/hw/riscv/server_platform_ref.c
> >> @@ -0,0 +1,1244 @@
> >> +/*
> >> + * QEMU RISC-V Server Platfrom (RVSP) Reference Board
> >
> > Platform
> >
> OK.
>
> >> +static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
> >> +                                          DeviceState *irqchip,
> >> +                                          RVSPMachineState *s)
> >> +{
> >> +    DeviceState *dev;
> >> +    PCIHostState *pci;
> >> +    PCIDevice *pdev_ahci;
> >> +    AHCIPCIState *ich9;
> >> +    DriveInfo *hd[NUM_SATA_PORTS];
> >> +    MemoryRegion *ecam_alias, *ecam_reg;
> >> +    MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg;
> >> +    hwaddr ecam_base = rvsp_ref_memmap[RVSP_PCIE_ECAM].base;
> >> +    hwaddr ecam_size = rvsp_ref_memmap[RVSP_PCIE_ECAM].size;
> >> +    hwaddr mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO].base;
> >> +    hwaddr mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO].size;
> >> +    hwaddr high_mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].base;
> >> +    hwaddr high_mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].size;
> >> +    hwaddr pio_base = rvsp_ref_memmap[RVSP_PCIE_PIO].base;
> >> +    hwaddr pio_size = rvsp_ref_memmap[RVSP_PCIE_PIO].size;
> >> +    MachineClass *mc = MACHINE_GET_CLASS(s);
> >> +    qemu_irq irq;
> >> +    int i;
> >> +
> >> +    dev = qdev_new(TYPE_GPEX_HOST);
> >> +
> >> +    /* Set GPEX object properties for the rvsp ref machine */
> >> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE,
> >> +                            ecam_base, NULL);
> >> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE,
> >> +                            ecam_size, NULL);
> >> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
> >> +                             PCI_HOST_BELOW_4G_MMIO_BASE,
> >> +                             mmio_base, NULL);
> >> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_BELOW_4G_MMIO_SIZE,
> >> +                            mmio_size, NULL);
> >> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
> >> +                             PCI_HOST_ABOVE_4G_MMIO_BASE,
> >> +                             high_mmio_base, NULL);
> >> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ABOVE_4G_MMIO_SIZE,
> >> +                            high_mmio_size, NULL);
> >> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE,
> >> +                            pio_base, NULL);
> >> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE,
> >> +                            pio_size, NULL);
> >> +
> >> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
> >> +
> >> +    ecam_alias = g_new0(MemoryRegion, 1);
> >> +    ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
> >> +    memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam",
> >> +                             ecam_reg, 0, ecam_size);
> >> +    memory_region_add_subregion(get_system_memory(), ecam_base, ecam_alias);
> >> +
> >> +    mmio_alias = g_new0(MemoryRegion, 1);
> >> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
> >> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
> >> +                             mmio_reg, mmio_base, mmio_size);
> >> +    memory_region_add_subregion(get_system_memory(), mmio_base, mmio_alias);
> >> +
> >> +    /* Map high MMIO space */
> >> +    high_mmio_alias = g_new0(MemoryRegion, 1);
> >> +    memory_region_init_alias(high_mmio_alias, OBJECT(dev), "pcie-mmio-high",
> >> +                             mmio_reg, high_mmio_base, high_mmio_size);
> >> +    memory_region_add_subregion(get_system_memory(), high_mmio_base,
> >> +                                high_mmio_alias);
> >> +
> >> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
> >> +
> >> +    for (i = 0; i < GPEX_NUM_IRQS; i++) {
> >> +        irq = qdev_get_gpio_in(irqchip, RVSP_PCIE_IRQ + i);
> >> +
> >> +        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
> >> +        gpex_set_irq_num(GPEX_HOST(dev), i, RVSP_PCIE_IRQ + i);
> >> +    }
> >> +
> >> +    pci = PCI_HOST_BRIDGE(dev);
> >> +    pci_init_nic_devices(pci->bus, mc->default_nic);
> >> +    /* IDE disk setup.  */
> >> +    pdev_ahci = pci_create_simple(pci->bus, -1, TYPE_ICH9_AHCI);
> >> +    ich9 = ICH9_AHCI(pdev_ahci);
> >> +    g_assert(ARRAY_SIZE(hd) == ich9->ahci.ports);
> >> +    ide_drive_get(hd, ich9->ahci.ports);
> >> +    ahci_ide_create_devs(&ich9->ahci, hd);
> >> +
> >> +    GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus;
> >> +    return dev;
> >> +}
> >
> > We should share as much of this code as possible with the virt machine.
> >
> This function references multiple rvsp specific variables, some
> refactorings are needed to group all the information of
> ecam/mmio/high_mmio/pio/irq and pass them to virt and rvsp respectively.
> I see arm virt/sbsa-ref are seperated clearly and no sharing between
> them. It has the benefit there is no side effect between them. Should we
> use the same policy?
>
> Thanks,
> Fei.
>
> > Alistair
> >
> >> +
> >> +static DeviceState *rvsp_ref_create_aia(int aia_guests,
> >> +                                        const MemMapEntry *memmap, int socket,
> >> +                                        int base_hartid, int hart_count)
> >> +{
> >> +    int i;
> >> +    hwaddr addr;
> >> +    uint32_t guest_bits;
> >> +    DeviceState *aplic_s = NULL;
> >> +    DeviceState *aplic_m = NULL;
> >> +    bool msimode = true;
> >> +
> >> +    /* Per-socket M-level IMSICs */
> >> +    addr = memmap[RVSP_IMSIC_M].base +
> >> +           socket * RVSP_IMSIC_GROUP_MAX_SIZE;
> >> +    for (i = 0; i < hart_count; i++) {
> >> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
> >> +                           base_hartid + i, true, 1,
> >> +                           RVSP_IRQCHIP_NUM_MSIS);
> >> +    }
> >> +
> >> +    /* Per-socket S-level IMSICs */
> >> +    guest_bits = imsic_num_bits(aia_guests + 1);
> >> +    addr = memmap[RVSP_IMSIC_S].base + socket * RVSP_IMSIC_GROUP_MAX_SIZE;
> >> +    for (i = 0; i < hart_count; i++) {
> >> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(guest_bits),
> >> +                           base_hartid + i, false, 1 + aia_guests,
> >> +                           RVSP_IRQCHIP_NUM_MSIS);
> >> +    }
> >> +
> >> +    /* Per-socket M-level APLIC */
> >> +    aplic_m = riscv_aplic_create(memmap[RVSP_APLIC_M].base +
> >> +                                 socket * memmap[RVSP_APLIC_M].size,
> >> +                                 memmap[RVSP_APLIC_M].size,
> >> +                                 (msimode) ? 0 : base_hartid,
> >> +                                 (msimode) ? 0 : hart_count,
> >> +                                 RVSP_IRQCHIP_NUM_SOURCES,
> >> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
> >> +                                 msimode, true, NULL);
> >> +
> >> +    /* Per-socket S-level APLIC */
> >> +    aplic_s = riscv_aplic_create(memmap[RVSP_APLIC_S].base +
> >> +                                 socket * memmap[RVSP_APLIC_S].size,
> >> +                                 memmap[RVSP_APLIC_S].size,
> >> +                                 (msimode) ? 0 : base_hartid,
> >> +                                 (msimode) ? 0 : hart_count,
> >> +                                 RVSP_IRQCHIP_NUM_SOURCES,
> >> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
> >> +                                 msimode, false, aplic_m);
> >> +
> >> +    (void)aplic_s;
> >> +    return aplic_m;
> >> +}
> >> +
> >> +static void rvsp_ref_machine_done(Notifier *notifier, void *data)
> >> +{
> >> +    RVSPMachineState *s = container_of(notifier, RVSPMachineState,
> >> +                                       machine_done);
> >> +    const MemMapEntry *memmap = rvsp_ref_memmap;
> >> +    MachineState *machine = MACHINE(s);
> >> +    target_ulong start_addr = memmap[RVSP_DRAM].base;
> >> +    target_ulong firmware_end_addr, kernel_start_addr;
> >> +    const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
> >> +    uint64_t fdt_load_addr;
> >> +    uint64_t kernel_entry = 0;
> >> +    BlockBackend *pflash_blk0;
> >> +
> >> +    /* load/create device tree */
> >> +    if (machine->dtb) {
> >> +        machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
> >> +        if (!machine->fdt) {
> >> +            error_report("load_device_tree() failed");
> >> +            exit(1);
> >> +        }
> >> +    } else {
> >> +        create_fdt(s, memmap);
> >> +    }
> >> +
> >> +    firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
> >> +                                                     start_addr, NULL);
> >> +
> >> +    pflash_blk0 = pflash_cfi01_get_blk(s->flash[0]);
> >> +    if (pflash_blk0) {
> >> +        if (machine->firmware && !strcmp(machine->firmware, "none")) {
> >> +            /*
> >> +             * Pflash was supplied but bios is none and not KVM guest,
> >> +             * let's overwrite the address we jump to after reset to
> >> +             * the base of the flash.
> >> +             */
> >> +            start_addr = rvsp_ref_memmap[RVSP_FLASH].base;
> >> +        } else {
> >> +            /*
> >> +             * Pflash was supplied but either KVM guest or bios is not none.
> >> +             * In this case, base of the flash would contain S-mode payload.
> >> +             */
> >> +            riscv_setup_firmware_boot(machine);
> >> +            kernel_entry = rvsp_ref_memmap[RVSP_FLASH].base;
> >> +        }
> >> +    }
> >> +
> >> +    if (machine->kernel_filename && !kernel_entry) {
> >> +        kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
> >> +                                                         firmware_end_addr);
> >> +
> >> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
> >> +                                         kernel_start_addr, true, NULL);
> >> +    }
> >> +
> >> +    fdt_load_addr = riscv_compute_fdt_addr(memmap[RVSP_DRAM].base,
> >> +                                           memmap[RVSP_DRAM].size,
> >> +                                           machine);
> >> +    riscv_load_fdt(fdt_load_addr, machine->fdt);
> >> +
> >> +    /* load the reset vector */
> >> +    riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
> >> +                              rvsp_ref_memmap[RVSP_MROM].base,
> >> +                              rvsp_ref_memmap[RVSP_MROM].size, kernel_entry,
> >> +                              fdt_load_addr);
> >> +
> >> +}
> >> +
> >> +static void rvsp_ref_machine_init(MachineState *machine)
> >> +{
> >> +    const MemMapEntry *memmap = rvsp_ref_memmap;
> >> +    RVSPMachineState *s = RVSP_REF_MACHINE(machine);
> >> +    MemoryRegion *system_memory = get_system_memory();
> >> +    MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
> >> +    char *soc_name;
> >> +    DeviceState *mmio_irqchip, *pcie_irqchip;
> >> +    int i, base_hartid, hart_count;
> >> +    int socket_count = riscv_socket_count(machine);
> >> +
> >> +    /* Check socket count limit */
> >> +    if (RVSP_SOCKETS_MAX < socket_count) {
> >> +        error_report("number of sockets/nodes should be less than %d",
> >> +            RVSP_SOCKETS_MAX);
> >> +        exit(1);
> >> +    }
> >> +
> >> +    if (!tcg_enabled()) {
> >> +        error_report("'aclint' is only available with TCG acceleration");
> >> +        exit(1);
> >> +    }
> >> +
> >> +    /* Initialize sockets */
> >> +    mmio_irqchip = pcie_irqchip = NULL;
> >> +    for (i = 0; i < socket_count; i++) {
> >> +        if (!riscv_socket_check_hartids(machine, i)) {
> >> +            error_report("discontinuous hartids in socket%d", i);
> >> +            exit(1);
> >> +        }
> >> +
> >> +        base_hartid = riscv_socket_first_hartid(machine, i);
> >> +        if (base_hartid < 0) {
> >> +            error_report("can't find hartid base for socket%d", i);
> >> +            exit(1);
> >> +        }
> >> +
> >> +        hart_count = riscv_socket_hart_count(machine, i);
> >> +        if (hart_count < 0) {
> >> +            error_report("can't find hart count for socket%d", i);
> >> +            exit(1);
> >> +        }
> >> +
> >> +        soc_name = g_strdup_printf("soc%d", i);
> >> +        object_initialize_child(OBJECT(machine), soc_name, &s->soc[i],
> >> +                                TYPE_RISCV_HART_ARRAY);
> >> +        g_free(soc_name);
> >> +        object_property_set_str(OBJECT(&s->soc[i]), "cpu-type",
> >> +                                machine->cpu_type, &error_abort);
> >> +        object_property_set_int(OBJECT(&s->soc[i]), "hartid-base",
> >> +                                base_hartid, &error_abort);
> >> +        object_property_set_int(OBJECT(&s->soc[i]), "num-harts",
> >> +                                hart_count, &error_abort);
> >> +        sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_fatal);
> >> +
> >> +        /* Per-socket ACLINT MTIMER */
> >> +        riscv_aclint_mtimer_create(memmap[RVSP_ACLINT].base +
> >> +                i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
> >> +            RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
> >> +            base_hartid, hart_count,
> >> +            RISCV_ACLINT_DEFAULT_MTIMECMP,
> >> +            RISCV_ACLINT_DEFAULT_MTIME,
> >> +            RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
> >> +
> >> +        /* Per-socket interrupt controller */
> >> +        s->irqchip[i] = rvsp_ref_create_aia(s->aia_guests,
> >> +                                            memmap, i, base_hartid,
> >> +                                            hart_count);
> >> +
> >> +        /* Try to use different IRQCHIP instance based device type */
> >> +        if (i == 0) {
> >> +            mmio_irqchip = s->irqchip[i];
> >> +            pcie_irqchip = s->irqchip[i];
> >> +        }
> >> +        if (i == 1) {
> >> +            pcie_irqchip = s->irqchip[i];
> >> +        }
> >> +    }
> >> +
> >> +    s->memmap = rvsp_ref_memmap;
> >> +
> >> +    /* register system main memory (actual RAM) */
> >> +    memory_region_add_subregion(system_memory, memmap[RVSP_DRAM].base,
> >> +        machine->ram);
> >> +
> >> +    /* boot rom */
> >> +    memory_region_init_rom(mask_rom, NULL, "riscv_rvsp_ref_board.mrom",
> >> +                           memmap[RVSP_MROM].size, &error_fatal);
> >> +    memory_region_add_subregion(system_memory, memmap[RVSP_MROM].base,
> >> +                                mask_rom);
> >> +
> >> +    /* SiFive Test MMIO device */
> >> +    sifive_test_create(memmap[RVSP_TEST].base);
> >> +
> >> +    gpex_pcie_init(system_memory, pcie_irqchip, s);
> >> +
> >> +    serial_mm_init(system_memory, memmap[RVSP_UART0].base,
> >> +        0, qdev_get_gpio_in(mmio_irqchip, RVSP_UART0_IRQ), 399193,
> >> +        serial_hd(0), DEVICE_LITTLE_ENDIAN);
> >> +
> >> +    sysbus_create_simple("goldfish_rtc", memmap[RVSP_RTC].base,
> >> +        qdev_get_gpio_in(mmio_irqchip, RVSP_RTC_IRQ));
> >> +
> >> +    for (i = 0; i < ARRAY_SIZE(s->flash); i++) {
> >> +        /* Map legacy -drive if=pflash to machine properties */
> >> +        pflash_cfi01_legacy_drive(s->flash[i],
> >> +                                  drive_get(IF_PFLASH, 0, i));
> >> +    }
> >> +    rvsp_flash_map(s, system_memory);
> >> +
> >> +    s->machine_done.notify = rvsp_ref_machine_done;
> >> +    qemu_add_machine_init_done_notifier(&s->machine_done);
> >> +}
> >> +
> >> +static void rvsp_ref_machine_instance_init(Object *obj)
> >> +{
> >> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> >> +
> >> +    rvsp_flash_create(s);
> >> +}
> >> +
> >> +static char *rvsp_ref_get_aia_guests(Object *obj, Error **errp)
> >> +{
> >> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> >> +    char val[32];
> >> +
> >> +    sprintf(val, "%d", s->aia_guests);
> >> +    return g_strdup(val);
> >> +}
> >> +
> >> +static void rvsp_ref_set_aia_guests(Object *obj, const char *val, Error **errp)
> >> +{
> >> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> >> +
> >> +    s->aia_guests = atoi(val);
> >> +    if (s->aia_guests < 0 || s->aia_guests > RVSP_IRQCHIP_MAX_GUESTS) {
> >> +        error_setg(errp, "Invalid number of AIA IMSIC guests");
> >> +        error_append_hint(errp, "Valid values be between 0 and %d.\n",
> >> +                          RVSP_IRQCHIP_MAX_GUESTS);
> >> +    }
> >> +}
> >> +
> >> +static void rvsp_ref_machine_class_init(ObjectClass *oc, void *data)
> >> +{
> >> +    char str[128];
> >> +    MachineClass *mc = MACHINE_CLASS(oc);
> >> +
> >> +    mc->desc = "RISC-V Server SoC Reference board";
> >> +    mc->init = rvsp_ref_machine_init;
> >> +    mc->max_cpus = RVSP_CPUS_MAX;
> >> +    mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
> >> +    mc->pci_allow_0_address = true;
> >> +    mc->default_nic = "e1000e";
> >> +    mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
> >> +    mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
> >> +    mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
> >> +    mc->numa_mem_supported = true;
> >> +    /* platform instead of architectural choice */
> >> +    mc->cpu_cluster_has_numa_boundary = true;
> >> +    mc->default_ram_id = "riscv_rvsp_ref_board.ram";
> >> +
> >> +    object_class_property_add_str(oc, "aia-guests",
> >> +                                  rvsp_ref_get_aia_guests,
> >> +                                  rvsp_ref_set_aia_guests);
> >> +    sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid value "
> >> +                 "should be between 0 and %d.", RVSP_IRQCHIP_MAX_GUESTS);
> >> +    object_class_property_set_description(oc, "aia-guests", str);
> >> +}
> >> +
> >> +static const TypeInfo rvsp_ref_typeinfo = {
> >> +    .name       = TYPE_RVSP_REF_MACHINE,
> >> +    .parent     = TYPE_MACHINE,
> >> +    .class_init = rvsp_ref_machine_class_init,
> >> +    .instance_init = rvsp_ref_machine_instance_init,
> >> +    .instance_size = sizeof(RVSPMachineState),
> >> +};
> >> +
> >> +static void rvsp_ref_init_register_types(void)
> >> +{
> >> +    type_register_static(&rvsp_ref_typeinfo);
> >> +}
> >> +
> >> +type_init(rvsp_ref_init_register_types)
> >> --
> >> 2.34.1
> >>
> >>
>
Alistair Francis March 7, 2024, 12:48 a.m. UTC | #9
On Thu, Mar 7, 2024 at 5:13 AM Atish Kumar Patra <atishp@rivosinc.com> wrote:
>
> On Wed, Mar 6, 2024 at 4:56 AM Wu, Fei <fei2.wu@intel.com> wrote:
> >
> > On 3/6/2024 8:19 AM, Alistair Francis wrote:
> > > On Mon, Mar 4, 2024 at 8:28 PM Fei Wu <fei2.wu@intel.com> wrote:
> > >>
> > >> The RISC-V Server Platform specification[1] defines a standardized set
> > >> of hardware and software capabilities, that portable system software,
> > >> such as OS and hypervisors can rely on being present in a RISC-V server
> > >> platform.
> > >>
> > >> A corresponding Qemu RISC-V server platform reference (rvsp-ref for
> > >> short) machine type is added to provide a environment for firmware/OS
> > >> development and testing. The main features included in rvsp-ref are:
> > >>
> > >>  - Based on riscv virt machine type
> > >>  - A new memory map as close as virt machine as possible
> > >>  - A new virt CPU type rvsp-ref-cpu for server platform compliance
> > >>  - AIA
> > >>  - PCIe AHCI
> > >>  - PCIe NIC
> > >>  - No virtio device
> > >>  - No fw_cfg device
> > >>  - No ACPI table provided
> > >>  - Only minimal device tree nodes
> > >>
> > >> [1] https://github.com/riscv-non-isa/riscv-server-platform
> > >
> > > + Atish
> > >
> > >>
> > >> Signed-off-by: Fei Wu <fei2.wu@intel.com>
> > >> ---
> > >>  configs/devices/riscv64-softmmu/default.mak |    1 +
> > >>  hw/riscv/Kconfig                            |   13 +
> > >>  hw/riscv/meson.build                        |    1 +
> > >>  hw/riscv/server_platform_ref.c              | 1244 +++++++++++++++++++
> > >>  4 files changed, 1259 insertions(+)
> > >>  create mode 100644 hw/riscv/server_platform_ref.c
> > >>
> > >> diff --git a/configs/devices/riscv64-softmmu/default.mak b/configs/devices/riscv64-softmmu/default.mak
> > >> index 3f68059448..a1d98e49ef 100644
> > >> --- a/configs/devices/riscv64-softmmu/default.mak
> > >> +++ b/configs/devices/riscv64-softmmu/default.mak
> > >> @@ -10,5 +10,6 @@ CONFIG_SPIKE=y
> > >>  CONFIG_SIFIVE_E=y
> > >>  CONFIG_SIFIVE_U=y
> > >>  CONFIG_RISCV_VIRT=y
> > >> +CONFIG_SERVER_PLATFORM_REF=y
> > >>  CONFIG_MICROCHIP_PFSOC=y
> > >>  CONFIG_SHAKTI_C=y
> > >> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
> > >> index 5d644eb7b1..debac5a7f5 100644
> > >> --- a/hw/riscv/Kconfig
> > >> +++ b/hw/riscv/Kconfig
> > >> @@ -48,6 +48,19 @@ config RISCV_VIRT
> > >>      select ACPI
> > >>      select ACPI_PCI
> > >>
> > >> +config SERVER_PLATFORM_REF
> > >> +    bool
> > >> +    select RISCV_NUMA
> > >> +    select GOLDFISH_RTC
> > >> +    select PCI
> > >> +    select PCI_EXPRESS_GENERIC_BRIDGE
> > >> +    select PFLASH_CFI01
> > >> +    select SERIAL
> > >> +    select RISCV_ACLINT
> > >> +    select RISCV_APLIC
> > >> +    select RISCV_IMSIC
> > >> +    select SIFIVE_TEST
> > >
> > > Do we really need SiFive Test in the server platform?
> > >
> > It's used to reset the system, is there any better choice?

If we add this now we are stuck with it forever (or at least a long
time). So it'd be nice to think about these and decide if these really
are the best way to do things. We don't have to just copy the existing
virt machine.

There must be a more standard way to do this then MMIO mapped SiFive hardware?

> >
> > Probably I can remove the "sifive,test1 sifive,test0" from the
> > compatible list in fdt, and only keep "syscon", I see opensbi has
> > already removed that support in commit c2e602707.
> >
> > > Same with the goldfish RTC?
> > >
> > Although the spec doesn't make RTC mandatory, it should be a common
> > practice having a RTC on server, so I add a RTC here no matter it's
> > goldfish or not.
> >
>
> The platform spec says
> HPER_070 : A battery-backed RTC or analogous timekeeping mechanism
> MUST be implemented.
>
> Can we consider goldfish RTC in this category ?

I think so, although I haven't read the spec yet :)

My point was more that if we are going to implement a new machine, we
should aim to standardise on things that other
machines/servers/platforms/architectures do. Some of the things in the
virt machine are historical because that's what worked at the time.
But with a clean slate design there might be better alternatives.
Obviously while still sticking to the spec

>
> But I want to discuss a larger point as the server platform/SoC spec
> defines a bunch of optional requirement.
> Does this platform intend to be a platform that is a superset of all
> those options or allow optionality in
> the platform as well ?

Generally I feel that QEMU has the luxury of just supporting
everything. If there is an optional component created by the machine
it generally is fine if the guest doesn't use it. While creating
complex configuration options is a pain for users

Alistair

>
> > >> +
> > >>  config SHAKTI_C
> > >>      bool
> > >>      select RISCV_ACLINT
> > >> diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
> > >> index 2f7ee81be3..bb3aff91ea 100644
> > >> --- a/hw/riscv/meson.build
> > >> +++ b/hw/riscv/meson.build
> > >> @@ -4,6 +4,7 @@ riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: files('numa.c'))
> > >>  riscv_ss.add(files('riscv_hart.c'))
> > >>  riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c'))
> > >>  riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c'))
> > >> +riscv_ss.add(when: 'CONFIG_SERVER_PLATFORM_REF', if_true: files('server_platform_ref.c'))
> > >>  riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c'))
> > >>  riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
> > >>  riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
> > >> diff --git a/hw/riscv/server_platform_ref.c b/hw/riscv/server_platform_ref.c
> > >> new file mode 100644
> > >> index 0000000000..ae90c4b27a
> > >> --- /dev/null
> > >> +++ b/hw/riscv/server_platform_ref.c
> > >> @@ -0,0 +1,1244 @@
> > >> +/*
> > >> + * QEMU RISC-V Server Platfrom (RVSP) Reference Board
> > >
> > > Platform
> > >
> > OK.
> >
> > >> +static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
> > >> +                                          DeviceState *irqchip,
> > >> +                                          RVSPMachineState *s)
> > >> +{
> > >> +    DeviceState *dev;
> > >> +    PCIHostState *pci;
> > >> +    PCIDevice *pdev_ahci;
> > >> +    AHCIPCIState *ich9;
> > >> +    DriveInfo *hd[NUM_SATA_PORTS];
> > >> +    MemoryRegion *ecam_alias, *ecam_reg;
> > >> +    MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg;
> > >> +    hwaddr ecam_base = rvsp_ref_memmap[RVSP_PCIE_ECAM].base;
> > >> +    hwaddr ecam_size = rvsp_ref_memmap[RVSP_PCIE_ECAM].size;
> > >> +    hwaddr mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO].base;
> > >> +    hwaddr mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO].size;
> > >> +    hwaddr high_mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].base;
> > >> +    hwaddr high_mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].size;
> > >> +    hwaddr pio_base = rvsp_ref_memmap[RVSP_PCIE_PIO].base;
> > >> +    hwaddr pio_size = rvsp_ref_memmap[RVSP_PCIE_PIO].size;
> > >> +    MachineClass *mc = MACHINE_GET_CLASS(s);
> > >> +    qemu_irq irq;
> > >> +    int i;
> > >> +
> > >> +    dev = qdev_new(TYPE_GPEX_HOST);
> > >> +
> > >> +    /* Set GPEX object properties for the rvsp ref machine */
> > >> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE,
> > >> +                            ecam_base, NULL);
> > >> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE,
> > >> +                            ecam_size, NULL);
> > >> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
> > >> +                             PCI_HOST_BELOW_4G_MMIO_BASE,
> > >> +                             mmio_base, NULL);
> > >> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_BELOW_4G_MMIO_SIZE,
> > >> +                            mmio_size, NULL);
> > >> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
> > >> +                             PCI_HOST_ABOVE_4G_MMIO_BASE,
> > >> +                             high_mmio_base, NULL);
> > >> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ABOVE_4G_MMIO_SIZE,
> > >> +                            high_mmio_size, NULL);
> > >> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE,
> > >> +                            pio_base, NULL);
> > >> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE,
> > >> +                            pio_size, NULL);
> > >> +
> > >> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
> > >> +
> > >> +    ecam_alias = g_new0(MemoryRegion, 1);
> > >> +    ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
> > >> +    memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam",
> > >> +                             ecam_reg, 0, ecam_size);
> > >> +    memory_region_add_subregion(get_system_memory(), ecam_base, ecam_alias);
> > >> +
> > >> +    mmio_alias = g_new0(MemoryRegion, 1);
> > >> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
> > >> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
> > >> +                             mmio_reg, mmio_base, mmio_size);
> > >> +    memory_region_add_subregion(get_system_memory(), mmio_base, mmio_alias);
> > >> +
> > >> +    /* Map high MMIO space */
> > >> +    high_mmio_alias = g_new0(MemoryRegion, 1);
> > >> +    memory_region_init_alias(high_mmio_alias, OBJECT(dev), "pcie-mmio-high",
> > >> +                             mmio_reg, high_mmio_base, high_mmio_size);
> > >> +    memory_region_add_subregion(get_system_memory(), high_mmio_base,
> > >> +                                high_mmio_alias);
> > >> +
> > >> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
> > >> +
> > >> +    for (i = 0; i < GPEX_NUM_IRQS; i++) {
> > >> +        irq = qdev_get_gpio_in(irqchip, RVSP_PCIE_IRQ + i);
> > >> +
> > >> +        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
> > >> +        gpex_set_irq_num(GPEX_HOST(dev), i, RVSP_PCIE_IRQ + i);
> > >> +    }
> > >> +
> > >> +    pci = PCI_HOST_BRIDGE(dev);
> > >> +    pci_init_nic_devices(pci->bus, mc->default_nic);
> > >> +    /* IDE disk setup.  */
> > >> +    pdev_ahci = pci_create_simple(pci->bus, -1, TYPE_ICH9_AHCI);
> > >> +    ich9 = ICH9_AHCI(pdev_ahci);
> > >> +    g_assert(ARRAY_SIZE(hd) == ich9->ahci.ports);
> > >> +    ide_drive_get(hd, ich9->ahci.ports);
> > >> +    ahci_ide_create_devs(&ich9->ahci, hd);
> > >> +
> > >> +    GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus;
> > >> +    return dev;
> > >> +}
> > >
> > > We should share as much of this code as possible with the virt machine.
> > >
> > This function references multiple rvsp specific variables, some
> > refactorings are needed to group all the information of
> > ecam/mmio/high_mmio/pio/irq and pass them to virt and rvsp respectively.
> > I see arm virt/sbsa-ref are seperated clearly and no sharing between
> > them. It has the benefit there is no side effect between them. Should we
> > use the same policy?
> >
> > Thanks,
> > Fei.
> >
> > > Alistair
> > >
> > >> +
> > >> +static DeviceState *rvsp_ref_create_aia(int aia_guests,
> > >> +                                        const MemMapEntry *memmap, int socket,
> > >> +                                        int base_hartid, int hart_count)
> > >> +{
> > >> +    int i;
> > >> +    hwaddr addr;
> > >> +    uint32_t guest_bits;
> > >> +    DeviceState *aplic_s = NULL;
> > >> +    DeviceState *aplic_m = NULL;
> > >> +    bool msimode = true;
> > >> +
> > >> +    /* Per-socket M-level IMSICs */
> > >> +    addr = memmap[RVSP_IMSIC_M].base +
> > >> +           socket * RVSP_IMSIC_GROUP_MAX_SIZE;
> > >> +    for (i = 0; i < hart_count; i++) {
> > >> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
> > >> +                           base_hartid + i, true, 1,
> > >> +                           RVSP_IRQCHIP_NUM_MSIS);
> > >> +    }
> > >> +
> > >> +    /* Per-socket S-level IMSICs */
> > >> +    guest_bits = imsic_num_bits(aia_guests + 1);
> > >> +    addr = memmap[RVSP_IMSIC_S].base + socket * RVSP_IMSIC_GROUP_MAX_SIZE;
> > >> +    for (i = 0; i < hart_count; i++) {
> > >> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(guest_bits),
> > >> +                           base_hartid + i, false, 1 + aia_guests,
> > >> +                           RVSP_IRQCHIP_NUM_MSIS);
> > >> +    }
> > >> +
> > >> +    /* Per-socket M-level APLIC */
> > >> +    aplic_m = riscv_aplic_create(memmap[RVSP_APLIC_M].base +
> > >> +                                 socket * memmap[RVSP_APLIC_M].size,
> > >> +                                 memmap[RVSP_APLIC_M].size,
> > >> +                                 (msimode) ? 0 : base_hartid,
> > >> +                                 (msimode) ? 0 : hart_count,
> > >> +                                 RVSP_IRQCHIP_NUM_SOURCES,
> > >> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
> > >> +                                 msimode, true, NULL);
> > >> +
> > >> +    /* Per-socket S-level APLIC */
> > >> +    aplic_s = riscv_aplic_create(memmap[RVSP_APLIC_S].base +
> > >> +                                 socket * memmap[RVSP_APLIC_S].size,
> > >> +                                 memmap[RVSP_APLIC_S].size,
> > >> +                                 (msimode) ? 0 : base_hartid,
> > >> +                                 (msimode) ? 0 : hart_count,
> > >> +                                 RVSP_IRQCHIP_NUM_SOURCES,
> > >> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
> > >> +                                 msimode, false, aplic_m);
> > >> +
> > >> +    (void)aplic_s;
> > >> +    return aplic_m;
> > >> +}
> > >> +
> > >> +static void rvsp_ref_machine_done(Notifier *notifier, void *data)
> > >> +{
> > >> +    RVSPMachineState *s = container_of(notifier, RVSPMachineState,
> > >> +                                       machine_done);
> > >> +    const MemMapEntry *memmap = rvsp_ref_memmap;
> > >> +    MachineState *machine = MACHINE(s);
> > >> +    target_ulong start_addr = memmap[RVSP_DRAM].base;
> > >> +    target_ulong firmware_end_addr, kernel_start_addr;
> > >> +    const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
> > >> +    uint64_t fdt_load_addr;
> > >> +    uint64_t kernel_entry = 0;
> > >> +    BlockBackend *pflash_blk0;
> > >> +
> > >> +    /* load/create device tree */
> > >> +    if (machine->dtb) {
> > >> +        machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
> > >> +        if (!machine->fdt) {
> > >> +            error_report("load_device_tree() failed");
> > >> +            exit(1);
> > >> +        }
> > >> +    } else {
> > >> +        create_fdt(s, memmap);
> > >> +    }
> > >> +
> > >> +    firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
> > >> +                                                     start_addr, NULL);
> > >> +
> > >> +    pflash_blk0 = pflash_cfi01_get_blk(s->flash[0]);
> > >> +    if (pflash_blk0) {
> > >> +        if (machine->firmware && !strcmp(machine->firmware, "none")) {
> > >> +            /*
> > >> +             * Pflash was supplied but bios is none and not KVM guest,
> > >> +             * let's overwrite the address we jump to after reset to
> > >> +             * the base of the flash.
> > >> +             */
> > >> +            start_addr = rvsp_ref_memmap[RVSP_FLASH].base;
> > >> +        } else {
> > >> +            /*
> > >> +             * Pflash was supplied but either KVM guest or bios is not none.
> > >> +             * In this case, base of the flash would contain S-mode payload.
> > >> +             */
> > >> +            riscv_setup_firmware_boot(machine);
> > >> +            kernel_entry = rvsp_ref_memmap[RVSP_FLASH].base;
> > >> +        }
> > >> +    }
> > >> +
> > >> +    if (machine->kernel_filename && !kernel_entry) {
> > >> +        kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
> > >> +                                                         firmware_end_addr);
> > >> +
> > >> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
> > >> +                                         kernel_start_addr, true, NULL);
> > >> +    }
> > >> +
> > >> +    fdt_load_addr = riscv_compute_fdt_addr(memmap[RVSP_DRAM].base,
> > >> +                                           memmap[RVSP_DRAM].size,
> > >> +                                           machine);
> > >> +    riscv_load_fdt(fdt_load_addr, machine->fdt);
> > >> +
> > >> +    /* load the reset vector */
> > >> +    riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
> > >> +                              rvsp_ref_memmap[RVSP_MROM].base,
> > >> +                              rvsp_ref_memmap[RVSP_MROM].size, kernel_entry,
> > >> +                              fdt_load_addr);
> > >> +
> > >> +}
> > >> +
> > >> +static void rvsp_ref_machine_init(MachineState *machine)
> > >> +{
> > >> +    const MemMapEntry *memmap = rvsp_ref_memmap;
> > >> +    RVSPMachineState *s = RVSP_REF_MACHINE(machine);
> > >> +    MemoryRegion *system_memory = get_system_memory();
> > >> +    MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
> > >> +    char *soc_name;
> > >> +    DeviceState *mmio_irqchip, *pcie_irqchip;
> > >> +    int i, base_hartid, hart_count;
> > >> +    int socket_count = riscv_socket_count(machine);
> > >> +
> > >> +    /* Check socket count limit */
> > >> +    if (RVSP_SOCKETS_MAX < socket_count) {
> > >> +        error_report("number of sockets/nodes should be less than %d",
> > >> +            RVSP_SOCKETS_MAX);
> > >> +        exit(1);
> > >> +    }
> > >> +
> > >> +    if (!tcg_enabled()) {
> > >> +        error_report("'aclint' is only available with TCG acceleration");
> > >> +        exit(1);
> > >> +    }
> > >> +
> > >> +    /* Initialize sockets */
> > >> +    mmio_irqchip = pcie_irqchip = NULL;
> > >> +    for (i = 0; i < socket_count; i++) {
> > >> +        if (!riscv_socket_check_hartids(machine, i)) {
> > >> +            error_report("discontinuous hartids in socket%d", i);
> > >> +            exit(1);
> > >> +        }
> > >> +
> > >> +        base_hartid = riscv_socket_first_hartid(machine, i);
> > >> +        if (base_hartid < 0) {
> > >> +            error_report("can't find hartid base for socket%d", i);
> > >> +            exit(1);
> > >> +        }
> > >> +
> > >> +        hart_count = riscv_socket_hart_count(machine, i);
> > >> +        if (hart_count < 0) {
> > >> +            error_report("can't find hart count for socket%d", i);
> > >> +            exit(1);
> > >> +        }
> > >> +
> > >> +        soc_name = g_strdup_printf("soc%d", i);
> > >> +        object_initialize_child(OBJECT(machine), soc_name, &s->soc[i],
> > >> +                                TYPE_RISCV_HART_ARRAY);
> > >> +        g_free(soc_name);
> > >> +        object_property_set_str(OBJECT(&s->soc[i]), "cpu-type",
> > >> +                                machine->cpu_type, &error_abort);
> > >> +        object_property_set_int(OBJECT(&s->soc[i]), "hartid-base",
> > >> +                                base_hartid, &error_abort);
> > >> +        object_property_set_int(OBJECT(&s->soc[i]), "num-harts",
> > >> +                                hart_count, &error_abort);
> > >> +        sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_fatal);
> > >> +
> > >> +        /* Per-socket ACLINT MTIMER */
> > >> +        riscv_aclint_mtimer_create(memmap[RVSP_ACLINT].base +
> > >> +                i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
> > >> +            RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
> > >> +            base_hartid, hart_count,
> > >> +            RISCV_ACLINT_DEFAULT_MTIMECMP,
> > >> +            RISCV_ACLINT_DEFAULT_MTIME,
> > >> +            RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
> > >> +
> > >> +        /* Per-socket interrupt controller */
> > >> +        s->irqchip[i] = rvsp_ref_create_aia(s->aia_guests,
> > >> +                                            memmap, i, base_hartid,
> > >> +                                            hart_count);
> > >> +
> > >> +        /* Try to use different IRQCHIP instance based device type */
> > >> +        if (i == 0) {
> > >> +            mmio_irqchip = s->irqchip[i];
> > >> +            pcie_irqchip = s->irqchip[i];
> > >> +        }
> > >> +        if (i == 1) {
> > >> +            pcie_irqchip = s->irqchip[i];
> > >> +        }
> > >> +    }
> > >> +
> > >> +    s->memmap = rvsp_ref_memmap;
> > >> +
> > >> +    /* register system main memory (actual RAM) */
> > >> +    memory_region_add_subregion(system_memory, memmap[RVSP_DRAM].base,
> > >> +        machine->ram);
> > >> +
> > >> +    /* boot rom */
> > >> +    memory_region_init_rom(mask_rom, NULL, "riscv_rvsp_ref_board.mrom",
> > >> +                           memmap[RVSP_MROM].size, &error_fatal);
> > >> +    memory_region_add_subregion(system_memory, memmap[RVSP_MROM].base,
> > >> +                                mask_rom);
> > >> +
> > >> +    /* SiFive Test MMIO device */
> > >> +    sifive_test_create(memmap[RVSP_TEST].base);
> > >> +
> > >> +    gpex_pcie_init(system_memory, pcie_irqchip, s);
> > >> +
> > >> +    serial_mm_init(system_memory, memmap[RVSP_UART0].base,
> > >> +        0, qdev_get_gpio_in(mmio_irqchip, RVSP_UART0_IRQ), 399193,
> > >> +        serial_hd(0), DEVICE_LITTLE_ENDIAN);
> > >> +
> > >> +    sysbus_create_simple("goldfish_rtc", memmap[RVSP_RTC].base,
> > >> +        qdev_get_gpio_in(mmio_irqchip, RVSP_RTC_IRQ));
> > >> +
> > >> +    for (i = 0; i < ARRAY_SIZE(s->flash); i++) {
> > >> +        /* Map legacy -drive if=pflash to machine properties */
> > >> +        pflash_cfi01_legacy_drive(s->flash[i],
> > >> +                                  drive_get(IF_PFLASH, 0, i));
> > >> +    }
> > >> +    rvsp_flash_map(s, system_memory);
> > >> +
> > >> +    s->machine_done.notify = rvsp_ref_machine_done;
> > >> +    qemu_add_machine_init_done_notifier(&s->machine_done);
> > >> +}
> > >> +
> > >> +static void rvsp_ref_machine_instance_init(Object *obj)
> > >> +{
> > >> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> > >> +
> > >> +    rvsp_flash_create(s);
> > >> +}
> > >> +
> > >> +static char *rvsp_ref_get_aia_guests(Object *obj, Error **errp)
> > >> +{
> > >> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> > >> +    char val[32];
> > >> +
> > >> +    sprintf(val, "%d", s->aia_guests);
> > >> +    return g_strdup(val);
> > >> +}
> > >> +
> > >> +static void rvsp_ref_set_aia_guests(Object *obj, const char *val, Error **errp)
> > >> +{
> > >> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
> > >> +
> > >> +    s->aia_guests = atoi(val);
> > >> +    if (s->aia_guests < 0 || s->aia_guests > RVSP_IRQCHIP_MAX_GUESTS) {
> > >> +        error_setg(errp, "Invalid number of AIA IMSIC guests");
> > >> +        error_append_hint(errp, "Valid values be between 0 and %d.\n",
> > >> +                          RVSP_IRQCHIP_MAX_GUESTS);
> > >> +    }
> > >> +}
> > >> +
> > >> +static void rvsp_ref_machine_class_init(ObjectClass *oc, void *data)
> > >> +{
> > >> +    char str[128];
> > >> +    MachineClass *mc = MACHINE_CLASS(oc);
> > >> +
> > >> +    mc->desc = "RISC-V Server SoC Reference board";
> > >> +    mc->init = rvsp_ref_machine_init;
> > >> +    mc->max_cpus = RVSP_CPUS_MAX;
> > >> +    mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
> > >> +    mc->pci_allow_0_address = true;
> > >> +    mc->default_nic = "e1000e";
> > >> +    mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
> > >> +    mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
> > >> +    mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
> > >> +    mc->numa_mem_supported = true;
> > >> +    /* platform instead of architectural choice */
> > >> +    mc->cpu_cluster_has_numa_boundary = true;
> > >> +    mc->default_ram_id = "riscv_rvsp_ref_board.ram";
> > >> +
> > >> +    object_class_property_add_str(oc, "aia-guests",
> > >> +                                  rvsp_ref_get_aia_guests,
> > >> +                                  rvsp_ref_set_aia_guests);
> > >> +    sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid value "
> > >> +                 "should be between 0 and %d.", RVSP_IRQCHIP_MAX_GUESTS);
> > >> +    object_class_property_set_description(oc, "aia-guests", str);
> > >> +}
> > >> +
> > >> +static const TypeInfo rvsp_ref_typeinfo = {
> > >> +    .name       = TYPE_RVSP_REF_MACHINE,
> > >> +    .parent     = TYPE_MACHINE,
> > >> +    .class_init = rvsp_ref_machine_class_init,
> > >> +    .instance_init = rvsp_ref_machine_instance_init,
> > >> +    .instance_size = sizeof(RVSPMachineState),
> > >> +};
> > >> +
> > >> +static void rvsp_ref_init_register_types(void)
> > >> +{
> > >> +    type_register_static(&rvsp_ref_typeinfo);
> > >> +}
> > >> +
> > >> +type_init(rvsp_ref_init_register_types)
> > >> --
> > >> 2.34.1
> > >>
> > >>
> >
Wu, Fei March 7, 2024, 6:26 a.m. UTC | #10
On 3/7/2024 8:48 AM, Alistair Francis wrote:
> On Thu, Mar 7, 2024 at 5:13 AM Atish Kumar Patra <atishp@rivosinc.com> wrote:
>>
>> On Wed, Mar 6, 2024 at 4:56 AM Wu, Fei <fei2.wu@intel.com> wrote:
>>>
>>> On 3/6/2024 8:19 AM, Alistair Francis wrote:
>>>> On Mon, Mar 4, 2024 at 8:28 PM Fei Wu <fei2.wu@intel.com> wrote:
>>>>>
>>>>> The RISC-V Server Platform specification[1] defines a standardized set
>>>>> of hardware and software capabilities, that portable system software,
>>>>> such as OS and hypervisors can rely on being present in a RISC-V server
>>>>> platform.
>>>>>
>>>>> A corresponding Qemu RISC-V server platform reference (rvsp-ref for
>>>>> short) machine type is added to provide a environment for firmware/OS
>>>>> development and testing. The main features included in rvsp-ref are:
>>>>>
>>>>>  - Based on riscv virt machine type
>>>>>  - A new memory map as close as virt machine as possible
>>>>>  - A new virt CPU type rvsp-ref-cpu for server platform compliance
>>>>>  - AIA
>>>>>  - PCIe AHCI
>>>>>  - PCIe NIC
>>>>>  - No virtio device
>>>>>  - No fw_cfg device
>>>>>  - No ACPI table provided
>>>>>  - Only minimal device tree nodes
>>>>>
>>>>> [1] https://github.com/riscv-non-isa/riscv-server-platform
>>>>
>>>> + Atish
>>>>
>>>>>
>>>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>>>> ---
>>>>>  configs/devices/riscv64-softmmu/default.mak |    1 +
>>>>>  hw/riscv/Kconfig                            |   13 +
>>>>>  hw/riscv/meson.build                        |    1 +
>>>>>  hw/riscv/server_platform_ref.c              | 1244 +++++++++++++++++++
>>>>>  4 files changed, 1259 insertions(+)
>>>>>  create mode 100644 hw/riscv/server_platform_ref.c
>>>>>
>>>>> diff --git a/configs/devices/riscv64-softmmu/default.mak b/configs/devices/riscv64-softmmu/default.mak
>>>>> index 3f68059448..a1d98e49ef 100644
>>>>> --- a/configs/devices/riscv64-softmmu/default.mak
>>>>> +++ b/configs/devices/riscv64-softmmu/default.mak
>>>>> @@ -10,5 +10,6 @@ CONFIG_SPIKE=y
>>>>>  CONFIG_SIFIVE_E=y
>>>>>  CONFIG_SIFIVE_U=y
>>>>>  CONFIG_RISCV_VIRT=y
>>>>> +CONFIG_SERVER_PLATFORM_REF=y
>>>>>  CONFIG_MICROCHIP_PFSOC=y
>>>>>  CONFIG_SHAKTI_C=y
>>>>> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
>>>>> index 5d644eb7b1..debac5a7f5 100644
>>>>> --- a/hw/riscv/Kconfig
>>>>> +++ b/hw/riscv/Kconfig
>>>>> @@ -48,6 +48,19 @@ config RISCV_VIRT
>>>>>      select ACPI
>>>>>      select ACPI_PCI
>>>>>
>>>>> +config SERVER_PLATFORM_REF
>>>>> +    bool
>>>>> +    select RISCV_NUMA
>>>>> +    select GOLDFISH_RTC
>>>>> +    select PCI
>>>>> +    select PCI_EXPRESS_GENERIC_BRIDGE
>>>>> +    select PFLASH_CFI01
>>>>> +    select SERIAL
>>>>> +    select RISCV_ACLINT
>>>>> +    select RISCV_APLIC
>>>>> +    select RISCV_IMSIC
>>>>> +    select SIFIVE_TEST
>>>>
>>>> Do we really need SiFive Test in the server platform?
>>>>
>>> It's used to reset the system, is there any better choice?
> 
> If we add this now we are stuck with it forever (or at least a long
> time). So it'd be nice to think about these and decide if these really
> are the best way to do things. We don't have to just copy the existing
> virt machine.
> 
We need a solution to poweroff/reboot, and sifive test is one of the
hardware implementations, so in general I think it's okay. But I agree
Sifive test looks a device for testing only.

> There must be a more standard way to do this then MMIO mapped SiFive hardware?
> 
The mapped MMIO mechanism leveraged by Sifive test by itself is kinda
generic, the sbsa_ec for sbsa-ref is also an MMIO mapped device. These
two devices look very similar except different encodings of the
shutdown/reboot command.

Probably we can have a generic shutdown/reboot device in QEMU for both
sifive test and sbsa_ec, and likely more (not in this patch series). In
this way, sifive test device will be replaced by this more generic
device. Any suggestions?

>>>
>>> Probably I can remove the "sifive,test1 sifive,test0" from the
>>> compatible list in fdt, and only keep "syscon", I see opensbi has
>>> already removed that support in commit c2e602707.
>>>
>>>> Same with the goldfish RTC?
>>>>
>>> Although the spec doesn't make RTC mandatory, it should be a common
>>> practice having a RTC on server, so I add a RTC here no matter it's
>>> goldfish or not.
>>>
>>
>> The platform spec says
>> HPER_070 : A battery-backed RTC or analogous timekeeping mechanism
>> MUST be implemented.
>>
Thank Atish for pointing this out.

>> Can we consider goldfish RTC in this category ?
> 
> I think so, although I haven't read the spec yet :)
> 
Yes, I think goldfish RTC falls in this category.

> My point was more that if we are going to implement a new machine, we
> should aim to standardise on things that other
> machines/servers/platforms/architectures do. Some of the things in the
> virt machine are historical because that's what worked at the time.
> But with a clean slate design there might be better alternatives.
> Obviously while still sticking to the spec
> 
>>
>> But I want to discuss a larger point as the server platform/SoC spec
>> defines a bunch of optional requirement.
>> Does this platform intend to be a platform that is a superset of all
>> those options or allow optionality in
>> the platform as well ?
> 
> Generally I feel that QEMU has the luxury of just supporting
> everything. If there is an optional component created by the machine
> it generally is fine if the guest doesn't use it. While creating
> complex configuration options is a pain for users
> 
The primary goal of this reference machine is for compatibility, so the
mandatory ones should be integrated if possible. Regarding the optional
ones, I think it's okay to have if they bring the convenience, and I'm
open to it.

Thanks,
Fei.

> Alistair
> 
>>
>>>>> +
>>>>>  config SHAKTI_C
>>>>>      bool
>>>>>      select RISCV_ACLINT
>>>>> diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
>>>>> index 2f7ee81be3..bb3aff91ea 100644
>>>>> --- a/hw/riscv/meson.build
>>>>> +++ b/hw/riscv/meson.build
>>>>> @@ -4,6 +4,7 @@ riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: files('numa.c'))
>>>>>  riscv_ss.add(files('riscv_hart.c'))
>>>>>  riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c'))
>>>>>  riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c'))
>>>>> +riscv_ss.add(when: 'CONFIG_SERVER_PLATFORM_REF', if_true: files('server_platform_ref.c'))
>>>>>  riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c'))
>>>>>  riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
>>>>>  riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
>>>>> diff --git a/hw/riscv/server_platform_ref.c b/hw/riscv/server_platform_ref.c
>>>>> new file mode 100644
>>>>> index 0000000000..ae90c4b27a
>>>>> --- /dev/null
>>>>> +++ b/hw/riscv/server_platform_ref.c
>>>>> @@ -0,0 +1,1244 @@
>>>>> +/*
>>>>> + * QEMU RISC-V Server Platfrom (RVSP) Reference Board
>>>>
>>>> Platform
>>>>
>>> OK.
>>>
>>>>> +static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
>>>>> +                                          DeviceState *irqchip,
>>>>> +                                          RVSPMachineState *s)
>>>>> +{
>>>>> +    DeviceState *dev;
>>>>> +    PCIHostState *pci;
>>>>> +    PCIDevice *pdev_ahci;
>>>>> +    AHCIPCIState *ich9;
>>>>> +    DriveInfo *hd[NUM_SATA_PORTS];
>>>>> +    MemoryRegion *ecam_alias, *ecam_reg;
>>>>> +    MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg;
>>>>> +    hwaddr ecam_base = rvsp_ref_memmap[RVSP_PCIE_ECAM].base;
>>>>> +    hwaddr ecam_size = rvsp_ref_memmap[RVSP_PCIE_ECAM].size;
>>>>> +    hwaddr mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO].base;
>>>>> +    hwaddr mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO].size;
>>>>> +    hwaddr high_mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].base;
>>>>> +    hwaddr high_mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].size;
>>>>> +    hwaddr pio_base = rvsp_ref_memmap[RVSP_PCIE_PIO].base;
>>>>> +    hwaddr pio_size = rvsp_ref_memmap[RVSP_PCIE_PIO].size;
>>>>> +    MachineClass *mc = MACHINE_GET_CLASS(s);
>>>>> +    qemu_irq irq;
>>>>> +    int i;
>>>>> +
>>>>> +    dev = qdev_new(TYPE_GPEX_HOST);
>>>>> +
>>>>> +    /* Set GPEX object properties for the rvsp ref machine */
>>>>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE,
>>>>> +                            ecam_base, NULL);
>>>>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE,
>>>>> +                            ecam_size, NULL);
>>>>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
>>>>> +                             PCI_HOST_BELOW_4G_MMIO_BASE,
>>>>> +                             mmio_base, NULL);
>>>>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_BELOW_4G_MMIO_SIZE,
>>>>> +                            mmio_size, NULL);
>>>>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
>>>>> +                             PCI_HOST_ABOVE_4G_MMIO_BASE,
>>>>> +                             high_mmio_base, NULL);
>>>>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ABOVE_4G_MMIO_SIZE,
>>>>> +                            high_mmio_size, NULL);
>>>>> +    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE,
>>>>> +                            pio_base, NULL);
>>>>> +    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE,
>>>>> +                            pio_size, NULL);
>>>>> +
>>>>> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
>>>>> +
>>>>> +    ecam_alias = g_new0(MemoryRegion, 1);
>>>>> +    ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
>>>>> +    memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam",
>>>>> +                             ecam_reg, 0, ecam_size);
>>>>> +    memory_region_add_subregion(get_system_memory(), ecam_base, ecam_alias);
>>>>> +
>>>>> +    mmio_alias = g_new0(MemoryRegion, 1);
>>>>> +    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
>>>>> +    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
>>>>> +                             mmio_reg, mmio_base, mmio_size);
>>>>> +    memory_region_add_subregion(get_system_memory(), mmio_base, mmio_alias);
>>>>> +
>>>>> +    /* Map high MMIO space */
>>>>> +    high_mmio_alias = g_new0(MemoryRegion, 1);
>>>>> +    memory_region_init_alias(high_mmio_alias, OBJECT(dev), "pcie-mmio-high",
>>>>> +                             mmio_reg, high_mmio_base, high_mmio_size);
>>>>> +    memory_region_add_subregion(get_system_memory(), high_mmio_base,
>>>>> +                                high_mmio_alias);
>>>>> +
>>>>> +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
>>>>> +
>>>>> +    for (i = 0; i < GPEX_NUM_IRQS; i++) {
>>>>> +        irq = qdev_get_gpio_in(irqchip, RVSP_PCIE_IRQ + i);
>>>>> +
>>>>> +        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
>>>>> +        gpex_set_irq_num(GPEX_HOST(dev), i, RVSP_PCIE_IRQ + i);
>>>>> +    }
>>>>> +
>>>>> +    pci = PCI_HOST_BRIDGE(dev);
>>>>> +    pci_init_nic_devices(pci->bus, mc->default_nic);
>>>>> +    /* IDE disk setup.  */
>>>>> +    pdev_ahci = pci_create_simple(pci->bus, -1, TYPE_ICH9_AHCI);
>>>>> +    ich9 = ICH9_AHCI(pdev_ahci);
>>>>> +    g_assert(ARRAY_SIZE(hd) == ich9->ahci.ports);
>>>>> +    ide_drive_get(hd, ich9->ahci.ports);
>>>>> +    ahci_ide_create_devs(&ich9->ahci, hd);
>>>>> +
>>>>> +    GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus;
>>>>> +    return dev;
>>>>> +}
>>>>
>>>> We should share as much of this code as possible with the virt machine.
>>>>
>>> This function references multiple rvsp specific variables, some
>>> refactorings are needed to group all the information of
>>> ecam/mmio/high_mmio/pio/irq and pass them to virt and rvsp respectively.
>>> I see arm virt/sbsa-ref are seperated clearly and no sharing between
>>> them. It has the benefit there is no side effect between them. Should we
>>> use the same policy?
>>>
>>> Thanks,
>>> Fei.
>>>
>>>> Alistair
>>>>
>>>>> +
>>>>> +static DeviceState *rvsp_ref_create_aia(int aia_guests,
>>>>> +                                        const MemMapEntry *memmap, int socket,
>>>>> +                                        int base_hartid, int hart_count)
>>>>> +{
>>>>> +    int i;
>>>>> +    hwaddr addr;
>>>>> +    uint32_t guest_bits;
>>>>> +    DeviceState *aplic_s = NULL;
>>>>> +    DeviceState *aplic_m = NULL;
>>>>> +    bool msimode = true;
>>>>> +
>>>>> +    /* Per-socket M-level IMSICs */
>>>>> +    addr = memmap[RVSP_IMSIC_M].base +
>>>>> +           socket * RVSP_IMSIC_GROUP_MAX_SIZE;
>>>>> +    for (i = 0; i < hart_count; i++) {
>>>>> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
>>>>> +                           base_hartid + i, true, 1,
>>>>> +                           RVSP_IRQCHIP_NUM_MSIS);
>>>>> +    }
>>>>> +
>>>>> +    /* Per-socket S-level IMSICs */
>>>>> +    guest_bits = imsic_num_bits(aia_guests + 1);
>>>>> +    addr = memmap[RVSP_IMSIC_S].base + socket * RVSP_IMSIC_GROUP_MAX_SIZE;
>>>>> +    for (i = 0; i < hart_count; i++) {
>>>>> +        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(guest_bits),
>>>>> +                           base_hartid + i, false, 1 + aia_guests,
>>>>> +                           RVSP_IRQCHIP_NUM_MSIS);
>>>>> +    }
>>>>> +
>>>>> +    /* Per-socket M-level APLIC */
>>>>> +    aplic_m = riscv_aplic_create(memmap[RVSP_APLIC_M].base +
>>>>> +                                 socket * memmap[RVSP_APLIC_M].size,
>>>>> +                                 memmap[RVSP_APLIC_M].size,
>>>>> +                                 (msimode) ? 0 : base_hartid,
>>>>> +                                 (msimode) ? 0 : hart_count,
>>>>> +                                 RVSP_IRQCHIP_NUM_SOURCES,
>>>>> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
>>>>> +                                 msimode, true, NULL);
>>>>> +
>>>>> +    /* Per-socket S-level APLIC */
>>>>> +    aplic_s = riscv_aplic_create(memmap[RVSP_APLIC_S].base +
>>>>> +                                 socket * memmap[RVSP_APLIC_S].size,
>>>>> +                                 memmap[RVSP_APLIC_S].size,
>>>>> +                                 (msimode) ? 0 : base_hartid,
>>>>> +                                 (msimode) ? 0 : hart_count,
>>>>> +                                 RVSP_IRQCHIP_NUM_SOURCES,
>>>>> +                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
>>>>> +                                 msimode, false, aplic_m);
>>>>> +
>>>>> +    (void)aplic_s;
>>>>> +    return aplic_m;
>>>>> +}
>>>>> +
>>>>> +static void rvsp_ref_machine_done(Notifier *notifier, void *data)
>>>>> +{
>>>>> +    RVSPMachineState *s = container_of(notifier, RVSPMachineState,
>>>>> +                                       machine_done);
>>>>> +    const MemMapEntry *memmap = rvsp_ref_memmap;
>>>>> +    MachineState *machine = MACHINE(s);
>>>>> +    target_ulong start_addr = memmap[RVSP_DRAM].base;
>>>>> +    target_ulong firmware_end_addr, kernel_start_addr;
>>>>> +    const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
>>>>> +    uint64_t fdt_load_addr;
>>>>> +    uint64_t kernel_entry = 0;
>>>>> +    BlockBackend *pflash_blk0;
>>>>> +
>>>>> +    /* load/create device tree */
>>>>> +    if (machine->dtb) {
>>>>> +        machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
>>>>> +        if (!machine->fdt) {
>>>>> +            error_report("load_device_tree() failed");
>>>>> +            exit(1);
>>>>> +        }
>>>>> +    } else {
>>>>> +        create_fdt(s, memmap);
>>>>> +    }
>>>>> +
>>>>> +    firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
>>>>> +                                                     start_addr, NULL);
>>>>> +
>>>>> +    pflash_blk0 = pflash_cfi01_get_blk(s->flash[0]);
>>>>> +    if (pflash_blk0) {
>>>>> +        if (machine->firmware && !strcmp(machine->firmware, "none")) {
>>>>> +            /*
>>>>> +             * Pflash was supplied but bios is none and not KVM guest,
>>>>> +             * let's overwrite the address we jump to after reset to
>>>>> +             * the base of the flash.
>>>>> +             */
>>>>> +            start_addr = rvsp_ref_memmap[RVSP_FLASH].base;
>>>>> +        } else {
>>>>> +            /*
>>>>> +             * Pflash was supplied but either KVM guest or bios is not none.
>>>>> +             * In this case, base of the flash would contain S-mode payload.
>>>>> +             */
>>>>> +            riscv_setup_firmware_boot(machine);
>>>>> +            kernel_entry = rvsp_ref_memmap[RVSP_FLASH].base;
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    if (machine->kernel_filename && !kernel_entry) {
>>>>> +        kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>>>>> +                                                         firmware_end_addr);
>>>>> +
>>>>> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
>>>>> +                                         kernel_start_addr, true, NULL);
>>>>> +    }
>>>>> +
>>>>> +    fdt_load_addr = riscv_compute_fdt_addr(memmap[RVSP_DRAM].base,
>>>>> +                                           memmap[RVSP_DRAM].size,
>>>>> +                                           machine);
>>>>> +    riscv_load_fdt(fdt_load_addr, machine->fdt);
>>>>> +
>>>>> +    /* load the reset vector */
>>>>> +    riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
>>>>> +                              rvsp_ref_memmap[RVSP_MROM].base,
>>>>> +                              rvsp_ref_memmap[RVSP_MROM].size, kernel_entry,
>>>>> +                              fdt_load_addr);
>>>>> +
>>>>> +}
>>>>> +
>>>>> +static void rvsp_ref_machine_init(MachineState *machine)
>>>>> +{
>>>>> +    const MemMapEntry *memmap = rvsp_ref_memmap;
>>>>> +    RVSPMachineState *s = RVSP_REF_MACHINE(machine);
>>>>> +    MemoryRegion *system_memory = get_system_memory();
>>>>> +    MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
>>>>> +    char *soc_name;
>>>>> +    DeviceState *mmio_irqchip, *pcie_irqchip;
>>>>> +    int i, base_hartid, hart_count;
>>>>> +    int socket_count = riscv_socket_count(machine);
>>>>> +
>>>>> +    /* Check socket count limit */
>>>>> +    if (RVSP_SOCKETS_MAX < socket_count) {
>>>>> +        error_report("number of sockets/nodes should be less than %d",
>>>>> +            RVSP_SOCKETS_MAX);
>>>>> +        exit(1);
>>>>> +    }
>>>>> +
>>>>> +    if (!tcg_enabled()) {
>>>>> +        error_report("'aclint' is only available with TCG acceleration");
>>>>> +        exit(1);
>>>>> +    }
>>>>> +
>>>>> +    /* Initialize sockets */
>>>>> +    mmio_irqchip = pcie_irqchip = NULL;
>>>>> +    for (i = 0; i < socket_count; i++) {
>>>>> +        if (!riscv_socket_check_hartids(machine, i)) {
>>>>> +            error_report("discontinuous hartids in socket%d", i);
>>>>> +            exit(1);
>>>>> +        }
>>>>> +
>>>>> +        base_hartid = riscv_socket_first_hartid(machine, i);
>>>>> +        if (base_hartid < 0) {
>>>>> +            error_report("can't find hartid base for socket%d", i);
>>>>> +            exit(1);
>>>>> +        }
>>>>> +
>>>>> +        hart_count = riscv_socket_hart_count(machine, i);
>>>>> +        if (hart_count < 0) {
>>>>> +            error_report("can't find hart count for socket%d", i);
>>>>> +            exit(1);
>>>>> +        }
>>>>> +
>>>>> +        soc_name = g_strdup_printf("soc%d", i);
>>>>> +        object_initialize_child(OBJECT(machine), soc_name, &s->soc[i],
>>>>> +                                TYPE_RISCV_HART_ARRAY);
>>>>> +        g_free(soc_name);
>>>>> +        object_property_set_str(OBJECT(&s->soc[i]), "cpu-type",
>>>>> +                                machine->cpu_type, &error_abort);
>>>>> +        object_property_set_int(OBJECT(&s->soc[i]), "hartid-base",
>>>>> +                                base_hartid, &error_abort);
>>>>> +        object_property_set_int(OBJECT(&s->soc[i]), "num-harts",
>>>>> +                                hart_count, &error_abort);
>>>>> +        sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_fatal);
>>>>> +
>>>>> +        /* Per-socket ACLINT MTIMER */
>>>>> +        riscv_aclint_mtimer_create(memmap[RVSP_ACLINT].base +
>>>>> +                i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
>>>>> +            RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
>>>>> +            base_hartid, hart_count,
>>>>> +            RISCV_ACLINT_DEFAULT_MTIMECMP,
>>>>> +            RISCV_ACLINT_DEFAULT_MTIME,
>>>>> +            RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
>>>>> +
>>>>> +        /* Per-socket interrupt controller */
>>>>> +        s->irqchip[i] = rvsp_ref_create_aia(s->aia_guests,
>>>>> +                                            memmap, i, base_hartid,
>>>>> +                                            hart_count);
>>>>> +
>>>>> +        /* Try to use different IRQCHIP instance based device type */
>>>>> +        if (i == 0) {
>>>>> +            mmio_irqchip = s->irqchip[i];
>>>>> +            pcie_irqchip = s->irqchip[i];
>>>>> +        }
>>>>> +        if (i == 1) {
>>>>> +            pcie_irqchip = s->irqchip[i];
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    s->memmap = rvsp_ref_memmap;
>>>>> +
>>>>> +    /* register system main memory (actual RAM) */
>>>>> +    memory_region_add_subregion(system_memory, memmap[RVSP_DRAM].base,
>>>>> +        machine->ram);
>>>>> +
>>>>> +    /* boot rom */
>>>>> +    memory_region_init_rom(mask_rom, NULL, "riscv_rvsp_ref_board.mrom",
>>>>> +                           memmap[RVSP_MROM].size, &error_fatal);
>>>>> +    memory_region_add_subregion(system_memory, memmap[RVSP_MROM].base,
>>>>> +                                mask_rom);
>>>>> +
>>>>> +    /* SiFive Test MMIO device */
>>>>> +    sifive_test_create(memmap[RVSP_TEST].base);
>>>>> +
>>>>> +    gpex_pcie_init(system_memory, pcie_irqchip, s);
>>>>> +
>>>>> +    serial_mm_init(system_memory, memmap[RVSP_UART0].base,
>>>>> +        0, qdev_get_gpio_in(mmio_irqchip, RVSP_UART0_IRQ), 399193,
>>>>> +        serial_hd(0), DEVICE_LITTLE_ENDIAN);
>>>>> +
>>>>> +    sysbus_create_simple("goldfish_rtc", memmap[RVSP_RTC].base,
>>>>> +        qdev_get_gpio_in(mmio_irqchip, RVSP_RTC_IRQ));
>>>>> +
>>>>> +    for (i = 0; i < ARRAY_SIZE(s->flash); i++) {
>>>>> +        /* Map legacy -drive if=pflash to machine properties */
>>>>> +        pflash_cfi01_legacy_drive(s->flash[i],
>>>>> +                                  drive_get(IF_PFLASH, 0, i));
>>>>> +    }
>>>>> +    rvsp_flash_map(s, system_memory);
>>>>> +
>>>>> +    s->machine_done.notify = rvsp_ref_machine_done;
>>>>> +    qemu_add_machine_init_done_notifier(&s->machine_done);
>>>>> +}
>>>>> +
>>>>> +static void rvsp_ref_machine_instance_init(Object *obj)
>>>>> +{
>>>>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>>>>> +
>>>>> +    rvsp_flash_create(s);
>>>>> +}
>>>>> +
>>>>> +static char *rvsp_ref_get_aia_guests(Object *obj, Error **errp)
>>>>> +{
>>>>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>>>>> +    char val[32];
>>>>> +
>>>>> +    sprintf(val, "%d", s->aia_guests);
>>>>> +    return g_strdup(val);
>>>>> +}
>>>>> +
>>>>> +static void rvsp_ref_set_aia_guests(Object *obj, const char *val, Error **errp)
>>>>> +{
>>>>> +    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
>>>>> +
>>>>> +    s->aia_guests = atoi(val);
>>>>> +    if (s->aia_guests < 0 || s->aia_guests > RVSP_IRQCHIP_MAX_GUESTS) {
>>>>> +        error_setg(errp, "Invalid number of AIA IMSIC guests");
>>>>> +        error_append_hint(errp, "Valid values be between 0 and %d.\n",
>>>>> +                          RVSP_IRQCHIP_MAX_GUESTS);
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +static void rvsp_ref_machine_class_init(ObjectClass *oc, void *data)
>>>>> +{
>>>>> +    char str[128];
>>>>> +    MachineClass *mc = MACHINE_CLASS(oc);
>>>>> +
>>>>> +    mc->desc = "RISC-V Server SoC Reference board";
>>>>> +    mc->init = rvsp_ref_machine_init;
>>>>> +    mc->max_cpus = RVSP_CPUS_MAX;
>>>>> +    mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
>>>>> +    mc->pci_allow_0_address = true;
>>>>> +    mc->default_nic = "e1000e";
>>>>> +    mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
>>>>> +    mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
>>>>> +    mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
>>>>> +    mc->numa_mem_supported = true;
>>>>> +    /* platform instead of architectural choice */
>>>>> +    mc->cpu_cluster_has_numa_boundary = true;
>>>>> +    mc->default_ram_id = "riscv_rvsp_ref_board.ram";
>>>>> +
>>>>> +    object_class_property_add_str(oc, "aia-guests",
>>>>> +                                  rvsp_ref_get_aia_guests,
>>>>> +                                  rvsp_ref_set_aia_guests);
>>>>> +    sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid value "
>>>>> +                 "should be between 0 and %d.", RVSP_IRQCHIP_MAX_GUESTS);
>>>>> +    object_class_property_set_description(oc, "aia-guests", str);
>>>>> +}
>>>>> +
>>>>> +static const TypeInfo rvsp_ref_typeinfo = {
>>>>> +    .name       = TYPE_RVSP_REF_MACHINE,
>>>>> +    .parent     = TYPE_MACHINE,
>>>>> +    .class_init = rvsp_ref_machine_class_init,
>>>>> +    .instance_init = rvsp_ref_machine_instance_init,
>>>>> +    .instance_size = sizeof(RVSPMachineState),
>>>>> +};
>>>>> +
>>>>> +static void rvsp_ref_init_register_types(void)
>>>>> +{
>>>>> +    type_register_static(&rvsp_ref_typeinfo);
>>>>> +}
>>>>> +
>>>>> +type_init(rvsp_ref_init_register_types)
>>>>> --
>>>>> 2.34.1
>>>>>
>>>>>
>>>
Andrew Jones March 8, 2024, 9:20 a.m. UTC | #11
On Thu, Mar 07, 2024 at 02:26:18PM +0800, Wu, Fei wrote:
> On 3/7/2024 8:48 AM, Alistair Francis wrote:
> > On Thu, Mar 7, 2024 at 5:13 AM Atish Kumar Patra <atishp@rivosinc.com> wrote:
> >>
> >> On Wed, Mar 6, 2024 at 4:56 AM Wu, Fei <fei2.wu@intel.com> wrote:
> >>>
> >>> On 3/6/2024 8:19 AM, Alistair Francis wrote:
> >>>> On Mon, Mar 4, 2024 at 8:28 PM Fei Wu <fei2.wu@intel.com> wrote:
...
> >>>>> +config SERVER_PLATFORM_REF
> >>>>> +    bool
> >>>>> +    select RISCV_NUMA
> >>>>> +    select GOLDFISH_RTC
> >>>>> +    select PCI
> >>>>> +    select PCI_EXPRESS_GENERIC_BRIDGE
> >>>>> +    select PFLASH_CFI01
> >>>>> +    select SERIAL
> >>>>> +    select RISCV_ACLINT
> >>>>> +    select RISCV_APLIC
> >>>>> +    select RISCV_IMSIC
> >>>>> +    select SIFIVE_TEST
> >>>>
> >>>> Do we really need SiFive Test in the server platform?
> >>>>
> >>> It's used to reset the system, is there any better choice?
> > 
> > If we add this now we are stuck with it forever (or at least a long
> > time). So it'd be nice to think about these and decide if these really
> > are the best way to do things. We don't have to just copy the existing
> > virt machine.
> > 
> We need a solution to poweroff/reboot, and sifive test is one of the
> hardware implementations, so in general I think it's okay. But I agree
> Sifive test looks a device for testing only.
> 
> > There must be a more standard way to do this then MMIO mapped SiFive hardware?
> > 
> The mapped MMIO mechanism leveraged by Sifive test by itself is kinda
> generic, the sbsa_ec for sbsa-ref is also an MMIO mapped device. These
> two devices look very similar except different encodings of the
> shutdown/reboot command.
> 
> Probably we can have a generic shutdown/reboot device in QEMU for both
> sifive test and sbsa_ec, and likely more (not in this patch series). In
> this way, sifive test device will be replaced by this more generic
> device. Any suggestions?

Operating systems shouldn't need to implement odd-ball device drivers to
function on a reference of a standard platform. So the reference platform
should only be comprised of devices which have specifications and already,
or will, have DT bindings. Generic devices would be best, but I don't
think it should be a problem to use devices from multiple vendors. The
devices just need to allow GPL drivers to be written. With all that in
mind, what about adding a generic GPIO controller or using SiFive's GPIO
controller. Then, we could add gpio-restart and gpio-poweroff.

Thanks,
drew
Wu, Fei March 11, 2024, 11:55 a.m. UTC | #12
On 3/8/2024 5:20 PM, Andrew Jones wrote:
> On Thu, Mar 07, 2024 at 02:26:18PM +0800, Wu, Fei wrote:
>> On 3/7/2024 8:48 AM, Alistair Francis wrote:
>>> On Thu, Mar 7, 2024 at 5:13 AM Atish Kumar Patra <atishp@rivosinc.com> wrote:
>>>>
>>>> On Wed, Mar 6, 2024 at 4:56 AM Wu, Fei <fei2.wu@intel.com> wrote:
>>>>>
>>>>> On 3/6/2024 8:19 AM, Alistair Francis wrote:
>>>>>> On Mon, Mar 4, 2024 at 8:28 PM Fei Wu <fei2.wu@intel.com> wrote:
> ...
>>>>>>> +config SERVER_PLATFORM_REF
>>>>>>> +    bool
>>>>>>> +    select RISCV_NUMA
>>>>>>> +    select GOLDFISH_RTC
>>>>>>> +    select PCI
>>>>>>> +    select PCI_EXPRESS_GENERIC_BRIDGE
>>>>>>> +    select PFLASH_CFI01
>>>>>>> +    select SERIAL
>>>>>>> +    select RISCV_ACLINT
>>>>>>> +    select RISCV_APLIC
>>>>>>> +    select RISCV_IMSIC
>>>>>>> +    select SIFIVE_TEST
>>>>>>
>>>>>> Do we really need SiFive Test in the server platform?
>>>>>>
>>>>> It's used to reset the system, is there any better choice?
>>>
>>> If we add this now we are stuck with it forever (or at least a long
>>> time). So it'd be nice to think about these and decide if these really
>>> are the best way to do things. We don't have to just copy the existing
>>> virt machine.
>>>
>> We need a solution to poweroff/reboot, and sifive test is one of the
>> hardware implementations, so in general I think it's okay. But I agree
>> Sifive test looks a device for testing only.
>>
>>> There must be a more standard way to do this then MMIO mapped SiFive hardware?
>>>
>> The mapped MMIO mechanism leveraged by Sifive test by itself is kinda
>> generic, the sbsa_ec for sbsa-ref is also an MMIO mapped device. These
>> two devices look very similar except different encodings of the
>> shutdown/reboot command.
>>
>> Probably we can have a generic shutdown/reboot device in QEMU for both
>> sifive test and sbsa_ec, and likely more (not in this patch series). In
>> this way, sifive test device will be replaced by this more generic
>> device. Any suggestions?
> 
> Operating systems shouldn't need to implement odd-ball device drivers to
> function on a reference of a standard platform. So the reference platform
> should only be comprised of devices which have specifications and already,
> or will, have DT bindings. Generic devices would be best, but I don't
> think it should be a problem to use devices from multiple vendors. The
> devices just need to allow GPL drivers to be written. With all that in
> mind, what about adding a generic GPIO controller or using SiFive's GPIO
> controller. Then, we could add gpio-restart and gpio-poweroff.
> 
I agree with most of what you said. Regarding generic devices, syscon
looks a better choice than gpio in the current situation.

Linux kernel has these configurations enabled for virt, and I'm not
going to add a new soc for this new board currently, we can use the same
syscon interface for power, and it has already well supported.

config SOC_VIRT
	bool "QEMU Virt Machine"
	select CLINT_TIMER if RISCV_M_MODE
	select POWER_RESET
	select POWER_RESET_SYSCON
	select POWER_RESET_SYSCON_POWEROFF
	select GOLDFISH

For the qemu part, we can remove device 'sifive_test' and manage that
memory region directly with MemoryRegionOps, similar to what
hw/mips/boston.c does.

Thanks,
Fei.

> Thanks,
> drew
> 
> 
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#50): https://lists.riscv.org/g/tech-server-platform/message/50
> Mute This Topic: https://lists.riscv.org/mt/104719372/7152971
> Group Owner: tech-server-platform+owner@lists.riscv.org
> Unsubscribe: https://lists.riscv.org/g/tech-server-platform/unsub [fei2.wu@intel.com]
> -=-=-=-=-=-=-=-=-=-=-=-
> 
>
Andrew Jones March 11, 2024, 2:38 p.m. UTC | #13
On Mon, Mar 11, 2024 at 04:55:24AM -0700, Wu, Fei2 wrote:
> On 3/8/2024 5:20 PM, Andrew Jones wrote:
> > On Thu, Mar 07, 2024 at 02:26:18PM +0800, Wu, Fei wrote:
> >> On 3/7/2024 8:48 AM, Alistair Francis wrote:
> >>> On Thu, Mar 7, 2024 at 5:13 AM Atish Kumar Patra <atishp@rivosinc.com> wrote:
> >>>>
> >>>> On Wed, Mar 6, 2024 at 4:56 AM Wu, Fei <fei2.wu@intel.com> wrote:
> >>>>>
> >>>>> On 3/6/2024 8:19 AM, Alistair Francis wrote:
> >>>>>> On Mon, Mar 4, 2024 at 8:28 PM Fei Wu <fei2.wu@intel.com> wrote:
> > ...
> >>>>>>> +config SERVER_PLATFORM_REF
> >>>>>>> +    bool
> >>>>>>> +    select RISCV_NUMA
> >>>>>>> +    select GOLDFISH_RTC
> >>>>>>> +    select PCI
> >>>>>>> +    select PCI_EXPRESS_GENERIC_BRIDGE
> >>>>>>> +    select PFLASH_CFI01
> >>>>>>> +    select SERIAL
> >>>>>>> +    select RISCV_ACLINT
> >>>>>>> +    select RISCV_APLIC
> >>>>>>> +    select RISCV_IMSIC
> >>>>>>> +    select SIFIVE_TEST
> >>>>>>
> >>>>>> Do we really need SiFive Test in the server platform?
> >>>>>>
> >>>>> It's used to reset the system, is there any better choice?
> >>>
> >>> If we add this now we are stuck with it forever (or at least a long
> >>> time). So it'd be nice to think about these and decide if these really
> >>> are the best way to do things. We don't have to just copy the existing
> >>> virt machine.
> >>>
> >> We need a solution to poweroff/reboot, and sifive test is one of the
> >> hardware implementations, so in general I think it's okay. But I agree
> >> Sifive test looks a device for testing only.
> >>
> >>> There must be a more standard way to do this then MMIO mapped SiFive hardware?
> >>>
> >> The mapped MMIO mechanism leveraged by Sifive test by itself is kinda
> >> generic, the sbsa_ec for sbsa-ref is also an MMIO mapped device. These
> >> two devices look very similar except different encodings of the
> >> shutdown/reboot command.
> >>
> >> Probably we can have a generic shutdown/reboot device in QEMU for both
> >> sifive test and sbsa_ec, and likely more (not in this patch series). In
> >> this way, sifive test device will be replaced by this more generic
> >> device. Any suggestions?
> > 
> > Operating systems shouldn't need to implement odd-ball device drivers to
> > function on a reference of a standard platform. So the reference platform
> > should only be comprised of devices which have specifications and already,
> > or will, have DT bindings. Generic devices would be best, but I don't
> > think it should be a problem to use devices from multiple vendors. The
> > devices just need to allow GPL drivers to be written. With all that in
> > mind, what about adding a generic GPIO controller or using SiFive's GPIO
> > controller. Then, we could add gpio-restart and gpio-poweroff.
> > 
> I agree with most of what you said. Regarding generic devices, syscon
> looks a better choice than gpio in the current situation.
> 
> Linux kernel has these configurations enabled for virt, and I'm not
> going to add a new soc for this new board currently, we can use the same
> syscon interface for power, and it has already well supported.
> 
> config SOC_VIRT
> 	bool "QEMU Virt Machine"
> 	select CLINT_TIMER if RISCV_M_MODE
> 	select POWER_RESET
> 	select POWER_RESET_SYSCON
> 	select POWER_RESET_SYSCON_POWEROFF
> 	select GOLDFISH
> 
> For the qemu part, we can remove device 'sifive_test' and manage that
> memory region directly with MemoryRegionOps, similar to what
> hw/mips/boston.c does.

OK, that sounds good. Also, I guess the real concern is whether firmware
(e.g. OpenSBI) supports the platform's power-off device, since firmware
will present the SRST SBI call to Linux, so Linux doesn't need to worry
about it at all. However, if we want Linux to worry about it, then we
can't forget to ensure we can implement the syscon interface in AML for
ACPI too. Indeed, we should be introducing ACPI support for this reference
machine type at the same time we introduce the machine in order to ensure
all device selections have, or will have, both DT and ACPI support.

Thanks,
drew
Atish Patra March 12, 2024, 12:19 a.m. UTC | #14
On Mon, Mar 11, 2024 at 7:38 AM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Mon, Mar 11, 2024 at 04:55:24AM -0700, Wu, Fei2 wrote:
> > On 3/8/2024 5:20 PM, Andrew Jones wrote:
> > > On Thu, Mar 07, 2024 at 02:26:18PM +0800, Wu, Fei wrote:
> > >> On 3/7/2024 8:48 AM, Alistair Francis wrote:
> > >>> On Thu, Mar 7, 2024 at 5:13 AM Atish Kumar Patra <atishp@rivosinc.com> wrote:
> > >>>>
> > >>>> On Wed, Mar 6, 2024 at 4:56 AM Wu, Fei <fei2.wu@intel.com> wrote:
> > >>>>>
> > >>>>> On 3/6/2024 8:19 AM, Alistair Francis wrote:
> > >>>>>> On Mon, Mar 4, 2024 at 8:28 PM Fei Wu <fei2.wu@intel.com> wrote:
> > > ...
> > >>>>>>> +config SERVER_PLATFORM_REF
> > >>>>>>> +    bool
> > >>>>>>> +    select RISCV_NUMA
> > >>>>>>> +    select GOLDFISH_RTC
> > >>>>>>> +    select PCI
> > >>>>>>> +    select PCI_EXPRESS_GENERIC_BRIDGE
> > >>>>>>> +    select PFLASH_CFI01
> > >>>>>>> +    select SERIAL
> > >>>>>>> +    select RISCV_ACLINT
> > >>>>>>> +    select RISCV_APLIC
> > >>>>>>> +    select RISCV_IMSIC
> > >>>>>>> +    select SIFIVE_TEST
> > >>>>>>
> > >>>>>> Do we really need SiFive Test in the server platform?
> > >>>>>>
> > >>>>> It's used to reset the system, is there any better choice?
> > >>>
> > >>> If we add this now we are stuck with it forever (or at least a long
> > >>> time). So it'd be nice to think about these and decide if these really
> > >>> are the best way to do things. We don't have to just copy the existing
> > >>> virt machine.
> > >>>
> > >> We need a solution to poweroff/reboot, and sifive test is one of the
> > >> hardware implementations, so in general I think it's okay. But I agree
> > >> Sifive test looks a device for testing only.
> > >>
> > >>> There must be a more standard way to do this then MMIO mapped SiFive hardware?
> > >>>
> > >> The mapped MMIO mechanism leveraged by Sifive test by itself is kinda
> > >> generic, the sbsa_ec for sbsa-ref is also an MMIO mapped device. These
> > >> two devices look very similar except different encodings of the
> > >> shutdown/reboot command.
> > >>
> > >> Probably we can have a generic shutdown/reboot device in QEMU for both
> > >> sifive test and sbsa_ec, and likely more (not in this patch series). In
> > >> this way, sifive test device will be replaced by this more generic
> > >> device. Any suggestions?
> > >
> > > Operating systems shouldn't need to implement odd-ball device drivers to
> > > function on a reference of a standard platform. So the reference platform
> > > should only be comprised of devices which have specifications and already,
> > > or will, have DT bindings. Generic devices would be best, but I don't
> > > think it should be a problem to use devices from multiple vendors. The
> > > devices just need to allow GPL drivers to be written. With all that in
> > > mind, what about adding a generic GPIO controller or using SiFive's GPIO
> > > controller. Then, we could add gpio-restart and gpio-poweroff.
> > >
> > I agree with most of what you said. Regarding generic devices, syscon
> > looks a better choice than gpio in the current situation.
> >
> > Linux kernel has these configurations enabled for virt, and I'm not
> > going to add a new soc for this new board currently, we can use the same
> > syscon interface for power, and it has already well supported.
> >
> > config SOC_VIRT
> >       bool "QEMU Virt Machine"
> >       select CLINT_TIMER if RISCV_M_MODE
> >       select POWER_RESET
> >       select POWER_RESET_SYSCON
> >       select POWER_RESET_SYSCON_POWEROFF
> >       select GOLDFISH
> >
> > For the qemu part, we can remove device 'sifive_test' and manage that
> > memory region directly with MemoryRegionOps, similar to what
> > hw/mips/boston.c does.
>
> OK, that sounds good. Also, I guess the real concern is whether firmware
> (e.g. OpenSBI) supports the platform's power-off device, since firmware
> will present the SRST SBI call to Linux, so Linux doesn't need to worry
> about it at all. However, if we want Linux to worry about it, then we

Syscon devices are already supported in OpenSBI. So syscon seems to be
the best option right now.

> can't forget to ensure we can implement the syscon interface in AML for
> ACPI too. Indeed, we should be introducing ACPI support for this reference
> machine type at the same time we introduce the machine in order to ensure
> all device selections have, or will have, both DT and ACPI support.
>

Yeah. In addition to that, this reference platform also needs to
generate minimalistic DT
for OpenSBI even though only ACPI is required for Linux. IIRC,
sbsa-ref also does something similar.

> Thanks,
> drew
diff mbox series

Patch

diff --git a/configs/devices/riscv64-softmmu/default.mak b/configs/devices/riscv64-softmmu/default.mak
index 3f68059448..a1d98e49ef 100644
--- a/configs/devices/riscv64-softmmu/default.mak
+++ b/configs/devices/riscv64-softmmu/default.mak
@@ -10,5 +10,6 @@  CONFIG_SPIKE=y
 CONFIG_SIFIVE_E=y
 CONFIG_SIFIVE_U=y
 CONFIG_RISCV_VIRT=y
+CONFIG_SERVER_PLATFORM_REF=y
 CONFIG_MICROCHIP_PFSOC=y
 CONFIG_SHAKTI_C=y
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index 5d644eb7b1..debac5a7f5 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -48,6 +48,19 @@  config RISCV_VIRT
     select ACPI
     select ACPI_PCI
 
+config SERVER_PLATFORM_REF
+    bool
+    select RISCV_NUMA
+    select GOLDFISH_RTC
+    select PCI
+    select PCI_EXPRESS_GENERIC_BRIDGE
+    select PFLASH_CFI01
+    select SERIAL
+    select RISCV_ACLINT
+    select RISCV_APLIC
+    select RISCV_IMSIC
+    select SIFIVE_TEST
+
 config SHAKTI_C
     bool
     select RISCV_ACLINT
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index 2f7ee81be3..bb3aff91ea 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -4,6 +4,7 @@  riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: files('numa.c'))
 riscv_ss.add(files('riscv_hart.c'))
 riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c'))
 riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c'))
+riscv_ss.add(when: 'CONFIG_SERVER_PLATFORM_REF', if_true: files('server_platform_ref.c'))
 riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c'))
 riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
 riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
diff --git a/hw/riscv/server_platform_ref.c b/hw/riscv/server_platform_ref.c
new file mode 100644
index 0000000000..ae90c4b27a
--- /dev/null
+++ b/hw/riscv/server_platform_ref.c
@@ -0,0 +1,1244 @@ 
+/*
+ * QEMU RISC-V Server Platfrom (RVSP) Reference Board
+ *
+ * Copyright (c) 2024 Intel, Inc.
+ *
+ * This board is compliant RISC-V Server platform specification and leveraging
+ * a lot of riscv virt code.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/error-report.h"
+#include "qemu/guest-random.h"
+#include "qapi/error.h"
+#include "qapi/qapi-visit-common.h"
+#include "hw/boards.h"
+#include "hw/loader.h"
+#include "hw/sysbus.h"
+#include "hw/qdev-properties.h"
+#include "hw/char/serial.h"
+#include "hw/block/flash.h"
+#include "hw/ide/pci.h"
+#include "hw/ide/ahci-pci.h"
+#include "hw/pci/pci.h"
+#include "hw/pci-host/gpex.h"
+#include "hw/core/sysbus-fdt.h"
+#include "hw/riscv/riscv_hart.h"
+#include "hw/riscv/boot.h"
+#include "hw/riscv/numa.h"
+#include "hw/intc/riscv_aclint.h"
+#include "hw/intc/riscv_aplic.h"
+#include "hw/intc/riscv_imsic.h"
+#include "hw/misc/sifive_test.h"
+#include "chardev/char.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/tcg.h"
+#include "target/riscv/cpu.h"
+#include "target/riscv/pmu.h"
+#include "net/net.h"
+
+#define RVSP_CPUS_MAX_BITS             9
+#define RVSP_CPUS_MAX                  (1 << RVSP_CPUS_MAX_BITS)
+#define RVSP_SOCKETS_MAX_BITS          2
+#define RVSP_SOCKETS_MAX               (1 << RVSP_SOCKETS_MAX_BITS)
+
+#define RVSP_IRQCHIP_NUM_MSIS 255
+#define RVSP_IRQCHIP_NUM_SOURCES 96
+#define RVSP_IRQCHIP_NUM_PRIO_BITS 3
+#define RVSP_IRQCHIP_MAX_GUESTS_BITS 3
+#define RVSP_IRQCHIP_MAX_GUESTS ((1U << RVSP_IRQCHIP_MAX_GUESTS_BITS) - 1U)
+
+#define FDT_PCI_ADDR_CELLS    3
+#define FDT_PCI_INT_CELLS     1
+#define FDT_APLIC_INT_CELLS   2
+#define FDT_IMSIC_INT_CELLS   0
+#define FDT_MAX_INT_CELLS     2
+#define FDT_MAX_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
+                                 1 + FDT_MAX_INT_CELLS)
+#define FDT_APLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
+                                 1 + FDT_APLIC_INT_CELLS)
+
+#define NUM_SATA_PORTS  6
+
+#define TYPE_RVSP_REF_MACHINE MACHINE_TYPE_NAME("rvsp-ref")
+OBJECT_DECLARE_SIMPLE_TYPE(RVSPMachineState, RVSP_REF_MACHINE)
+
+struct RVSPMachineState {
+    /*< private >*/
+    MachineState parent;
+
+    /*< public >*/
+    Notifier machine_done;
+    RISCVHartArrayState soc[RVSP_SOCKETS_MAX];
+    DeviceState *irqchip[RVSP_SOCKETS_MAX];
+    PFlashCFI01 *flash[2];
+
+    int fdt_size;
+    int aia_guests;
+    const MemMapEntry *memmap;
+};
+
+enum {
+    RVSP_DEBUG,
+    RVSP_MROM,
+    RVSP_TEST,
+    RVSP_RTC,
+    RVSP_ACLINT,
+    RVSP_APLIC_M,
+    RVSP_APLIC_S,
+    RVSP_UART0,
+    RVSP_IMSIC_M,
+    RVSP_IMSIC_S,
+    RVSP_FLASH,
+    RVSP_DRAM,
+    RVSP_PCIE_MMIO,
+    RVSP_PCIE_PIO,
+    RVSP_PCIE_ECAM,
+    RVSP_PCIE_MMIO_HIGH
+};
+
+enum {
+    RVSP_UART0_IRQ = 10,
+    RVSP_RTC_IRQ = 11,
+    RVSP_PCIE_IRQ = 0x20, /* 32 to 35 */
+};
+
+/*
+ * The server soc reference machine physical address space used by some of the
+ * devices namely ACLINT, APLIC and IMSIC depend on number of Sockets, number
+ * of CPUs, and number of IMSIC guest files.
+ *
+ * Various limits defined by RVSP_SOCKETS_MAX_BITS, RVSP_CPUS_MAX_BITS, and
+ * RVSP_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization of server soc
+ * reference machine physical address space.
+ */
+
+#define RVSP_IMSIC_GROUP_MAX_SIZE      (1U << IMSIC_MMIO_GROUP_MIN_SHIFT)
+#if RVSP_IMSIC_GROUP_MAX_SIZE < \
+    IMSIC_GROUP_SIZE(RVSP_CPUS_MAX_BITS, RVSP_IRQCHIP_MAX_GUESTS_BITS)
+#error "Can't accomodate single IMSIC group in address space"
+#endif
+
+#define RVSP_IMSIC_MAX_SIZE            (RVSP_SOCKETS_MAX * \
+                                        RVSP_IMSIC_GROUP_MAX_SIZE)
+#if 0x4000000 < RVSP_IMSIC_MAX_SIZE
+#error "Can't accomodate all IMSIC groups in address space"
+#endif
+
+static const MemMapEntry rvsp_ref_memmap[] = {
+    [RVSP_DEBUG] =          {        0x0,         0x100 },
+    [RVSP_MROM] =           {     0x1000,        0xf000 },
+    [RVSP_TEST] =           {   0x100000,        0x1000 },
+    [RVSP_RTC] =            {   0x101000,        0x1000 },
+    [RVSP_ACLINT] =         {  0x2000000,       0x10000 },
+    [RVSP_PCIE_PIO] =       {  0x3000000,       0x10000 },
+    [RVSP_APLIC_M] =        {  0xc000000, APLIC_SIZE(RVSP_CPUS_MAX) },
+    [RVSP_APLIC_S] =        {  0xd000000, APLIC_SIZE(RVSP_CPUS_MAX) },
+    [RVSP_UART0] =          { 0x10000000,         0x100 },
+    [RVSP_FLASH] =          { 0x20000000,     0x4000000 },
+    [RVSP_IMSIC_M] =        { 0x24000000, RVSP_IMSIC_MAX_SIZE },
+    [RVSP_IMSIC_S] =        { 0x28000000, RVSP_IMSIC_MAX_SIZE },
+    [RVSP_PCIE_ECAM] =      { 0x30000000,    0x10000000 },
+    [RVSP_PCIE_MMIO] =      { 0x40000000,    0x40000000 },
+    [RVSP_DRAM] =           { 0x80000000, 0xff80000000ull },
+    [RVSP_PCIE_MMIO_HIGH] = { 0x10000000000ull, 0x10000000000ull },
+};
+
+#define RVSP_FLASH_SECTOR_SIZE (256 * KiB)
+
+static PFlashCFI01 *rvsp_flash_create1(RVSPMachineState *s,
+                                       const char *name,
+                                       const char *alias_prop_name)
+{
+    /*
+     * Create a single flash device.  We use the same parameters as
+     * the flash devices on the ARM virt board.
+     */
+    DeviceState *dev = qdev_new(TYPE_PFLASH_CFI01);
+
+    qdev_prop_set_uint64(dev, "sector-length", RVSP_FLASH_SECTOR_SIZE);
+    qdev_prop_set_uint8(dev, "width", 4);
+    qdev_prop_set_uint8(dev, "device-width", 2);
+    qdev_prop_set_bit(dev, "big-endian", false);
+    qdev_prop_set_uint16(dev, "id0", 0x89);
+    qdev_prop_set_uint16(dev, "id1", 0x18);
+    qdev_prop_set_uint16(dev, "id2", 0x00);
+    qdev_prop_set_uint16(dev, "id3", 0x00);
+    qdev_prop_set_string(dev, "name", name);
+
+    object_property_add_child(OBJECT(s), name, OBJECT(dev));
+    object_property_add_alias(OBJECT(s), alias_prop_name,
+                              OBJECT(dev), "drive");
+
+    return PFLASH_CFI01(dev);
+}
+
+static void rvsp_flash_create(RVSPMachineState *s)
+{
+    s->flash[0] = rvsp_flash_create1(s, "rvsp.flash0", "pflash0");
+    s->flash[1] = rvsp_flash_create1(s, "rvsp.flash1", "pflash1");
+}
+
+static void rvsp_flash_map1(PFlashCFI01 *flash,
+                            hwaddr base, hwaddr size,
+                            MemoryRegion *sysmem)
+{
+    DeviceState *dev = DEVICE(flash);
+
+    assert(QEMU_IS_ALIGNED(size, RVSP_FLASH_SECTOR_SIZE));
+    assert(size / RVSP_FLASH_SECTOR_SIZE <= UINT32_MAX);
+    qdev_prop_set_uint32(dev, "num-blocks", size / RVSP_FLASH_SECTOR_SIZE);
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
+    memory_region_add_subregion(sysmem, base,
+                                sysbus_mmio_get_region(SYS_BUS_DEVICE(dev),
+                                                       0));
+}
+
+static void rvsp_flash_map(RVSPMachineState *s,
+                           MemoryRegion *sysmem)
+{
+    hwaddr flashsize = rvsp_ref_memmap[RVSP_FLASH].size / 2;
+    hwaddr flashbase = rvsp_ref_memmap[RVSP_FLASH].base;
+
+    rvsp_flash_map1(s->flash[0], flashbase, flashsize, sysmem);
+    rvsp_flash_map1(s->flash[1], flashbase + flashsize, flashsize, sysmem);
+}
+
+static void create_pcie_irq_map(RVSPMachineState *s, void *fdt, char *nodename,
+                                uint32_t irqchip_phandle)
+{
+    int pin, dev;
+    uint32_t irq_map_stride = 0;
+    uint32_t full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS *
+                          FDT_MAX_INT_MAP_WIDTH] = {};
+    uint32_t *irq_map = full_irq_map;
+
+    /*
+     * This code creates a standard swizzle of interrupts such that
+     * each device's first interrupt is based on it's PCI_SLOT number.
+     * (See pci_swizzle_map_irq_fn())
+     *
+     * We only need one entry per interrupt in the table (not one per
+     * possible slot) seeing the interrupt-map-mask will allow the table
+     * to wrap to any number of devices.
+     */
+    for (dev = 0; dev < GPEX_NUM_IRQS; dev++) {
+        int devfn = dev * 0x8;
+
+        for (pin = 0; pin < GPEX_NUM_IRQS; pin++) {
+            int irq_nr = RVSP_PCIE_IRQ +
+                         ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS);
+            int i = 0;
+
+            /* Fill PCI address cells */
+            irq_map[i] = cpu_to_be32(devfn << 8);
+            i += FDT_PCI_ADDR_CELLS;
+
+            /* Fill PCI Interrupt cells */
+            irq_map[i] = cpu_to_be32(pin + 1);
+            i += FDT_PCI_INT_CELLS;
+
+            /* Fill interrupt controller phandle and cells */
+            irq_map[i++] = cpu_to_be32(irqchip_phandle);
+            irq_map[i++] = cpu_to_be32(irq_nr);
+            irq_map[i++] = cpu_to_be32(0x4);
+
+            if (!irq_map_stride) {
+                irq_map_stride = i;
+            }
+            irq_map += irq_map_stride;
+        }
+    }
+
+    qemu_fdt_setprop(fdt, nodename, "interrupt-map", full_irq_map,
+                     GPEX_NUM_IRQS * GPEX_NUM_IRQS *
+                     irq_map_stride * sizeof(uint32_t));
+
+    qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask",
+                           0x1800, 0, 0, 0x7);
+}
+
+static void create_fdt_socket_cpus(RVSPMachineState *s, int socket,
+                                   char *clust_name, uint32_t *phandle,
+                                   uint32_t *intc_phandles)
+{
+    int cpu;
+    uint32_t cpu_phandle;
+    MachineState *ms = MACHINE(s);
+    char *name, *cpu_name, *core_name, *intc_name, *sv_name;
+    bool is_32_bit = riscv_is_32bit(&s->soc[0]);
+    uint8_t satp_mode_max;
+
+    for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
+        RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu];
+
+        cpu_phandle = (*phandle)++;
+
+        cpu_name = g_strdup_printf("/cpus/cpu@%d",
+            s->soc[socket].hartid_base + cpu);
+        qemu_fdt_add_subnode(ms->fdt, cpu_name);
+
+        if (cpu_ptr->cfg.satp_mode.supported != 0) {
+            satp_mode_max = satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map);
+            sv_name = g_strdup_printf("riscv,%s",
+                                      satp_mode_str(satp_mode_max, is_32_bit));
+            qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name);
+            g_free(sv_name);
+        }
+
+        name = riscv_isa_string(cpu_ptr);
+        qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name);
+        g_free(name);
+
+        if (cpu_ptr->cfg.ext_zicbom) {
+            qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cbom-block-size",
+                                  cpu_ptr->cfg.cbom_blocksize);
+        }
+
+        if (cpu_ptr->cfg.ext_zicboz) {
+            qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cboz-block-size",
+                                  cpu_ptr->cfg.cboz_blocksize);
+        }
+
+        qemu_fdt_setprop_string(ms->fdt, cpu_name, "compatible", "riscv");
+        qemu_fdt_setprop_string(ms->fdt, cpu_name, "status", "okay");
+        qemu_fdt_setprop_cell(ms->fdt, cpu_name, "reg",
+            s->soc[socket].hartid_base + cpu);
+        qemu_fdt_setprop_string(ms->fdt, cpu_name, "device_type", "cpu");
+        riscv_socket_fdt_write_id(ms, cpu_name, socket);
+        qemu_fdt_setprop_cell(ms->fdt, cpu_name, "phandle", cpu_phandle);
+
+        intc_phandles[cpu] = (*phandle)++;
+
+        intc_name = g_strdup_printf("%s/interrupt-controller", cpu_name);
+        qemu_fdt_add_subnode(ms->fdt, intc_name);
+        qemu_fdt_setprop_cell(ms->fdt, intc_name, "phandle",
+            intc_phandles[cpu]);
+        qemu_fdt_setprop_string(ms->fdt, intc_name, "compatible",
+            "riscv,cpu-intc");
+        qemu_fdt_setprop(ms->fdt, intc_name, "interrupt-controller", NULL, 0);
+        qemu_fdt_setprop_cell(ms->fdt, intc_name, "#interrupt-cells", 1);
+
+        core_name = g_strdup_printf("%s/core%d", clust_name, cpu);
+        qemu_fdt_add_subnode(ms->fdt, core_name);
+        qemu_fdt_setprop_cell(ms->fdt, core_name, "cpu", cpu_phandle);
+
+        g_free(core_name);
+        g_free(intc_name);
+        g_free(cpu_name);
+    }
+}
+
+static void create_fdt_socket_memory(RVSPMachineState *s,
+                                     const MemMapEntry *memmap, int socket)
+{
+    char *mem_name;
+    uint64_t addr, size;
+    MachineState *ms = MACHINE(s);
+
+    addr = memmap[RVSP_DRAM].base + riscv_socket_mem_offset(ms, socket);
+    size = riscv_socket_mem_size(ms, socket);
+    mem_name = g_strdup_printf("/memory@%lx", (long)addr);
+    qemu_fdt_add_subnode(ms->fdt, mem_name);
+    qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg",
+        addr >> 32, addr, size >> 32, size);
+    qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory");
+    riscv_socket_fdt_write_id(ms, mem_name, socket);
+    g_free(mem_name);
+}
+
+static void create_fdt_socket_aclint(RVSPMachineState *s,
+                                     const MemMapEntry *memmap, int socket,
+                                     uint32_t *intc_phandles)
+{
+    int cpu;
+    char *name;
+    unsigned long addr, size;
+    uint32_t aclint_cells_size;
+    uint32_t *aclint_mtimer_cells;
+    MachineState *ms = MACHINE(s);
+
+    aclint_mtimer_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
+
+    for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
+        aclint_mtimer_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
+        aclint_mtimer_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_TIMER);
+    }
+    aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2;
+
+    addr = memmap[RVSP_ACLINT].base +
+           (RISCV_ACLINT_DEFAULT_MTIMER_SIZE * socket);
+    size = RISCV_ACLINT_DEFAULT_MTIMER_SIZE;
+
+    name = g_strdup_printf("/soc/mtimer@%lx", addr);
+    qemu_fdt_add_subnode(ms->fdt, name);
+    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
+        "riscv,aclint-mtimer");
+    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
+        0x0, addr + RISCV_ACLINT_DEFAULT_MTIME,
+        0x0, size - RISCV_ACLINT_DEFAULT_MTIME,
+        0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP,
+        0x0, RISCV_ACLINT_DEFAULT_MTIME);
+    qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
+        aclint_mtimer_cells, aclint_cells_size);
+    riscv_socket_fdt_write_id(ms, name, socket);
+    g_free(name);
+
+    g_free(aclint_mtimer_cells);
+}
+
+static uint32_t imsic_num_bits(uint32_t count)
+{
+    uint32_t ret = 0;
+
+    while (BIT(ret) < count) {
+        ret++;
+    }
+
+    return ret;
+}
+
+static void create_fdt_one_imsic(RVSPMachineState *s, hwaddr base_addr,
+                                 uint32_t *intc_phandles, uint32_t msi_phandle,
+                                 bool m_mode, uint32_t imsic_guest_bits)
+{
+    int cpu, socket;
+    char *imsic_name;
+    MachineState *ms = MACHINE(s);
+    int socket_count = riscv_socket_count(ms);
+    uint32_t imsic_max_hart_per_socket;
+    uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size;
+
+    imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
+    imsic_regs = g_new0(uint32_t, socket_count * 4);
+
+    for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
+        imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
+        imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
+    }
+
+    imsic_max_hart_per_socket = 0;
+    for (socket = 0; socket < socket_count; socket++) {
+        imsic_addr = base_addr + socket * RVSP_IMSIC_GROUP_MAX_SIZE;
+        imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) *
+                     s->soc[socket].num_harts;
+        imsic_regs[socket * 4 + 0] = 0;
+        imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
+        imsic_regs[socket * 4 + 2] = 0;
+        imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
+        if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
+            imsic_max_hart_per_socket = s->soc[socket].num_harts;
+        }
+    }
+
+    imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr);
+    qemu_fdt_add_subnode(ms->fdt, imsic_name);
+    qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics");
+    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
+                          FDT_IMSIC_INT_CELLS);
+    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0);
+    qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0);
+    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
+                     imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
+    qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
+                     socket_count * sizeof(uint32_t) * 4);
+    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
+                     RVSP_IRQCHIP_NUM_MSIS);
+
+    if (imsic_guest_bits) {
+        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,guest-index-bits",
+                              imsic_guest_bits);
+    }
+
+    if (socket_count > 1) {
+        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits",
+                              imsic_num_bits(imsic_max_hart_per_socket));
+        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits",
+                              imsic_num_bits(socket_count));
+        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift",
+                              IMSIC_MMIO_GROUP_MIN_SHIFT);
+    }
+    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle);
+
+    g_free(imsic_name);
+    g_free(imsic_regs);
+    g_free(imsic_cells);
+}
+
+static void create_fdt_imsic(RVSPMachineState *s, const MemMapEntry *memmap,
+                             uint32_t *phandle, uint32_t *intc_phandles,
+                             uint32_t *msi_m_phandle, uint32_t *msi_s_phandle)
+{
+    *msi_m_phandle = (*phandle)++;
+    *msi_s_phandle = (*phandle)++;
+
+    /* M-level IMSIC node */
+    create_fdt_one_imsic(s, memmap[RVSP_IMSIC_M].base, intc_phandles,
+                         *msi_m_phandle, true, 0);
+
+    /* S-level IMSIC node */
+    create_fdt_one_imsic(s, memmap[RVSP_IMSIC_S].base, intc_phandles,
+                         *msi_s_phandle, false,
+                         imsic_num_bits(s->aia_guests + 1));
+
+}
+
+static void create_fdt_one_aplic(RVSPMachineState *s, int socket,
+                                 unsigned long aplic_addr, uint32_t aplic_size,
+                                 uint32_t msi_phandle,
+                                 uint32_t *intc_phandles,
+                                 uint32_t aplic_phandle,
+                                 uint32_t aplic_child_phandle,
+                                 bool m_mode, int num_harts)
+{
+    int cpu;
+    char *aplic_name;
+    uint32_t *aplic_cells;
+    MachineState *ms = MACHINE(s);
+
+    aplic_cells = g_new0(uint32_t, num_harts * 2);
+
+    for (cpu = 0; cpu < num_harts; cpu++) {
+        aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
+        aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
+    }
+
+    aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
+    qemu_fdt_add_subnode(ms->fdt, aplic_name);
+    qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic");
+    qemu_fdt_setprop_cell(ms->fdt, aplic_name,
+                          "#interrupt-cells", FDT_APLIC_INT_CELLS);
+    qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0);
+
+    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle);
+
+    qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
+                           0x0, aplic_addr, 0x0, aplic_size);
+    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
+                          RVSP_IRQCHIP_NUM_SOURCES);
+
+    if (aplic_child_phandle) {
+        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
+                              aplic_child_phandle);
+        qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
+                               aplic_child_phandle, 0x1,
+                               RVSP_IRQCHIP_NUM_SOURCES);
+    }
+
+    riscv_socket_fdt_write_id(ms, aplic_name, socket);
+    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_phandle);
+
+    g_free(aplic_name);
+    g_free(aplic_cells);
+}
+
+static void create_fdt_socket_aplic(RVSPMachineState *s,
+                                    const MemMapEntry *memmap, int socket,
+                                    uint32_t msi_m_phandle,
+                                    uint32_t msi_s_phandle,
+                                    uint32_t *phandle,
+                                    uint32_t *intc_phandles,
+                                    uint32_t *aplic_phandles,
+                                    int num_harts)
+{
+    char *aplic_name;
+    unsigned long aplic_addr;
+    uint32_t aplic_m_phandle, aplic_s_phandle;
+
+    aplic_m_phandle = (*phandle)++;
+    aplic_s_phandle = (*phandle)++;
+
+    /* M-level APLIC node */
+    aplic_addr = memmap[RVSP_APLIC_M].base +
+                 (memmap[RVSP_APLIC_M].size * socket);
+    create_fdt_one_aplic(s, socket, aplic_addr, memmap[RVSP_APLIC_M].size,
+                         msi_m_phandle, intc_phandles,
+                         aplic_m_phandle, aplic_s_phandle,
+                         true, num_harts);
+
+    /* S-level APLIC node */
+    aplic_addr = memmap[RVSP_APLIC_S].base +
+                 (memmap[RVSP_APLIC_S].size * socket);
+    create_fdt_one_aplic(s, socket, aplic_addr, memmap[RVSP_APLIC_S].size,
+                         msi_s_phandle, intc_phandles,
+                         aplic_s_phandle, 0,
+                         false, num_harts);
+
+    aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
+
+    g_free(aplic_name);
+
+    aplic_phandles[socket] = aplic_s_phandle;
+}
+
+static void create_fdt_pmu(RVSPMachineState *s)
+{
+    char *pmu_name;
+    MachineState *ms = MACHINE(s);
+    RISCVCPU hart = s->soc[0].harts[0];
+
+    pmu_name = g_strdup_printf("/pmu");
+    qemu_fdt_add_subnode(ms->fdt, pmu_name);
+    qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible", "riscv,pmu");
+    riscv_pmu_generate_fdt_node(ms->fdt, hart.pmu_avail_ctrs, pmu_name);
+
+    g_free(pmu_name);
+}
+
+static void create_fdt_sockets(RVSPMachineState *s, const MemMapEntry *memmap,
+                               uint32_t *phandle,
+                               uint32_t *irq_mmio_phandle,
+                               uint32_t *irq_pcie_phandle,
+                               uint32_t *msi_pcie_phandle)
+{
+    char *clust_name;
+    int socket, phandle_pos;
+    MachineState *ms = MACHINE(s);
+    uint32_t msi_m_phandle = 0, msi_s_phandle = 0;
+    uint32_t *intc_phandles, xplic_phandles[MAX_NODES];
+    int socket_count = riscv_socket_count(ms);
+
+    qemu_fdt_add_subnode(ms->fdt, "/cpus");
+    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "timebase-frequency",
+                          RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ);
+    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0);
+    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", 0x1);
+    qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map");
+
+    intc_phandles = g_new0(uint32_t, ms->smp.cpus);
+
+    phandle_pos = ms->smp.cpus;
+    for (socket = (socket_count - 1); socket >= 0; socket--) {
+        phandle_pos -= s->soc[socket].num_harts;
+
+        clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket);
+        qemu_fdt_add_subnode(ms->fdt, clust_name);
+
+        create_fdt_socket_cpus(s, socket, clust_name, phandle,
+                               &intc_phandles[phandle_pos]);
+
+        create_fdt_socket_memory(s, memmap, socket);
+
+        g_free(clust_name);
+
+        create_fdt_socket_aclint(s, memmap, socket,
+            &intc_phandles[phandle_pos]);
+    }
+
+    create_fdt_imsic(s, memmap, phandle, intc_phandles,
+        &msi_m_phandle, &msi_s_phandle);
+    *msi_pcie_phandle = msi_s_phandle;
+
+    phandle_pos = ms->smp.cpus;
+    for (socket = (socket_count - 1); socket >= 0; socket--) {
+        phandle_pos -= s->soc[socket].num_harts;
+
+        create_fdt_socket_aplic(s, memmap, socket,
+                                msi_m_phandle, msi_s_phandle, phandle,
+                                &intc_phandles[phandle_pos],
+                                xplic_phandles,
+                                s->soc[socket].num_harts);
+    }
+
+    g_free(intc_phandles);
+
+    for (socket = 0; socket < socket_count; socket++) {
+        if (socket == 0) {
+            *irq_mmio_phandle = xplic_phandles[socket];
+            *irq_pcie_phandle = xplic_phandles[socket];
+        }
+        if (socket == 1) {
+            *irq_pcie_phandle = xplic_phandles[socket];
+        }
+    }
+
+    riscv_socket_fdt_write_distance_matrix(ms);
+}
+
+static void create_fdt_pcie(RVSPMachineState *s, const MemMapEntry *memmap,
+                            uint32_t irq_pcie_phandle,
+                            uint32_t msi_pcie_phandle)
+{
+    char *name;
+    MachineState *ms = MACHINE(s);
+
+    name = g_strdup_printf("/soc/pci@%lx",
+        (long) memmap[RVSP_PCIE_ECAM].base);
+    qemu_fdt_add_subnode(ms->fdt, name);
+    qemu_fdt_setprop_cell(ms->fdt, name, "#address-cells",
+        FDT_PCI_ADDR_CELLS);
+    qemu_fdt_setprop_cell(ms->fdt, name, "#interrupt-cells",
+        FDT_PCI_INT_CELLS);
+    qemu_fdt_setprop_cell(ms->fdt, name, "#size-cells", 0x2);
+    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
+        "pci-host-ecam-generic");
+    qemu_fdt_setprop_string(ms->fdt, name, "device_type", "pci");
+    qemu_fdt_setprop_cell(ms->fdt, name, "linux,pci-domain", 0);
+    qemu_fdt_setprop_cells(ms->fdt, name, "bus-range", 0,
+        memmap[RVSP_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1);
+    qemu_fdt_setprop(ms->fdt, name, "dma-coherent", NULL, 0);
+    qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle);
+    qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0,
+        memmap[RVSP_PCIE_ECAM].base, 0, memmap[RVSP_PCIE_ECAM].size);
+    qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges",
+        1, FDT_PCI_RANGE_IOPORT, 2, 0,
+        2, memmap[RVSP_PCIE_PIO].base, 2, memmap[RVSP_PCIE_PIO].size,
+        1, FDT_PCI_RANGE_MMIO,
+        2, memmap[RVSP_PCIE_MMIO].base,
+        2, memmap[RVSP_PCIE_MMIO].base, 2, memmap[RVSP_PCIE_MMIO].size,
+        1, FDT_PCI_RANGE_MMIO_64BIT,
+        2, memmap[RVSP_PCIE_MMIO_HIGH].base,
+        2, memmap[RVSP_PCIE_MMIO_HIGH].base, 2,
+           memmap[RVSP_PCIE_MMIO_HIGH].size);
+
+    create_pcie_irq_map(s, ms->fdt, name, irq_pcie_phandle);
+    g_free(name);
+}
+
+static void create_fdt_reset(RVSPMachineState *s, const MemMapEntry *memmap,
+                             uint32_t *phandle)
+{
+    char *name;
+    uint32_t test_phandle;
+    MachineState *ms = MACHINE(s);
+
+    test_phandle = (*phandle)++;
+    name = g_strdup_printf("/soc/test@%lx",
+        (long)memmap[RVSP_TEST].base);
+    qemu_fdt_add_subnode(ms->fdt, name);
+    {
+        static const char * const compat[3] = {
+            "sifive,test1", "sifive,test0", "syscon"
+        };
+        qemu_fdt_setprop_string_array(ms->fdt, name, "compatible",
+                                      (char **)&compat, ARRAY_SIZE(compat));
+    }
+    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
+        0x0, memmap[RVSP_TEST].base, 0x0, memmap[RVSP_TEST].size);
+    qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle);
+    test_phandle = qemu_fdt_get_phandle(ms->fdt, name);
+    g_free(name);
+
+    name = g_strdup_printf("/reboot");
+    qemu_fdt_add_subnode(ms->fdt, name);
+    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "syscon-reboot");
+    qemu_fdt_setprop_cell(ms->fdt, name, "regmap", test_phandle);
+    qemu_fdt_setprop_cell(ms->fdt, name, "offset", 0x0);
+    qemu_fdt_setprop_cell(ms->fdt, name, "value", FINISHER_RESET);
+    g_free(name);
+
+    name = g_strdup_printf("/poweroff");
+    qemu_fdt_add_subnode(ms->fdt, name);
+    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "syscon-poweroff");
+    qemu_fdt_setprop_cell(ms->fdt, name, "regmap", test_phandle);
+    qemu_fdt_setprop_cell(ms->fdt, name, "offset", 0x0);
+    qemu_fdt_setprop_cell(ms->fdt, name, "value", FINISHER_PASS);
+    g_free(name);
+}
+
+static void create_fdt_uart(RVSPMachineState *s, const MemMapEntry *memmap,
+                            uint32_t irq_mmio_phandle)
+{
+    char *name;
+    MachineState *ms = MACHINE(s);
+
+    name = g_strdup_printf("/soc/serial@%lx", (long)memmap[RVSP_UART0].base);
+    qemu_fdt_add_subnode(ms->fdt, name);
+    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a");
+    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
+        0x0, memmap[RVSP_UART0].base,
+        0x0, memmap[RVSP_UART0].size);
+    qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400);
+    qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle);
+    qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", RVSP_UART0_IRQ, 0x4);
+
+    qemu_fdt_add_subnode(ms->fdt, "/chosen");
+    qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", name);
+    g_free(name);
+}
+
+static void create_fdt_rtc(RVSPMachineState *s, const MemMapEntry *memmap,
+                           uint32_t irq_mmio_phandle)
+{
+    char *name;
+    MachineState *ms = MACHINE(s);
+
+    name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[RVSP_RTC].base);
+    qemu_fdt_add_subnode(ms->fdt, name);
+    qemu_fdt_setprop_string(ms->fdt, name, "compatible",
+        "google,goldfish-rtc");
+    qemu_fdt_setprop_cells(ms->fdt, name, "reg",
+        0x0, memmap[RVSP_RTC].base, 0x0, memmap[RVSP_RTC].size);
+    qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
+        irq_mmio_phandle);
+    qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", RVSP_RTC_IRQ, 0x4);
+    g_free(name);
+}
+
+static void create_fdt_flash(RVSPMachineState *s, const MemMapEntry *memmap)
+{
+    char *name;
+    MachineState *ms = MACHINE(s);
+    hwaddr flashsize = rvsp_ref_memmap[RVSP_FLASH].size / 2;
+    hwaddr flashbase = rvsp_ref_memmap[RVSP_FLASH].base;
+
+    name = g_strdup_printf("/flash@%" PRIx64, flashbase);
+    qemu_fdt_add_subnode(ms->fdt, name);
+    qemu_fdt_setprop_string(ms->fdt, name, "compatible", "cfi-flash");
+    qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+                                 2, flashbase, 2, flashsize,
+                                 2, flashbase + flashsize, 2, flashsize);
+    qemu_fdt_setprop_cell(ms->fdt, name, "bank-width", 4);
+    g_free(name);
+}
+
+static void create_fdt(RVSPMachineState *s, const MemMapEntry *memmap)
+{
+    MachineState *ms = MACHINE(s);
+    uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1;
+    uint32_t irq_pcie_phandle = 1;
+    uint8_t rng_seed[32];
+
+    ms->fdt = create_device_tree(&s->fdt_size);
+    if (!ms->fdt) {
+        error_report("create_device_tree() failed");
+        exit(1);
+    }
+
+    qemu_fdt_setprop_string(ms->fdt, "/", "model", "riscv-rvsp-ref,qemu");
+    qemu_fdt_setprop_string(ms->fdt, "/", "compatible", "riscv-rvsp-ref");
+    qemu_fdt_setprop_cell(ms->fdt, "/", "#size-cells", 0x2);
+    qemu_fdt_setprop_cell(ms->fdt, "/", "#address-cells", 0x2);
+
+    qemu_fdt_add_subnode(ms->fdt, "/soc");
+    qemu_fdt_setprop(ms->fdt, "/soc", "ranges", NULL, 0);
+    qemu_fdt_setprop_string(ms->fdt, "/soc", "compatible", "simple-bus");
+    qemu_fdt_setprop_cell(ms->fdt, "/soc", "#size-cells", 0x2);
+    qemu_fdt_setprop_cell(ms->fdt, "/soc", "#address-cells", 0x2);
+
+    create_fdt_sockets(s, memmap, &phandle, &irq_mmio_phandle,
+                       &irq_pcie_phandle, &msi_pcie_phandle);
+
+    create_fdt_pcie(s, memmap, irq_pcie_phandle, msi_pcie_phandle);
+
+    create_fdt_reset(s, memmap, &phandle);
+
+    create_fdt_uart(s, memmap, irq_mmio_phandle);
+
+    create_fdt_rtc(s, memmap, irq_mmio_phandle);
+
+    create_fdt_flash(s, memmap);
+    create_fdt_pmu(s);
+
+    /* Pass seed to RNG */
+    qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
+    qemu_fdt_setprop(ms->fdt, "/chosen", "rng-seed",
+                     rng_seed, sizeof(rng_seed));
+}
+
+static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
+                                          DeviceState *irqchip,
+                                          RVSPMachineState *s)
+{
+    DeviceState *dev;
+    PCIHostState *pci;
+    PCIDevice *pdev_ahci;
+    AHCIPCIState *ich9;
+    DriveInfo *hd[NUM_SATA_PORTS];
+    MemoryRegion *ecam_alias, *ecam_reg;
+    MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg;
+    hwaddr ecam_base = rvsp_ref_memmap[RVSP_PCIE_ECAM].base;
+    hwaddr ecam_size = rvsp_ref_memmap[RVSP_PCIE_ECAM].size;
+    hwaddr mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO].base;
+    hwaddr mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO].size;
+    hwaddr high_mmio_base = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].base;
+    hwaddr high_mmio_size = rvsp_ref_memmap[RVSP_PCIE_MMIO_HIGH].size;
+    hwaddr pio_base = rvsp_ref_memmap[RVSP_PCIE_PIO].base;
+    hwaddr pio_size = rvsp_ref_memmap[RVSP_PCIE_PIO].size;
+    MachineClass *mc = MACHINE_GET_CLASS(s);
+    qemu_irq irq;
+    int i;
+
+    dev = qdev_new(TYPE_GPEX_HOST);
+
+    /* Set GPEX object properties for the rvsp ref machine */
+    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE,
+                            ecam_base, NULL);
+    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE,
+                            ecam_size, NULL);
+    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
+                             PCI_HOST_BELOW_4G_MMIO_BASE,
+                             mmio_base, NULL);
+    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_BELOW_4G_MMIO_SIZE,
+                            mmio_size, NULL);
+    object_property_set_uint(OBJECT(GPEX_HOST(dev)),
+                             PCI_HOST_ABOVE_4G_MMIO_BASE,
+                             high_mmio_base, NULL);
+    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ABOVE_4G_MMIO_SIZE,
+                            high_mmio_size, NULL);
+    object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE,
+                            pio_base, NULL);
+    object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE,
+                            pio_size, NULL);
+
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
+    ecam_alias = g_new0(MemoryRegion, 1);
+    ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
+    memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam",
+                             ecam_reg, 0, ecam_size);
+    memory_region_add_subregion(get_system_memory(), ecam_base, ecam_alias);
+
+    mmio_alias = g_new0(MemoryRegion, 1);
+    mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
+    memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
+                             mmio_reg, mmio_base, mmio_size);
+    memory_region_add_subregion(get_system_memory(), mmio_base, mmio_alias);
+
+    /* Map high MMIO space */
+    high_mmio_alias = g_new0(MemoryRegion, 1);
+    memory_region_init_alias(high_mmio_alias, OBJECT(dev), "pcie-mmio-high",
+                             mmio_reg, high_mmio_base, high_mmio_size);
+    memory_region_add_subregion(get_system_memory(), high_mmio_base,
+                                high_mmio_alias);
+
+    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
+
+    for (i = 0; i < GPEX_NUM_IRQS; i++) {
+        irq = qdev_get_gpio_in(irqchip, RVSP_PCIE_IRQ + i);
+
+        sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
+        gpex_set_irq_num(GPEX_HOST(dev), i, RVSP_PCIE_IRQ + i);
+    }
+
+    pci = PCI_HOST_BRIDGE(dev);
+    pci_init_nic_devices(pci->bus, mc->default_nic);
+    /* IDE disk setup.  */
+    pdev_ahci = pci_create_simple(pci->bus, -1, TYPE_ICH9_AHCI);
+    ich9 = ICH9_AHCI(pdev_ahci);
+    g_assert(ARRAY_SIZE(hd) == ich9->ahci.ports);
+    ide_drive_get(hd, ich9->ahci.ports);
+    ahci_ide_create_devs(&ich9->ahci, hd);
+
+    GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus;
+    return dev;
+}
+
+static DeviceState *rvsp_ref_create_aia(int aia_guests,
+                                        const MemMapEntry *memmap, int socket,
+                                        int base_hartid, int hart_count)
+{
+    int i;
+    hwaddr addr;
+    uint32_t guest_bits;
+    DeviceState *aplic_s = NULL;
+    DeviceState *aplic_m = NULL;
+    bool msimode = true;
+
+    /* Per-socket M-level IMSICs */
+    addr = memmap[RVSP_IMSIC_M].base +
+           socket * RVSP_IMSIC_GROUP_MAX_SIZE;
+    for (i = 0; i < hart_count; i++) {
+        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
+                           base_hartid + i, true, 1,
+                           RVSP_IRQCHIP_NUM_MSIS);
+    }
+
+    /* Per-socket S-level IMSICs */
+    guest_bits = imsic_num_bits(aia_guests + 1);
+    addr = memmap[RVSP_IMSIC_S].base + socket * RVSP_IMSIC_GROUP_MAX_SIZE;
+    for (i = 0; i < hart_count; i++) {
+        riscv_imsic_create(addr + i * IMSIC_HART_SIZE(guest_bits),
+                           base_hartid + i, false, 1 + aia_guests,
+                           RVSP_IRQCHIP_NUM_MSIS);
+    }
+
+    /* Per-socket M-level APLIC */
+    aplic_m = riscv_aplic_create(memmap[RVSP_APLIC_M].base +
+                                 socket * memmap[RVSP_APLIC_M].size,
+                                 memmap[RVSP_APLIC_M].size,
+                                 (msimode) ? 0 : base_hartid,
+                                 (msimode) ? 0 : hart_count,
+                                 RVSP_IRQCHIP_NUM_SOURCES,
+                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
+                                 msimode, true, NULL);
+
+    /* Per-socket S-level APLIC */
+    aplic_s = riscv_aplic_create(memmap[RVSP_APLIC_S].base +
+                                 socket * memmap[RVSP_APLIC_S].size,
+                                 memmap[RVSP_APLIC_S].size,
+                                 (msimode) ? 0 : base_hartid,
+                                 (msimode) ? 0 : hart_count,
+                                 RVSP_IRQCHIP_NUM_SOURCES,
+                                 RVSP_IRQCHIP_NUM_PRIO_BITS,
+                                 msimode, false, aplic_m);
+
+    (void)aplic_s;
+    return aplic_m;
+}
+
+static void rvsp_ref_machine_done(Notifier *notifier, void *data)
+{
+    RVSPMachineState *s = container_of(notifier, RVSPMachineState,
+                                       machine_done);
+    const MemMapEntry *memmap = rvsp_ref_memmap;
+    MachineState *machine = MACHINE(s);
+    target_ulong start_addr = memmap[RVSP_DRAM].base;
+    target_ulong firmware_end_addr, kernel_start_addr;
+    const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
+    uint64_t fdt_load_addr;
+    uint64_t kernel_entry = 0;
+    BlockBackend *pflash_blk0;
+
+    /* load/create device tree */
+    if (machine->dtb) {
+        machine->fdt = load_device_tree(machine->dtb, &s->fdt_size);
+        if (!machine->fdt) {
+            error_report("load_device_tree() failed");
+            exit(1);
+        }
+    } else {
+        create_fdt(s, memmap);
+    }
+
+    firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
+                                                     start_addr, NULL);
+
+    pflash_blk0 = pflash_cfi01_get_blk(s->flash[0]);
+    if (pflash_blk0) {
+        if (machine->firmware && !strcmp(machine->firmware, "none")) {
+            /*
+             * Pflash was supplied but bios is none and not KVM guest,
+             * let's overwrite the address we jump to after reset to
+             * the base of the flash.
+             */
+            start_addr = rvsp_ref_memmap[RVSP_FLASH].base;
+        } else {
+            /*
+             * Pflash was supplied but either KVM guest or bios is not none.
+             * In this case, base of the flash would contain S-mode payload.
+             */
+            riscv_setup_firmware_boot(machine);
+            kernel_entry = rvsp_ref_memmap[RVSP_FLASH].base;
+        }
+    }
+
+    if (machine->kernel_filename && !kernel_entry) {
+        kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
+                                                         firmware_end_addr);
+
+        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
+                                         kernel_start_addr, true, NULL);
+    }
+
+    fdt_load_addr = riscv_compute_fdt_addr(memmap[RVSP_DRAM].base,
+                                           memmap[RVSP_DRAM].size,
+                                           machine);
+    riscv_load_fdt(fdt_load_addr, machine->fdt);
+
+    /* load the reset vector */
+    riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
+                              rvsp_ref_memmap[RVSP_MROM].base,
+                              rvsp_ref_memmap[RVSP_MROM].size, kernel_entry,
+                              fdt_load_addr);
+
+}
+
+static void rvsp_ref_machine_init(MachineState *machine)
+{
+    const MemMapEntry *memmap = rvsp_ref_memmap;
+    RVSPMachineState *s = RVSP_REF_MACHINE(machine);
+    MemoryRegion *system_memory = get_system_memory();
+    MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
+    char *soc_name;
+    DeviceState *mmio_irqchip, *pcie_irqchip;
+    int i, base_hartid, hart_count;
+    int socket_count = riscv_socket_count(machine);
+
+    /* Check socket count limit */
+    if (RVSP_SOCKETS_MAX < socket_count) {
+        error_report("number of sockets/nodes should be less than %d",
+            RVSP_SOCKETS_MAX);
+        exit(1);
+    }
+
+    if (!tcg_enabled()) {
+        error_report("'aclint' is only available with TCG acceleration");
+        exit(1);
+    }
+
+    /* Initialize sockets */
+    mmio_irqchip = pcie_irqchip = NULL;
+    for (i = 0; i < socket_count; i++) {
+        if (!riscv_socket_check_hartids(machine, i)) {
+            error_report("discontinuous hartids in socket%d", i);
+            exit(1);
+        }
+
+        base_hartid = riscv_socket_first_hartid(machine, i);
+        if (base_hartid < 0) {
+            error_report("can't find hartid base for socket%d", i);
+            exit(1);
+        }
+
+        hart_count = riscv_socket_hart_count(machine, i);
+        if (hart_count < 0) {
+            error_report("can't find hart count for socket%d", i);
+            exit(1);
+        }
+
+        soc_name = g_strdup_printf("soc%d", i);
+        object_initialize_child(OBJECT(machine), soc_name, &s->soc[i],
+                                TYPE_RISCV_HART_ARRAY);
+        g_free(soc_name);
+        object_property_set_str(OBJECT(&s->soc[i]), "cpu-type",
+                                machine->cpu_type, &error_abort);
+        object_property_set_int(OBJECT(&s->soc[i]), "hartid-base",
+                                base_hartid, &error_abort);
+        object_property_set_int(OBJECT(&s->soc[i]), "num-harts",
+                                hart_count, &error_abort);
+        sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_fatal);
+
+        /* Per-socket ACLINT MTIMER */
+        riscv_aclint_mtimer_create(memmap[RVSP_ACLINT].base +
+                i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
+            RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
+            base_hartid, hart_count,
+            RISCV_ACLINT_DEFAULT_MTIMECMP,
+            RISCV_ACLINT_DEFAULT_MTIME,
+            RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
+
+        /* Per-socket interrupt controller */
+        s->irqchip[i] = rvsp_ref_create_aia(s->aia_guests,
+                                            memmap, i, base_hartid,
+                                            hart_count);
+
+        /* Try to use different IRQCHIP instance based device type */
+        if (i == 0) {
+            mmio_irqchip = s->irqchip[i];
+            pcie_irqchip = s->irqchip[i];
+        }
+        if (i == 1) {
+            pcie_irqchip = s->irqchip[i];
+        }
+    }
+
+    s->memmap = rvsp_ref_memmap;
+
+    /* register system main memory (actual RAM) */
+    memory_region_add_subregion(system_memory, memmap[RVSP_DRAM].base,
+        machine->ram);
+
+    /* boot rom */
+    memory_region_init_rom(mask_rom, NULL, "riscv_rvsp_ref_board.mrom",
+                           memmap[RVSP_MROM].size, &error_fatal);
+    memory_region_add_subregion(system_memory, memmap[RVSP_MROM].base,
+                                mask_rom);
+
+    /* SiFive Test MMIO device */
+    sifive_test_create(memmap[RVSP_TEST].base);
+
+    gpex_pcie_init(system_memory, pcie_irqchip, s);
+
+    serial_mm_init(system_memory, memmap[RVSP_UART0].base,
+        0, qdev_get_gpio_in(mmio_irqchip, RVSP_UART0_IRQ), 399193,
+        serial_hd(0), DEVICE_LITTLE_ENDIAN);
+
+    sysbus_create_simple("goldfish_rtc", memmap[RVSP_RTC].base,
+        qdev_get_gpio_in(mmio_irqchip, RVSP_RTC_IRQ));
+
+    for (i = 0; i < ARRAY_SIZE(s->flash); i++) {
+        /* Map legacy -drive if=pflash to machine properties */
+        pflash_cfi01_legacy_drive(s->flash[i],
+                                  drive_get(IF_PFLASH, 0, i));
+    }
+    rvsp_flash_map(s, system_memory);
+
+    s->machine_done.notify = rvsp_ref_machine_done;
+    qemu_add_machine_init_done_notifier(&s->machine_done);
+}
+
+static void rvsp_ref_machine_instance_init(Object *obj)
+{
+    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
+
+    rvsp_flash_create(s);
+}
+
+static char *rvsp_ref_get_aia_guests(Object *obj, Error **errp)
+{
+    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
+    char val[32];
+
+    sprintf(val, "%d", s->aia_guests);
+    return g_strdup(val);
+}
+
+static void rvsp_ref_set_aia_guests(Object *obj, const char *val, Error **errp)
+{
+    RVSPMachineState *s = RVSP_REF_MACHINE(obj);
+
+    s->aia_guests = atoi(val);
+    if (s->aia_guests < 0 || s->aia_guests > RVSP_IRQCHIP_MAX_GUESTS) {
+        error_setg(errp, "Invalid number of AIA IMSIC guests");
+        error_append_hint(errp, "Valid values be between 0 and %d.\n",
+                          RVSP_IRQCHIP_MAX_GUESTS);
+    }
+}
+
+static void rvsp_ref_machine_class_init(ObjectClass *oc, void *data)
+{
+    char str[128];
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->desc = "RISC-V Server SoC Reference board";
+    mc->init = rvsp_ref_machine_init;
+    mc->max_cpus = RVSP_CPUS_MAX;
+    mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
+    mc->pci_allow_0_address = true;
+    mc->default_nic = "e1000e";
+    mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
+    mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
+    mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
+    mc->numa_mem_supported = true;
+    /* platform instead of architectural choice */
+    mc->cpu_cluster_has_numa_boundary = true;
+    mc->default_ram_id = "riscv_rvsp_ref_board.ram";
+
+    object_class_property_add_str(oc, "aia-guests",
+                                  rvsp_ref_get_aia_guests,
+                                  rvsp_ref_set_aia_guests);
+    sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid value "
+                 "should be between 0 and %d.", RVSP_IRQCHIP_MAX_GUESTS);
+    object_class_property_set_description(oc, "aia-guests", str);
+}
+
+static const TypeInfo rvsp_ref_typeinfo = {
+    .name       = TYPE_RVSP_REF_MACHINE,
+    .parent     = TYPE_MACHINE,
+    .class_init = rvsp_ref_machine_class_init,
+    .instance_init = rvsp_ref_machine_instance_init,
+    .instance_size = sizeof(RVSPMachineState),
+};
+
+static void rvsp_ref_init_register_types(void)
+{
+    type_register_static(&rvsp_ref_typeinfo);
+}
+
+type_init(rvsp_ref_init_register_types)