diff mbox

[3/3] ppc/pnv: add a PowerNVCPUCore object

Message ID 1470388537-2908-4-git-send-email-clg@kaod.org
State New
Headers show

Commit Message

Cédric Le Goater Aug. 5, 2016, 9:15 a.m. UTC
This is largy inspired by sPAPRCPUCore with some simplification, no
hotplug for instance. But the differences are small and the objects
could possibly be merged.

A set of PowerNVCPUCore objects is added to the PnvChip and the device
tree is populated looping on these cores. Core ids in the device tree
are still a little fuzy. To be checked.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 hw/ppc/Makefile.objs      |   2 +-
 hw/ppc/pnv.c              | 160 ++++++++++++++++++++++++++++++++++++++++++-
 hw/ppc/pnv_core.c         | 171 ++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/pnv.h      |   7 ++
 include/hw/ppc/pnv_core.h |  47 +++++++++++++
 5 files changed, 383 insertions(+), 4 deletions(-)
 create mode 100644 hw/ppc/pnv_core.c
 create mode 100644 include/hw/ppc/pnv_core.h

Comments

David Gibson Aug. 16, 2016, 2:39 a.m. UTC | #1
On Fri, Aug 05, 2016 at 11:15:37AM +0200, Cédric Le Goater wrote:
> This is largy inspired by sPAPRCPUCore with some simplification, no
> hotplug for instance. But the differences are small and the objects
> could possibly be merged.
> 
> A set of PowerNVCPUCore objects is added to the PnvChip and the device
> tree is populated looping on these cores. Core ids in the device tree
> are still a little fuzy. To be checked.

So, it's not immediately obvious to me if you want an actual core
object.  You could potentially create the actual vcpu objects directly
from the chip object.  That assumes that any hotplug will only be at
chip granularity, not core granularity, but I'm guessing that's the
case anyway.

That said, if having the intermediate core object is helpful, you're
certainly free to have it.

> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  hw/ppc/Makefile.objs      |   2 +-
>  hw/ppc/pnv.c              | 160 ++++++++++++++++++++++++++++++++++++++++++-
>  hw/ppc/pnv_core.c         | 171 ++++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/pnv.h      |   7 ++
>  include/hw/ppc/pnv_core.h |  47 +++++++++++++
>  5 files changed, 383 insertions(+), 4 deletions(-)
>  create mode 100644 hw/ppc/pnv_core.c
>  create mode 100644 include/hw/ppc/pnv_core.h
> 
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index 8105db7d5600..f8c7d1db9ade 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
>  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
>  obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
>  # IBM PowerNV
> -obj-$(CONFIG_POWERNV) += pnv.o
> +obj-$(CONFIG_POWERNV) += pnv.o pnv_core.o
>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>  obj-y += spapr_pci_vfio.o
>  endif
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index a680780e9dea..1219493c7218 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -35,6 +35,7 @@
>  #include "hw/ppc/fdt.h"
>  #include "hw/ppc/ppc.h"
>  #include "hw/ppc/pnv.h"
> +#include "hw/ppc/pnv_core.h"
>  #include "hw/loader.h"
>  #include "exec/address-spaces.h"
>  #include "qemu/cutils.h"
> @@ -112,6 +113,114 @@ static int powernv_populate_memory(void *fdt)
>      return 0;
>  }
>  
> +static void powernv_create_core_node(void *fdt, CPUState *cs, uint32_t chip_id)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    int smt_threads = ppc_get_compat_smt_threads(cpu);
> +    CPUPPCState *env = &cpu->env;
> +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> +    uint32_t servers_prop[smt_threads];
> +    uint32_t gservers_prop[smt_threads * 2];
> +    int i, index = ppc_get_vcpu_dt_id(cpu);
> +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> +                       0xffffffff, 0xffffffff};
> +    uint32_t tbfreq = PNV_TIMEBASE_FREQ;
> +    uint32_t cpufreq = 1000000000;
> +    uint32_t page_sizes_prop[64];
> +    size_t page_sizes_prop_size;
> +    char *nodename;
> +
> +    nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
> +
> +    _FDT((fdt_begin_node(fdt, nodename)));
> +
> +    g_free(nodename);
> +
> +    _FDT((fdt_property_cell(fdt, "reg", index)));
> +    _FDT((fdt_property_string(fdt, "device_type", "cpu")));

The handling of dt_id is going to collide with cleanups I want to do
in this area (for spapr and the ppc cpu core).  Not sure there's a lot
you can do to avoid that at this stage.

> +    _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
> +    _FDT((fdt_property_cell(fdt, "d-cache-block-size",
> +                            env->dcache_line_size)));
> +    _FDT((fdt_property_cell(fdt, "d-cache-line-size",
> +                            env->dcache_line_size)));
> +    _FDT((fdt_property_cell(fdt, "i-cache-block-size",
> +                            env->icache_line_size)));
> +    _FDT((fdt_property_cell(fdt, "i-cache-line-size",
> +                            env->icache_line_size)));
> +
> +    if (pcc->l1_dcache_size) {
> +        _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
> +    } else {
> +        error_report("Warning: Unknown L1 dcache size for cpu");
> +    }
> +    if (pcc->l1_icache_size) {
> +        _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
> +    } else {
> +        error_report("Warning: Unknown L1 icache size for cpu");
> +    }
> +
> +    _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
> +    _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
> +    _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
> +    _FDT((fdt_property_string(fdt, "status", "okay")));
> +    _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
> +
> +    if (env->spr_cb[SPR_PURR].oea_read) {
> +        _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
> +    }
> +
> +    if (env->mmu_model & POWERPC_MMU_1TSEG) {
> +        _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
> +                           segs, sizeof(segs))));
> +    }
> +
> +    /* Advertise VMX/VSX (vector extensions) if available
> +     *   0 / no property == no vector extensions
> +     *   1               == VMX / Altivec available
> +     *   2               == VSX available */
> +    if (env->insns_flags & PPC_ALTIVEC) {
> +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
> +
> +        _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
> +    }
> +
> +    /* Advertise DFP (Decimal Floating Point) if available
> +     *   0 / no property == no DFP
> +     *   1               == DFP available */
> +    if (env->insns_flags2 & PPC2_DFP) {
> +        _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
> +    }
> +
> +    page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop,
> +                                                  sizeof(page_sizes_prop));
> +    if (page_sizes_prop_size) {
> +        _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
> +                           page_sizes_prop, page_sizes_prop_size)));
> +    }
> +
> +    _FDT((fdt_property_cell(fdt, "ibm,chip-id", chip_id)));
> +
> +    if (cpu->cpu_version) {
> +        _FDT((fdt_property_cell(fdt, "cpu-version", cpu->cpu_version)));
> +    }
> +
> +    /* Build interrupt servers and gservers properties */
> +    for (i = 0; i < smt_threads; i++) {
> +        servers_prop[i] = cpu_to_be32(index + i);
> +        /* Hack, direct the group queues back to cpu 0 */
> +        gservers_prop[i * 2] = cpu_to_be32(index + i);
> +        gservers_prop[i * 2 + 1] = 0;
> +    }
> +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
> +                       servers_prop, sizeof(servers_prop))));
> +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
> +                       gservers_prop, sizeof(gservers_prop))));
> +
> +    _FDT((fdt_end_node(fdt)));
> +}
> +
>  static void *powernv_create_fdt(sPowerNVMachineState *pnv,
>                                  const char *kernel_cmdline)
>  {
> @@ -120,6 +229,7 @@ static void *powernv_create_fdt(sPowerNVMachineState *pnv,
>      uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size);
>      char *buf;
>      const char plat_compat[] = "qemu,powernv\0ibm,powernv";
> +    int i;
>  
>      fdt = g_malloc0(FDT_MAX_SIZE);
>      _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
> @@ -156,6 +266,23 @@ static void *powernv_create_fdt(sPowerNVMachineState *pnv,
>      /* Memory */
>      _FDT((powernv_populate_memory(fdt)));
>  
> +    /* cpus */
> +    _FDT((fdt_begin_node(fdt, "cpus")));
> +    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
> +    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
> +
> +    for (i = 0; i < pnv->num_chips; i++) {
> +        PnvChip *chip = &pnv->chips[i];
> +        int j;
> +
> +        for (j = 0; j < chip->num_cores; j++) {
> +            PowerNVCPUCore *pc = POWERNV_CPU_CORE(chip->cores[j]);
> +            CPUState *cs = CPU(DEVICE(pc->threads));
> +            powernv_create_core_node(fdt, cs, chip->chip_id);

I think it would be nicer to define the fdt creation function in terms
of the core object, and have it retrieve the representative thread itself.

> +        }
> +    }
> +    _FDT((fdt_end_node(fdt)));
> +
>      _FDT((fdt_end_node(fdt))); /* close root node */
>      _FDT((fdt_finish(fdt)));
>  
> @@ -168,11 +295,13 @@ static void ppc_powernv_reset(void)
>      sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);
>      void *fdt;
>  
> +    pnv->fdt_addr = FDT_ADDR;
> +
>      qemu_devices_reset();
>  
>      fdt = powernv_create_fdt(pnv, machine->kernel_cmdline);
>  
> -    cpu_physical_memory_write(FDT_ADDR, fdt, fdt_totalsize(fdt));
> +    cpu_physical_memory_write(pnv->fdt_addr, fdt, fdt_totalsize(fdt));

These look like not really related changes, that should be maybe
folded into 1/3.

>  }
>  
>  static void ppc_powernv_init(MachineState *machine)
> @@ -252,6 +381,10 @@ static void ppc_powernv_init(MachineState *machine)
>  
>          object_initialize(chip, sizeof(*chip), TYPE_PNV_CHIP);
>          object_property_set_int(OBJECT(chip), i, "chip-id", &error_abort);
> +        object_property_set_int(OBJECT(chip), smp_cpus, "num-cores",
> +                                &error_abort);
> +        object_property_set_str(OBJECT(chip), machine->cpu_model, "cpu-model",
> +                                &error_abort);

So various drafts of the spapr cores had a cpu-model parameter, but we
eventually rejected it in favour of having different core types
corresponding to the different, well, core types.  Unless there's a
compelling reason otherwise, I think it would be nicer to do the same
for the pnvchip objects - a p8pnvchip object will always create p8
cores and so forth.

>          object_property_set_bool(OBJECT(chip), true, "realized", &error_abort);
>      }
>  }
> @@ -292,14 +425,35 @@ static const TypeInfo powernv_machine_2_8_info = {
>      .class_init    = powernv_machine_2_8_class_init,
>  };
>  
> -
>  static void pnv_chip_realize(DeviceState *dev, Error **errp)
>  {
> -    ;
> +    int i;
> +    PnvChip *chip = PNV_CHIP(dev);
> +    char *typename = powernv_cpu_core_typename(chip->cpu_model);
> +
> +    if (!object_class_by_name(typename)) {
> +        error_setg(errp, "Unable to find PowerNV CPU Core definition");
> +        return;
> +    }
> +
> +    chip->cores = g_new0(Object *, chip->num_cores);
> +    for (i = 0; i < chip->num_cores; i++) {
> +        int core_id = i * smp_threads;
> +        chip->cores[i] = object_new(typename);
> +        object_property_set_int(chip->cores[i], smp_threads, "nr-threads",
> +                                &error_fatal);
> +        object_property_set_int(chip->cores[i], core_id, CPU_CORE_PROP_CORE_ID,
> +                                &error_fatal);
> +        object_property_set_bool(chip->cores[i], true, "realized",
> +                                 &error_fatal);
> +    }
> +    g_free(typename);
>  }
>  
>  static Property pnv_chip_properties[] = {
>      DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0),
> +    DEFINE_PROP_STRING("cpu-model", PnvChip, cpu_model),
> +    DEFINE_PROP_UINT32("num-cores", PnvChip, num_cores, 1),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> new file mode 100644
> index 000000000000..1e36709db993
> --- /dev/null
> +++ b/hw/ppc/pnv_core.c
> @@ -0,0 +1,171 @@
> +/*
> + * QEMU PowerPC PowerNV CPU model
> + *
> + * Copyright (c) IBM Corporation.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public License
> + * as published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +#include "qemu/osdep.h"
> +#include "sysemu/sysemu.h"
> +#include "qapi/error.h"
> +#include "target-ppc/cpu.h"
> +#include "hw/ppc/ppc.h"
> +#include "hw/ppc/pnv.h"
> +#include "hw/ppc/pnv_core.h"
> +
> +static void powernv_cpu_reset(void *opaque)
> +{
> +    PowerPCCPU *cpu = opaque;
> +    CPUState *cs = CPU(cpu);
> +    CPUPPCState *env = &cpu->env;
> +    MachineState *machine = MACHINE(qdev_get_machine());
> +    sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);
> +
> +    cpu_reset(cs);
> +
> +    env->spr[SPR_PIR] = ppc_get_vcpu_dt_id(cpu);

Is the PIR writable?  If not it might be better to move this to init
rather than reset time.

> +    env->spr[SPR_HIOR] = 0;
> +    env->gpr[3] = pnv->fdt_addr;
> +    env->nip = 0x10;
> +    env->msr |= MSR_HVB;
> +}
> +
> +static void powernv_cpu_init(PowerPCCPU *cpu, Error **errp)
> +{
> +    CPUPPCState *env = &cpu->env;
> +
> +    /* Set time-base frequency to 512 MHz */
> +    cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
> +
> +    /* MSR[IP] doesn't exist nowadays */
> +    env->msr_mask &= ~(1 << 6);
> +
> +    qemu_register_reset(powernv_cpu_reset, cpu);
> +    powernv_cpu_reset(cpu);
> +}
> +
> +static void powernv_cpu_core_realize_child(Object *child, Error **errp)
> +{
> +    Error *local_err = NULL;
> +    CPUState *cs = CPU(child);
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +
> +    object_property_set_bool(child, true, "realized", &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +
> +    powernv_cpu_init(cpu, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +}
> +
> +static void powernv_cpu_core_realize(DeviceState *dev, Error **errp)
> +{
> +    PowerNVCPUCore *pc = POWERNV_CPU_CORE(OBJECT(dev));
> +    CPUCore *cc = CPU_CORE(OBJECT(dev));
> +    PowerNVCPUClass *pcc = POWERNV_CPU_GET_CLASS(OBJECT(dev));
> +    const char *typename = object_class_get_name(pcc->cpu_oc);
> +    size_t size = object_type_get_instance_size(typename);
> +    Error *local_err = NULL;
> +    void *obj;
> +    int i, j;
> +
> +
> +    pc->threads = g_malloc0(size * cc->nr_threads);
> +    for (i = 0; i < cc->nr_threads; i++) {
> +        char id[32];
> +        CPUState *cs;
> +
> +        obj = pc->threads + i * size;
> +
> +        object_initialize(obj, size, typename);
> +        cs = CPU(obj);
> +        cs->cpu_index = cc->core_id + i;
> +        snprintf(id, sizeof(id), "thread[%d]", i);
> +        object_property_add_child(OBJECT(pc), id, obj, &local_err);
> +        if (local_err) {
> +            goto err;
> +        }
> +        object_unref(obj);
> +    }
> +
> +    for (j = 0; j < cc->nr_threads; j++) {
> +        obj = pc->threads + j * size;
> +
> +        powernv_cpu_core_realize_child(obj, &local_err);
> +        if (local_err) {
> +            goto err;
> +        }
> +    }
> +    return;
> +
> +err:
> +    while (--i >= 0) {
> +        obj = pc->threads + i * size;
> +        object_unparent(obj);
> +    }
> +    g_free(pc->threads);
> +    error_propagate(errp, local_err);
> +}
> +
> +/*
> + * Grow this list or merge with SPAPRCoreInfo which is very similar
> + */
> +static const char *powernv_core_models[] = { "POWER8" };
> +
> +static void powernv_cpu_core_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +    PowerNVCPUClass *pcc = POWERNV_CPU_CLASS(oc);
> +
> +    dc->realize = powernv_cpu_core_realize;
> +    pcc->cpu_oc = cpu_class_by_name(TYPE_POWERPC_CPU, data);
> +}
> +
> +static const TypeInfo powernv_cpu_core_info = {
> +    .name           = TYPE_POWERNV_CPU_CORE,
> +    .parent         = TYPE_CPU_CORE,
> +    .instance_size  = sizeof(PowerNVCPUCore),
> +    .class_size     = sizeof(PowerNVCPUClass),
> +    .abstract       = true,
> +};
> +
> +static void powernv_cpu_core_register_types(void)
> +{
> +    int i ;
> +
> +    type_register_static(&powernv_cpu_core_info);
> +    for (i = 0; i < ARRAY_SIZE(powernv_core_models); ++i) {
> +        TypeInfo ti = {
> +            .parent = TYPE_POWERNV_CPU_CORE,
> +            .instance_size = sizeof(PowerNVCPUCore),
> +            .class_init = powernv_cpu_core_class_init,
> +            .class_data = (void *) powernv_core_models[i],
> +        };
> +        ti.name = powernv_cpu_core_typename(powernv_core_models[i]);
> +        type_register(&ti);
> +        g_free((void *)ti.name);
> +    }
> +}
> +
> +type_init(powernv_cpu_core_register_types)
> +
> +char *powernv_cpu_core_typename(const char *model)
> +{
> +    return g_strdup_printf("%s-" TYPE_POWERNV_CPU_CORE, model);
> +}
> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
> index 6907dc9e5c3d..9eac4b34a9b0 100644
> --- a/include/hw/ppc/pnv.h
> +++ b/include/hw/ppc/pnv.h
> @@ -31,6 +31,10 @@ typedef struct PnvChip {
>  
>      /*< public >*/
>      uint32_t     chip_id;
> +    uint32_t     num_cores;
> +    char *cpu_model;
> +
> +    Object **cores;

So, unlike the chips within the machine, the cores within the chip
should probably be done like the threads within the core - a single
array with object_initialize() rather than an array of pointers.  AIUI
this is because having a (potentially) user instantiable object
automatically object_new() other things breaks QOM lifetime rules.  I
can't say I understand this point terribly well though, but i know it
was something we went several rounds on during the spapr core work
though.

>  } PnvChip;
>  
>  #define TYPE_POWERNV_MACHINE      "powernv-machine"
> @@ -43,9 +47,12 @@ typedef struct sPowerNVMachineState {
>  
>      uint32_t initrd_base;
>      long initrd_size;
> +    hwaddr fdt_addr;
>  
>      uint32_t  num_chips;
>      PnvChip   *chips;
>  } sPowerNVMachineState;
>  
> +#define PNV_TIMEBASE_FREQ           512000000ULL
> +
>  #endif /* _PPC_PNV_H */
> diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
> new file mode 100644
> index 000000000000..88a09b0fd1c6
> --- /dev/null
> +++ b/include/hw/ppc/pnv_core.h
> @@ -0,0 +1,47 @@
> +/*
> + * QEMU PowerPC PowerNV CPU model
> + *
> + * Copyright (c) 2016 IBM Corporation.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public License
> + * as published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +#ifndef _PPC_PNV_CORE_H
> +#define _PPC_PNV_CORE_H
> +
> +#include "hw/cpu/core.h"
> +
> +#define TYPE_POWERNV_CPU_CORE "powernv-cpu-core"
> +#define POWERNV_CPU_CORE(obj) \
> +    OBJECT_CHECK(PowerNVCPUCore, (obj), TYPE_POWERNV_CPU_CORE)
> +#define POWERNV_CPU_CLASS(klass) \
> +     OBJECT_CLASS_CHECK(PowerNVCPUClass, (klass), TYPE_POWERNV_CPU_CORE)
> +#define POWERNV_CPU_GET_CLASS(obj) \
> +     OBJECT_GET_CLASS(PowerNVCPUClass, (obj), TYPE_POWERNV_CPU_CORE)
> +
> +typedef struct PowerNVCPUCore {
> +    /*< private >*/
> +    CPUCore parent_obj;
> +
> +    /*< public >*/
> +    void *threads;
> +} PowerNVCPUCore;
> +
> +typedef struct PowerNVCPUClass {
> +    DeviceClass parent_class;
> +    ObjectClass *cpu_oc;
> +}   PowerNVCPUClass;
> +
> +extern char *powernv_cpu_core_typename(const char *model);
> +
> +#endif /* _PPC_PNV_CORE_H */
Benjamin Herrenschmidt Aug. 16, 2016, 5:02 a.m. UTC | #2
On Tue, 2016-08-16 at 12:39 +1000, David Gibson wrote:
> On Fri, Aug 05, 2016 at 11:15:37AM +0200, Cédric Le Goater wrote:

> > 

> > This is largy inspired by sPAPRCPUCore with some simplification, no

> > hotplug for instance. But the differences are small and the objects

> > could possibly be merged.

> > 

> > A set of PowerNVCPUCore objects is added to the PnvChip and the device

> > tree is populated looping on these cores. Core ids in the device tree

> > are still a little fuzy. To be checked.


What about P9 where cores are in pairs to form EX and in pairs of EX
(ie, quads) to form EPs ?

> > So, it's not immediately obvious to me if you want an actual core

> object.  You could potentially create the actual vcpu objects directly

> from the chip object.  That assumes that any hotplug will only be at

> chip granularity, not core granularity, but I'm guessing that's the

> case anyway.


Well we want to add some of the core XSCOMs so it makes sense to have
a core object that is a XSCOM device

> > That said, if having the intermediate core object is helpful, you're

> certainly free to have it.

> 

> > 

> > > > Signed-off-by: Cédric Le Goater <clg@kaod.org>

> > ---

> >  hw/ppc/Makefile.objs      |   2 +-

> >  hw/ppc/pnv.c              | 160 ++++++++++++++++++++++++++++++++++++++++++-

> >  hw/ppc/pnv_core.c         | 171 ++++++++++++++++++++++++++++++++++++++++++++++

> >  include/hw/ppc/pnv.h      |   7 ++

> >  include/hw/ppc/pnv_core.h |  47 +++++++++++++

> >  5 files changed, 383 insertions(+), 4 deletions(-)

> >  create mode 100644 hw/ppc/pnv_core.c

> >  create mode 100644 include/hw/ppc/pnv_core.h

> > 

> > diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs

> > index 8105db7d5600..f8c7d1db9ade 100644

> > --- a/hw/ppc/Makefile.objs

> > +++ b/hw/ppc/Makefile.objs

> > @@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o

> >  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o

> >  obj-$(CONFIG_PSERIES) += spapr_cpu_core.o

> >  # IBM PowerNV

> > -obj-$(CONFIG_POWERNV) += pnv.o

> > +obj-$(CONFIG_POWERNV) += pnv.o pnv_core.o

> >  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)

> >  obj-y += spapr_pci_vfio.o

> >  endif

> > diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c

> > index a680780e9dea..1219493c7218 100644

> > --- a/hw/ppc/pnv.c

> > +++ b/hw/ppc/pnv.c

> > @@ -35,6 +35,7 @@

> >  #include "hw/ppc/fdt.h"

> >  #include "hw/ppc/ppc.h"

> >  #include "hw/ppc/pnv.h"

> > +#include "hw/ppc/pnv_core.h"

> >  #include "hw/loader.h"

> >  #include "exec/address-spaces.h"

> >  #include "qemu/cutils.h"

> > @@ -112,6 +113,114 @@ static int powernv_populate_memory(void *fdt)

> >      return 0;

> >  }

> >  

> > +static void powernv_create_core_node(void *fdt, CPUState *cs, uint32_t chip_id)

> > +{

> > +    PowerPCCPU *cpu = POWERPC_CPU(cs);

> > +    int smt_threads = ppc_get_compat_smt_threads(cpu);

> > +    CPUPPCState *env = &cpu->env;

> > +    DeviceClass *dc = DEVICE_GET_CLASS(cs);

> > +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);

> > +    uint32_t servers_prop[smt_threads];

> > +    uint32_t gservers_prop[smt_threads * 2];

> > +    int i, index = ppc_get_vcpu_dt_id(cpu);

> > +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),

> > +                       0xffffffff, 0xffffffff};

> > +    uint32_t tbfreq = PNV_TIMEBASE_FREQ;

> > +    uint32_t cpufreq = 1000000000;

> > +    uint32_t page_sizes_prop[64];

> > +    size_t page_sizes_prop_size;

> > +    char *nodename;

> > +

> > +    nodename = g_strdup_printf("%s@%x", dc->fw_name, index);

> > +

> > +    _FDT((fdt_begin_node(fdt, nodename)));

> > +

> > +    g_free(nodename);

> > +

> > +    _FDT((fdt_property_cell(fdt, "reg", index)));

> > +    _FDT((fdt_property_string(fdt, "device_type", "cpu")));

> 

> The handling of dt_id is going to collide with cleanups I want to do

> in this area (for spapr and the ppc cpu core).  Not sure there's a lot

> you can do to avoid that at this stage.

> 

> > 

> > +    _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));

> > +    _FDT((fdt_property_cell(fdt, "d-cache-block-size",

> > +                            env->dcache_line_size)));

> > +    _FDT((fdt_property_cell(fdt, "d-cache-line-size",

> > +                            env->dcache_line_size)));

> > +    _FDT((fdt_property_cell(fdt, "i-cache-block-size",

> > +                            env->icache_line_size)));

> > +    _FDT((fdt_property_cell(fdt, "i-cache-line-size",

> > +                            env->icache_line_size)));

> > +

> > +    if (pcc->l1_dcache_size) {

> > +        _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));

> > +    } else {

> > +        error_report("Warning: Unknown L1 dcache size for cpu");

> > +    }

> > +    if (pcc->l1_icache_size) {

> > +        _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));

> > +    } else {

> > +        error_report("Warning: Unknown L1 icache size for cpu");

> > +    }

> > +

> > +    _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));

> > +    _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));

> > +    _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));

> > +    _FDT((fdt_property_string(fdt, "status", "okay")));

> > +    _FDT((fdt_property(fdt, "64-bit", NULL, 0)));

> > +

> > +    if (env->spr_cb[SPR_PURR].oea_read) {

> > +        _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));

> > +    }

> > +

> > +    if (env->mmu_model & POWERPC_MMU_1TSEG) {

> > +        _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",

> > +                           segs, sizeof(segs))));

> > +    }

> > +

> > +    /* Advertise VMX/VSX (vector extensions) if available

> > +     *   0 / no property == no vector extensions

> > +     *   1               == VMX / Altivec available

> > +     *   2               == VSX available */

> > +    if (env->insns_flags & PPC_ALTIVEC) {

> > +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;

> > +

> > +        _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));

> > +    }

> > +

> > +    /* Advertise DFP (Decimal Floating Point) if available

> > +     *   0 / no property == no DFP

> > +     *   1               == DFP available */

> > +    if (env->insns_flags2 & PPC2_DFP) {

> > +        _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));

> > +    }

> > +

> > +    page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop,

> > +                                                  sizeof(page_sizes_prop));

> > +    if (page_sizes_prop_size) {

> > +        _FDT((fdt_property(fdt, "ibm,segment-page-sizes",

> > +                           page_sizes_prop, page_sizes_prop_size)));

> > +    }

> > +

> > +    _FDT((fdt_property_cell(fdt, "ibm,chip-id", chip_id)));

> > +

> > +    if (cpu->cpu_version) {

> > +        _FDT((fdt_property_cell(fdt, "cpu-version", cpu->cpu_version)));

> > +    }

> > +

> > +    /* Build interrupt servers and gservers properties */

> > +    for (i = 0; i < smt_threads; i++) {

> > +        servers_prop[i] = cpu_to_be32(index + i);

> > +        /* Hack, direct the group queues back to cpu 0 */

> > +        gservers_prop[i * 2] = cpu_to_be32(index + i);

> > +        gservers_prop[i * 2 + 1] = 0;

> > +    }

> > +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",

> > +                       servers_prop, sizeof(servers_prop))));

> > +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",

> > +                       gservers_prop, sizeof(gservers_prop))));

> > +

> > +    _FDT((fdt_end_node(fdt)));

> > +}

> > +

> >  static void *powernv_create_fdt(sPowerNVMachineState *pnv,

> >                                  const char *kernel_cmdline)

> >  {

> > @@ -120,6 +229,7 @@ static void *powernv_create_fdt(sPowerNVMachineState *pnv,

> >      uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size);

> >      char *buf;

> >      const char plat_compat[] = "qemu,powernv\0ibm,powernv";

> > +    int i;

> >  

> >      fdt = g_malloc0(FDT_MAX_SIZE);

> >      _FDT((fdt_create(fdt, FDT_MAX_SIZE)));

> > @@ -156,6 +266,23 @@ static void *powernv_create_fdt(sPowerNVMachineState *pnv,

> >      /* Memory */

> >      _FDT((powernv_populate_memory(fdt)));

> >  

> > +    /* cpus */

> > +    _FDT((fdt_begin_node(fdt, "cpus")));

> > +    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));

> > +    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));

> > +

> > +    for (i = 0; i < pnv->num_chips; i++) {

> > +        PnvChip *chip = &pnv->chips[i];

> > +        int j;

> > +

> > +        for (j = 0; j < chip->num_cores; j++) {

> > +            PowerNVCPUCore *pc = POWERNV_CPU_CORE(chip->cores[j]);

> > +            CPUState *cs = CPU(DEVICE(pc->threads));

> > +            powernv_create_core_node(fdt, cs, chip->chip_id);

> 

> I think it would be nicer to define the fdt creation function in terms

> of the core object, and have it retrieve the representative thread itself.

> 

> > 

> > +        }

> > +    }

> > +    _FDT((fdt_end_node(fdt)));

> > +

> >      _FDT((fdt_end_node(fdt))); /* close root node */

> >      _FDT((fdt_finish(fdt)));

> >  

> > @@ -168,11 +295,13 @@ static void ppc_powernv_reset(void)

> >      sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);

> >      void *fdt;

> >  

> > +    pnv->fdt_addr = FDT_ADDR;

> > +

> >      qemu_devices_reset();

> >  

> >      fdt = powernv_create_fdt(pnv, machine->kernel_cmdline);

> >  

> > -    cpu_physical_memory_write(FDT_ADDR, fdt, fdt_totalsize(fdt));

> > +    cpu_physical_memory_write(pnv->fdt_addr, fdt, fdt_totalsize(fdt));

> 

> These look like not really related changes, that should be maybe

> folded into 1/3.

> 

> > 

> >  }

> >  

> >  static void ppc_powernv_init(MachineState *machine)

> > @@ -252,6 +381,10 @@ static void ppc_powernv_init(MachineState *machine)

> >  

> >          object_initialize(chip, sizeof(*chip), TYPE_PNV_CHIP);

> >          object_property_set_int(OBJECT(chip), i, "chip-id", &error_abort);

> > +        object_property_set_int(OBJECT(chip), smp_cpus, "num-cores",

> > +                                &error_abort);

> > +        object_property_set_str(OBJECT(chip), machine->cpu_model, "cpu-model",

> > +                                &error_abort);

> 

> So various drafts of the spapr cores had a cpu-model parameter, but we

> eventually rejected it in favour of having different core types

> corresponding to the different, well, core types.  Unless there's a

> compelling reason otherwise, I think it would be nicer to do the same

> for the pnvchip objects - a p8pnvchip object will always create p8

> cores and so forth.

> 

> > 

> >          object_property_set_bool(OBJECT(chip), true, "realized", &error_abort);

> >      }

> >  }

> > @@ -292,14 +425,35 @@ static const TypeInfo powernv_machine_2_8_info = {

> >      .class_init    = powernv_machine_2_8_class_init,

> >  };

> >  

> > -

> >  static void pnv_chip_realize(DeviceState *dev, Error **errp)

> >  {

> > -    ;

> > +    int i;

> > +    PnvChip *chip = PNV_CHIP(dev);

> > +    char *typename = powernv_cpu_core_typename(chip->cpu_model);

> > +

> > +    if (!object_class_by_name(typename)) {

> > +        error_setg(errp, "Unable to find PowerNV CPU Core definition");

> > +        return;

> > +    }

> > +

> > +    chip->cores = g_new0(Object *, chip->num_cores);

> > +    for (i = 0; i < chip->num_cores; i++) {

> > +        int core_id = i * smp_threads;

> > +        chip->cores[i] = object_new(typename);

> > +        object_property_set_int(chip->cores[i], smp_threads, "nr-threads",

> > +                                &error_fatal);

> > +        object_property_set_int(chip->cores[i], core_id, CPU_CORE_PROP_CORE_ID,

> > +                                &error_fatal);

> > +        object_property_set_bool(chip->cores[i], true, "realized",

> > +                                 &error_fatal);

> > +    }

> > +    g_free(typename);

> >  }

> >  

> >  static Property pnv_chip_properties[] = {

> >      DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0),

> > +    DEFINE_PROP_STRING("cpu-model", PnvChip, cpu_model),

> > +    DEFINE_PROP_UINT32("num-cores", PnvChip, num_cores, 1),

> >      DEFINE_PROP_END_OF_LIST(),

> >  };

> >  

> > diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c

> > new file mode 100644

> > index 000000000000..1e36709db993

> > --- /dev/null

> > +++ b/hw/ppc/pnv_core.c

> > @@ -0,0 +1,171 @@

> > +/*

> > + * QEMU PowerPC PowerNV CPU model

> > + *

> > + * Copyright (c) IBM Corporation.

> > + *

> > + * This library is free software; you can redistribute it and/or

> > + * modify it under the terms of the GNU Lesser General Public License

> > + * as published by the Free Software Foundation; either version 2 of

> > + * the License, or (at your option) any later version.

> > + *

> > + * This library is distributed in the hope that it will be useful, but

> > + * WITHOUT ANY WARRANTY; without even the implied warranty of

> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> > + * Lesser General Public License for more details.

> > + *

> > + * You should have received a copy of the GNU Lesser General Public

> > + * License along with this library; if not, see <http://www.gnu.org/licenses/>.

> > + */

> > +#include "qemu/osdep.h"

> > +#include "sysemu/sysemu.h"

> > +#include "qapi/error.h"

> > +#include "target-ppc/cpu.h"

> > +#include "hw/ppc/ppc.h"

> > +#include "hw/ppc/pnv.h"

> > +#include "hw/ppc/pnv_core.h"

> > +

> > +static void powernv_cpu_reset(void *opaque)

> > +{

> > +    PowerPCCPU *cpu = opaque;

> > +    CPUState *cs = CPU(cpu);

> > +    CPUPPCState *env = &cpu->env;

> > +    MachineState *machine = MACHINE(qdev_get_machine());

> > +    sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);

> > +

> > +    cpu_reset(cs);

> > +

> > +    env->spr[SPR_PIR] = ppc_get_vcpu_dt_id(cpu);

> 

> Is the PIR writable?  If not it might be better to move this to init

> rather than reset time.

> 

> > 

> > +    env->spr[SPR_HIOR] = 0;

> > +    env->gpr[3] = pnv->fdt_addr;

> > +    env->nip = 0x10;

> > +    env->msr |= MSR_HVB;

> > +}

> > +

> > +static void powernv_cpu_init(PowerPCCPU *cpu, Error **errp)

> > +{

> > +    CPUPPCState *env = &cpu->env;

> > +

> > +    /* Set time-base frequency to 512 MHz */

> > +    cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);

> > +

> > +    /* MSR[IP] doesn't exist nowadays */

> > +    env->msr_mask &= ~(1 << 6);

> > +

> > +    qemu_register_reset(powernv_cpu_reset, cpu);

> > +    powernv_cpu_reset(cpu);

> > +}

> > +

> > +static void powernv_cpu_core_realize_child(Object *child, Error **errp)

> > +{

> > +    Error *local_err = NULL;

> > +    CPUState *cs = CPU(child);

> > +    PowerPCCPU *cpu = POWERPC_CPU(cs);

> > +

> > +    object_property_set_bool(child, true, "realized", &local_err);

> > +    if (local_err) {

> > +        error_propagate(errp, local_err);

> > +        return;

> > +    }

> > +

> > +    powernv_cpu_init(cpu, &local_err);

> > +    if (local_err) {

> > +        error_propagate(errp, local_err);

> > +        return;

> > +    }

> > +}

> > +

> > +static void powernv_cpu_core_realize(DeviceState *dev, Error **errp)

> > +{

> > +    PowerNVCPUCore *pc = POWERNV_CPU_CORE(OBJECT(dev));

> > +    CPUCore *cc = CPU_CORE(OBJECT(dev));

> > +    PowerNVCPUClass *pcc = POWERNV_CPU_GET_CLASS(OBJECT(dev));

> > +    const char *typename = object_class_get_name(pcc->cpu_oc);

> > +    size_t size = object_type_get_instance_size(typename);

> > +    Error *local_err = NULL;

> > +    void *obj;

> > +    int i, j;

> > +

> > +

> > +    pc->threads = g_malloc0(size * cc->nr_threads);

> > +    for (i = 0; i < cc->nr_threads; i++) {

> > +        char id[32];

> > +        CPUState *cs;

> > +

> > +        obj = pc->threads + i * size;

> > +

> > +        object_initialize(obj, size, typename);

> > +        cs = CPU(obj);

> > +        cs->cpu_index = cc->core_id + i;

> > +        snprintf(id, sizeof(id), "thread[%d]", i);

> > +        object_property_add_child(OBJECT(pc), id, obj, &local_err);

> > +        if (local_err) {

> > +            goto err;

> > +        }

> > +        object_unref(obj);

> > +    }

> > +

> > +    for (j = 0; j < cc->nr_threads; j++) {

> > +        obj = pc->threads + j * size;

> > +

> > +        powernv_cpu_core_realize_child(obj, &local_err);

> > +        if (local_err) {

> > +            goto err;

> > +        }

> > +    }

> > +    return;

> > +

> > +err:

> > +    while (--i >= 0) {

> > +        obj = pc->threads + i * size;

> > +        object_unparent(obj);

> > +    }

> > +    g_free(pc->threads);

> > +    error_propagate(errp, local_err);

> > +}

> > +

> > +/*

> > + * Grow this list or merge with SPAPRCoreInfo which is very similar

> > + */

> > +static const char *powernv_core_models[] = { "POWER8" };

> > +

> > +static void powernv_cpu_core_class_init(ObjectClass *oc, void *data)

> > +{

> > +    DeviceClass *dc = DEVICE_CLASS(oc);

> > +    PowerNVCPUClass *pcc = POWERNV_CPU_CLASS(oc);

> > +

> > +    dc->realize = powernv_cpu_core_realize;

> > +    pcc->cpu_oc = cpu_class_by_name(TYPE_POWERPC_CPU, data);

> > +}

> > +

> > +static const TypeInfo powernv_cpu_core_info = {

> > +    .name           = TYPE_POWERNV_CPU_CORE,

> > +    .parent         = TYPE_CPU_CORE,

> > +    .instance_size  = sizeof(PowerNVCPUCore),

> > +    .class_size     = sizeof(PowerNVCPUClass),

> > +    .abstract       = true,

> > +};

> > +

> > +static void powernv_cpu_core_register_types(void)

> > +{

> > +    int i ;

> > +

> > +    type_register_static(&powernv_cpu_core_info);

> > +    for (i = 0; i < ARRAY_SIZE(powernv_core_models); ++i) {

> > +        TypeInfo ti = {

> > +            .parent = TYPE_POWERNV_CPU_CORE,

> > +            .instance_size = sizeof(PowerNVCPUCore),

> > +            .class_init = powernv_cpu_core_class_init,

> > +            .class_data = (void *) powernv_core_models[i],

> > +        };

> > +        ti.name = powernv_cpu_core_typename(powernv_core_models[i]);

> > +        type_register(&ti);

> > +        g_free((void *)ti.name);

> > +    }

> > +}

> > +

> > +type_init(powernv_cpu_core_register_types)

> > +

> > +char *powernv_cpu_core_typename(const char *model)

> > +{

> > +    return g_strdup_printf("%s-" TYPE_POWERNV_CPU_CORE, model);

> > +}

> > diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h

> > index 6907dc9e5c3d..9eac4b34a9b0 100644

> > --- a/include/hw/ppc/pnv.h

> > +++ b/include/hw/ppc/pnv.h

> > @@ -31,6 +31,10 @@ typedef struct PnvChip {

> >  

> >      /*< public >*/

> >      uint32_t     chip_id;

> > +    uint32_t     num_cores;

> > +    char *cpu_model;

> > +

> > +    Object **cores;

> 

> So, unlike the chips within the machine, the cores within the chip

> should probably be done like the threads within the core - a single

> array with object_initialize() rather than an array of pointers.  AIUI

> this is because having a (potentially) user instantiable object

> automatically object_new() other things breaks QOM lifetime rules.  I

> can't say I understand this point terribly well though, but i know it

> was something we went several rounds on during the spapr core work

> though.

> 

> > 

> >  } PnvChip;

> >  

> >  #define TYPE_POWERNV_MACHINE      "powernv-machine"

> > @@ -43,9 +47,12 @@ typedef struct sPowerNVMachineState {

> >  

> >      uint32_t initrd_base;

> >      long initrd_size;

> > +    hwaddr fdt_addr;

> >  

> >      uint32_t  num_chips;

> >      PnvChip   *chips;

> >  } sPowerNVMachineState;

> >  

> > +#define PNV_TIMEBASE_FREQ           512000000ULL

> > +

> >  #endif /* _PPC_PNV_H */

> > diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h

> > new file mode 100644

> > index 000000000000..88a09b0fd1c6

> > --- /dev/null

> > +++ b/include/hw/ppc/pnv_core.h

> > @@ -0,0 +1,47 @@

> > +/*

> > + * QEMU PowerPC PowerNV CPU model

> > + *

> > + * Copyright (c) 2016 IBM Corporation.

> > + *

> > + * This library is free software; you can redistribute it and/or

> > + * modify it under the terms of the GNU Lesser General Public License

> > + * as published by the Free Software Foundation; either version 2 of

> > + * the License, or (at your option) any later version.

> > + *

> > + * This library is distributed in the hope that it will be useful, but

> > + * WITHOUT ANY WARRANTY; without even the implied warranty of

> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> > + * Lesser General Public License for more details.

> > + *

> > + * You should have received a copy of the GNU Lesser General Public

> > + * License along with this library; if not, see <http://www.gnu.org/licenses/>.

> > + */

> > +#ifndef _PPC_PNV_CORE_H

> > +#define _PPC_PNV_CORE_H

> > +

> > +#include "hw/cpu/core.h"

> > +

> > +#define TYPE_POWERNV_CPU_CORE "powernv-cpu-core"

> > +#define POWERNV_CPU_CORE(obj) \

> > +    OBJECT_CHECK(PowerNVCPUCore, (obj), TYPE_POWERNV_CPU_CORE)

> > +#define POWERNV_CPU_CLASS(klass) \

> > +     OBJECT_CLASS_CHECK(PowerNVCPUClass, (klass), TYPE_POWERNV_CPU_CORE)

> > +#define POWERNV_CPU_GET_CLASS(obj) \

> > +     OBJECT_GET_CLASS(PowerNVCPUClass, (obj), TYPE_POWERNV_CPU_CORE)

> > +

> > +typedef struct PowerNVCPUCore {

> > +    /*< private >*/

> > +    CPUCore parent_obj;

> > +

> > +    /*< public >*/

> > +    void *threads;

> > +} PowerNVCPUCore;

> > +

> > +typedef struct PowerNVCPUClass {

> > +    DeviceClass parent_class;

> > +    ObjectClass *cpu_oc;

> > +}   PowerNVCPUClass;

> > +

> > +extern char *powernv_cpu_core_typename(const char *model);

> > +

> > +#endif /* _PPC_PNV_CORE_H */

>
Cédric Le Goater Aug. 26, 2016, 5:49 p.m. UTC | #3
On 08/16/2016 04:39 AM, David Gibson wrote:
> On Fri, Aug 05, 2016 at 11:15:37AM +0200, Cédric Le Goater wrote:
>> This is largy inspired by sPAPRCPUCore with some simplification, no
>> hotplug for instance. But the differences are small and the objects
>> could possibly be merged.
>>
>> A set of PowerNVCPUCore objects is added to the PnvChip and the device
>> tree is populated looping on these cores. Core ids in the device tree
>> are still a little fuzy. To be checked.
> 
> So, it's not immediately obvious to me if you want an actual core
> object.  You could potentially create the actual vcpu objects directly
> from the chip object.  That assumes that any hotplug will only be at
> chip granularity, not core granularity, but I'm guessing that's the
> case anyway.
> 
> That said, if having the intermediate core object is helpful, you're
> certainly free to have it.

I would like to find some common ground with the spapr core. It should
be possible but for the sake of simplicity let's keep it that way.

>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  hw/ppc/Makefile.objs      |   2 +-
>>  hw/ppc/pnv.c              | 160 ++++++++++++++++++++++++++++++++++++++++++-
>>  hw/ppc/pnv_core.c         | 171 ++++++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/ppc/pnv.h      |   7 ++
>>  include/hw/ppc/pnv_core.h |  47 +++++++++++++
>>  5 files changed, 383 insertions(+), 4 deletions(-)
>>  create mode 100644 hw/ppc/pnv_core.c
>>  create mode 100644 include/hw/ppc/pnv_core.h
>>
>> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
>> index 8105db7d5600..f8c7d1db9ade 100644
>> --- a/hw/ppc/Makefile.objs
>> +++ b/hw/ppc/Makefile.objs
>> @@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
>>  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
>>  obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
>>  # IBM PowerNV
>> -obj-$(CONFIG_POWERNV) += pnv.o
>> +obj-$(CONFIG_POWERNV) += pnv.o pnv_core.o
>>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>>  obj-y += spapr_pci_vfio.o
>>  endif
>> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
>> index a680780e9dea..1219493c7218 100644
>> --- a/hw/ppc/pnv.c
>> +++ b/hw/ppc/pnv.c
>> @@ -35,6 +35,7 @@
>>  #include "hw/ppc/fdt.h"
>>  #include "hw/ppc/ppc.h"
>>  #include "hw/ppc/pnv.h"
>> +#include "hw/ppc/pnv_core.h"
>>  #include "hw/loader.h"
>>  #include "exec/address-spaces.h"
>>  #include "qemu/cutils.h"
>> @@ -112,6 +113,114 @@ static int powernv_populate_memory(void *fdt)
>>      return 0;
>>  }
>>  
>> +static void powernv_create_core_node(void *fdt, CPUState *cs, uint32_t chip_id)
>> +{
>> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
>> +    int smt_threads = ppc_get_compat_smt_threads(cpu);
>> +    CPUPPCState *env = &cpu->env;
>> +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
>> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
>> +    uint32_t servers_prop[smt_threads];
>> +    uint32_t gservers_prop[smt_threads * 2];
>> +    int i, index = ppc_get_vcpu_dt_id(cpu);
>> +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
>> +                       0xffffffff, 0xffffffff};
>> +    uint32_t tbfreq = PNV_TIMEBASE_FREQ;
>> +    uint32_t cpufreq = 1000000000;
>> +    uint32_t page_sizes_prop[64];
>> +    size_t page_sizes_prop_size;
>> +    char *nodename;
>> +
>> +    nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
>> +
>> +    _FDT((fdt_begin_node(fdt, nodename)));
>> +
>> +    g_free(nodename);
>> +
>> +    _FDT((fdt_property_cell(fdt, "reg", index)));
>> +    _FDT((fdt_property_string(fdt, "device_type", "cpu")));
> 
> The handling of dt_id is going to collide with cleanups I want to do
> in this area (for spapr and the ppc cpu core).  Not sure there's a lot
> you can do to avoid that at this stage.

We will adapt. When I reworked the device tree to use the "rw" functions, 
I took a closer look at the powernv needs. Nothing alarming.

The cores are numbered in the (processor) chip. The maximum number of 
cores depends on the model (the max of the max is 12 for Venice) and 
they are not contiguous so we should be activating them with a bitmap 
per chip. 

The core id plus the chip id make a PIR, which is what we use as a dt_id.
For example :

	PowerPC,POWER8@8a8/ibm,chip-id
			 00000011 (17)
	PowerPC,POWER8@8a8/reg
			 000008a8 (2216)
	PowerPC,POWER8@8a8/ibm,pir
			 000008a8 (2216)	

Ben provided a good summary in skiboot, here :

	https://github.com/open-power/skiboot/blob/master/include/chip.h

May be we can find a good formula for cpu->cpu_dt_id. to be studied.


>> +    _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
>> +    _FDT((fdt_property_cell(fdt, "d-cache-block-size",
>> +                            env->dcache_line_size)));
>> +    _FDT((fdt_property_cell(fdt, "d-cache-line-size",
>> +                            env->dcache_line_size)));
>> +    _FDT((fdt_property_cell(fdt, "i-cache-block-size",
>> +                            env->icache_line_size)));
>> +    _FDT((fdt_property_cell(fdt, "i-cache-line-size",
>> +                            env->icache_line_size)));
>> +
>> +    if (pcc->l1_dcache_size) {
>> +        _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
>> +    } else {
>> +        error_report("Warning: Unknown L1 dcache size for cpu");
>> +    }
>> +    if (pcc->l1_icache_size) {
>> +        _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
>> +    } else {
>> +        error_report("Warning: Unknown L1 icache size for cpu");
>> +    }
>> +
>> +    _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
>> +    _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
>> +    _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
>> +    _FDT((fdt_property_string(fdt, "status", "okay")));
>> +    _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
>> +
>> +    if (env->spr_cb[SPR_PURR].oea_read) {
>> +        _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
>> +    }
>> +
>> +    if (env->mmu_model & POWERPC_MMU_1TSEG) {
>> +        _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
>> +                           segs, sizeof(segs))));
>> +    }
>> +
>> +    /* Advertise VMX/VSX (vector extensions) if available
>> +     *   0 / no property == no vector extensions
>> +     *   1               == VMX / Altivec available
>> +     *   2               == VSX available */
>> +    if (env->insns_flags & PPC_ALTIVEC) {
>> +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
>> +
>> +        _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
>> +    }
>> +
>> +    /* Advertise DFP (Decimal Floating Point) if available
>> +     *   0 / no property == no DFP
>> +     *   1               == DFP available */
>> +    if (env->insns_flags2 & PPC2_DFP) {
>> +        _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
>> +    }
>> +
>> +    page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop,
>> +                                                  sizeof(page_sizes_prop));
>> +    if (page_sizes_prop_size) {
>> +        _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
>> +                           page_sizes_prop, page_sizes_prop_size)));
>> +    }
>> +
>> +    _FDT((fdt_property_cell(fdt, "ibm,chip-id", chip_id)));
>> +
>> +    if (cpu->cpu_version) {
>> +        _FDT((fdt_property_cell(fdt, "cpu-version", cpu->cpu_version)));
>> +    }
>> +
>> +    /* Build interrupt servers and gservers properties */
>> +    for (i = 0; i < smt_threads; i++) {
>> +        servers_prop[i] = cpu_to_be32(index + i);
>> +        /* Hack, direct the group queues back to cpu 0 */
>> +        gservers_prop[i * 2] = cpu_to_be32(index + i);
>> +        gservers_prop[i * 2 + 1] = 0;
>> +    }
>> +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
>> +                       servers_prop, sizeof(servers_prop))));
>> +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
>> +                       gservers_prop, sizeof(gservers_prop))));
>> +
>> +    _FDT((fdt_end_node(fdt)));
>> +}
>> +
>>  static void *powernv_create_fdt(sPowerNVMachineState *pnv,
>>                                  const char *kernel_cmdline)
>>  {
>> @@ -120,6 +229,7 @@ static void *powernv_create_fdt(sPowerNVMachineState *pnv,
>>      uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size);
>>      char *buf;
>>      const char plat_compat[] = "qemu,powernv\0ibm,powernv";
>> +    int i;
>>  
>>      fdt = g_malloc0(FDT_MAX_SIZE);
>>      _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
>> @@ -156,6 +266,23 @@ static void *powernv_create_fdt(sPowerNVMachineState *pnv,
>>      /* Memory */
>>      _FDT((powernv_populate_memory(fdt)));
>>  
>> +    /* cpus */
>> +    _FDT((fdt_begin_node(fdt, "cpus")));
>> +    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
>> +    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
>> +
>> +    for (i = 0; i < pnv->num_chips; i++) {
>> +        PnvChip *chip = &pnv->chips[i];
>> +        int j;
>> +
>> +        for (j = 0; j < chip->num_cores; j++) {
>> +            PowerNVCPUCore *pc = POWERNV_CPU_CORE(chip->cores[j]);
>> +            CPUState *cs = CPU(DEVICE(pc->threads));
>> +            powernv_create_core_node(fdt, cs, chip->chip_id);
> 
> I think it would be nicer to define the fdt creation function in terms
> of the core object, and have it retrieve the representative thread itself.

ok. yes, I will try to refresh that.

>> +        }
>> +    }
>> +    _FDT((fdt_end_node(fdt)));
>> +
>>      _FDT((fdt_end_node(fdt))); /* close root node */
>>      _FDT((fdt_finish(fdt)));
>>  
>> @@ -168,11 +295,13 @@ static void ppc_powernv_reset(void)
>>      sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);
>>      void *fdt;
>>  
>> +    pnv->fdt_addr = FDT_ADDR;
>> +
>>      qemu_devices_reset();
>>  
>>      fdt = powernv_create_fdt(pnv, machine->kernel_cmdline);
>>  
>> -    cpu_physical_memory_write(FDT_ADDR, fdt, fdt_totalsize(fdt));
>> +    cpu_physical_memory_write(pnv->fdt_addr, fdt, fdt_totalsize(fdt));
> 
> These look like not really related changes, that should be maybe
> folded into 1/3.

yes.

>>  }
>>  
>>  static void ppc_powernv_init(MachineState *machine)
>> @@ -252,6 +381,10 @@ static void ppc_powernv_init(MachineState *machine)
>>  
>>          object_initialize(chip, sizeof(*chip), TYPE_PNV_CHIP);
>>          object_property_set_int(OBJECT(chip), i, "chip-id", &error_abort);
>> +        object_property_set_int(OBJECT(chip), smp_cpus, "num-cores",
>> +                                &error_abort);
>> +        object_property_set_str(OBJECT(chip), machine->cpu_model, "cpu-model",
>> +                                &error_abort);
> 
> So various drafts of the spapr cores had a cpu-model parameter, but we
> eventually rejected it in favour of having different core types
> corresponding to the different, well, core types.  Unless there's a
> compelling reason otherwise, I think it would be nicer to do the same
> for the pnvchip objects - a p8pnvchip object will always create p8
> cores and so forth.

yes. This is fixed in the current patchset. 

>>          object_property_set_bool(OBJECT(chip), true, "realized", &error_abort);
>>      }
>>  }
>> @@ -292,14 +425,35 @@ static const TypeInfo powernv_machine_2_8_info = {
>>      .class_init    = powernv_machine_2_8_class_init,
>>  };
>>  
>> -
>>  static void pnv_chip_realize(DeviceState *dev, Error **errp)
>>  {
>> -    ;
>> +    int i;
>> +    PnvChip *chip = PNV_CHIP(dev);
>> +    char *typename = powernv_cpu_core_typename(chip->cpu_model);
>> +
>> +    if (!object_class_by_name(typename)) {
>> +        error_setg(errp, "Unable to find PowerNV CPU Core definition");
>> +        return;
>> +    }
>> +
>> +    chip->cores = g_new0(Object *, chip->num_cores);
>> +    for (i = 0; i < chip->num_cores; i++) {
>> +        int core_id = i * smp_threads;
>> +        chip->cores[i] = object_new(typename);
>> +        object_property_set_int(chip->cores[i], smp_threads, "nr-threads",
>> +                                &error_fatal);
>> +        object_property_set_int(chip->cores[i], core_id, CPU_CORE_PROP_CORE_ID,
>> +                                &error_fatal);
>> +        object_property_set_bool(chip->cores[i], true, "realized",
>> +                                 &error_fatal);
>> +    }
>> +    g_free(typename);
>>  }
>>  
>>  static Property pnv_chip_properties[] = {
>>      DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0),
>> +    DEFINE_PROP_STRING("cpu-model", PnvChip, cpu_model),
>> +    DEFINE_PROP_UINT32("num-cores", PnvChip, num_cores, 1),
>>      DEFINE_PROP_END_OF_LIST(),
>>  };
>>  
>> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
>> new file mode 100644
>> index 000000000000..1e36709db993
>> --- /dev/null
>> +++ b/hw/ppc/pnv_core.c
>> @@ -0,0 +1,171 @@
>> +/*
>> + * QEMU PowerPC PowerNV CPU model
>> + *
>> + * Copyright (c) IBM Corporation.
>> + *
>> + * This library is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public License
>> + * as published by the Free Software Foundation; either version 2 of
>> + * the License, or (at your option) any later version.
>> + *
>> + * This library is distributed in the hope that it will be useful, but
>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
>> + */
>> +#include "qemu/osdep.h"
>> +#include "sysemu/sysemu.h"
>> +#include "qapi/error.h"
>> +#include "target-ppc/cpu.h"
>> +#include "hw/ppc/ppc.h"
>> +#include "hw/ppc/pnv.h"
>> +#include "hw/ppc/pnv_core.h"
>> +
>> +static void powernv_cpu_reset(void *opaque)
>> +{
>> +    PowerPCCPU *cpu = opaque;
>> +    CPUState *cs = CPU(cpu);
>> +    CPUPPCState *env = &cpu->env;
>> +    MachineState *machine = MACHINE(qdev_get_machine());
>> +    sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);
>> +
>> +    cpu_reset(cs);
>> +
>> +    env->spr[SPR_PIR] = ppc_get_vcpu_dt_id(cpu);
> 
> Is the PIR writable?  If not it might be better to move this to init
> rather than reset time.

It should not. OK.

>> +    env->spr[SPR_HIOR] = 0;
>> +    env->gpr[3] = pnv->fdt_addr;
>> +    env->nip = 0x10;
>> +    env->msr |= MSR_HVB;
>> +}
>> +
>> +static void powernv_cpu_init(PowerPCCPU *cpu, Error **errp)
>> +{
>> +    CPUPPCState *env = &cpu->env;
>> +
>> +    /* Set time-base frequency to 512 MHz */
>> +    cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
>> +
>> +    /* MSR[IP] doesn't exist nowadays */
>> +    env->msr_mask &= ~(1 << 6);
>> +
>> +    qemu_register_reset(powernv_cpu_reset, cpu);
>> +    powernv_cpu_reset(cpu);
>> +}
>> +
>> +static void powernv_cpu_core_realize_child(Object *child, Error **errp)
>> +{
>> +    Error *local_err = NULL;
>> +    CPUState *cs = CPU(child);
>> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
>> +
>> +    object_property_set_bool(child, true, "realized", &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return;
>> +    }
>> +
>> +    powernv_cpu_init(cpu, &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return;
>> +    }
>> +}
>> +
>> +static void powernv_cpu_core_realize(DeviceState *dev, Error **errp)
>> +{
>> +    PowerNVCPUCore *pc = POWERNV_CPU_CORE(OBJECT(dev));
>> +    CPUCore *cc = CPU_CORE(OBJECT(dev));
>> +    PowerNVCPUClass *pcc = POWERNV_CPU_GET_CLASS(OBJECT(dev));
>> +    const char *typename = object_class_get_name(pcc->cpu_oc);
>> +    size_t size = object_type_get_instance_size(typename);
>> +    Error *local_err = NULL;
>> +    void *obj;
>> +    int i, j;
>> +
>> +
>> +    pc->threads = g_malloc0(size * cc->nr_threads);
>> +    for (i = 0; i < cc->nr_threads; i++) {
>> +        char id[32];
>> +        CPUState *cs;
>> +
>> +        obj = pc->threads + i * size;
>> +
>> +        object_initialize(obj, size, typename);
>> +        cs = CPU(obj);
>> +        cs->cpu_index = cc->core_id + i;
>> +        snprintf(id, sizeof(id), "thread[%d]", i);
>> +        object_property_add_child(OBJECT(pc), id, obj, &local_err);
>> +        if (local_err) {
>> +            goto err;
>> +        }
>> +        object_unref(obj);
>> +    }
>> +
>> +    for (j = 0; j < cc->nr_threads; j++) {
>> +        obj = pc->threads + j * size;
>> +
>> +        powernv_cpu_core_realize_child(obj, &local_err);
>> +        if (local_err) {
>> +            goto err;
>> +        }
>> +    }
>> +    return;
>> +
>> +err:
>> +    while (--i >= 0) {
>> +        obj = pc->threads + i * size;
>> +        object_unparent(obj);
>> +    }
>> +    g_free(pc->threads);
>> +    error_propagate(errp, local_err);
>> +}
>> +
>> +/*
>> + * Grow this list or merge with SPAPRCoreInfo which is very similar
>> + */
>> +static const char *powernv_core_models[] = { "POWER8" };
>> +
>> +static void powernv_cpu_core_class_init(ObjectClass *oc, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(oc);
>> +    PowerNVCPUClass *pcc = POWERNV_CPU_CLASS(oc);
>> +
>> +    dc->realize = powernv_cpu_core_realize;
>> +    pcc->cpu_oc = cpu_class_by_name(TYPE_POWERPC_CPU, data);
>> +}
>> +
>> +static const TypeInfo powernv_cpu_core_info = {
>> +    .name           = TYPE_POWERNV_CPU_CORE,
>> +    .parent         = TYPE_CPU_CORE,
>> +    .instance_size  = sizeof(PowerNVCPUCore),
>> +    .class_size     = sizeof(PowerNVCPUClass),
>> +    .abstract       = true,
>> +};
>> +
>> +static void powernv_cpu_core_register_types(void)
>> +{
>> +    int i ;
>> +
>> +    type_register_static(&powernv_cpu_core_info);
>> +    for (i = 0; i < ARRAY_SIZE(powernv_core_models); ++i) {
>> +        TypeInfo ti = {
>> +            .parent = TYPE_POWERNV_CPU_CORE,
>> +            .instance_size = sizeof(PowerNVCPUCore),
>> +            .class_init = powernv_cpu_core_class_init,
>> +            .class_data = (void *) powernv_core_models[i],
>> +        };
>> +        ti.name = powernv_cpu_core_typename(powernv_core_models[i]);
>> +        type_register(&ti);
>> +        g_free((void *)ti.name);
>> +    }
>> +}
>> +
>> +type_init(powernv_cpu_core_register_types)
>> +
>> +char *powernv_cpu_core_typename(const char *model)
>> +{
>> +    return g_strdup_printf("%s-" TYPE_POWERNV_CPU_CORE, model);
>> +}
>> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
>> index 6907dc9e5c3d..9eac4b34a9b0 100644
>> --- a/include/hw/ppc/pnv.h
>> +++ b/include/hw/ppc/pnv.h
>> @@ -31,6 +31,10 @@ typedef struct PnvChip {
>>  
>>      /*< public >*/
>>      uint32_t     chip_id;
>> +    uint32_t     num_cores;
>> +    char *cpu_model;
>> +
>> +    Object **cores;
> 
> So, unlike the chips within the machine, the cores within the chip
> should probably be done like the threads within the core - a single
> array with object_initialize() rather than an array of pointers.  AIUI
> this is because having a (potentially) user instantiable object
> automatically object_new() other things breaks QOM lifetime rules.  I
> can't say I understand this point terribly well though, but i know it
> was something we went several rounds on during the spapr core work
> though.

ok. I will look into it if this is possible, but I see : 

	struct sPAPRMachineState {
	    ...
	    Object **cores;
	};

So spapr is not instantiating cores in the prefered way ? That does
mean pnv should do the same.

Thanks,

C. 


>>  } PnvChip;
>>  
>>  #define TYPE_POWERNV_MACHINE      "powernv-machine"
>> @@ -43,9 +47,12 @@ typedef struct sPowerNVMachineState {
>>  
>>      uint32_t initrd_base;
>>      long initrd_size;
>> +    hwaddr fdt_addr;
>>  
>>      uint32_t  num_chips;
>>      PnvChip   *chips;
>>  } sPowerNVMachineState;
>>  
>> +#define PNV_TIMEBASE_FREQ           512000000ULL
>> +
>>  #endif /* _PPC_PNV_H */
>> diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
>> new file mode 100644
>> index 000000000000..88a09b0fd1c6
>> --- /dev/null
>> +++ b/include/hw/ppc/pnv_core.h
>> @@ -0,0 +1,47 @@
>> +/*
>> + * QEMU PowerPC PowerNV CPU model
>> + *
>> + * Copyright (c) 2016 IBM Corporation.
>> + *
>> + * This library is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public License
>> + * as published by the Free Software Foundation; either version 2 of
>> + * the License, or (at your option) any later version.
>> + *
>> + * This library is distributed in the hope that it will be useful, but
>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
>> + */
>> +#ifndef _PPC_PNV_CORE_H
>> +#define _PPC_PNV_CORE_H
>> +
>> +#include "hw/cpu/core.h"
>> +
>> +#define TYPE_POWERNV_CPU_CORE "powernv-cpu-core"
>> +#define POWERNV_CPU_CORE(obj) \
>> +    OBJECT_CHECK(PowerNVCPUCore, (obj), TYPE_POWERNV_CPU_CORE)
>> +#define POWERNV_CPU_CLASS(klass) \
>> +     OBJECT_CLASS_CHECK(PowerNVCPUClass, (klass), TYPE_POWERNV_CPU_CORE)
>> +#define POWERNV_CPU_GET_CLASS(obj) \
>> +     OBJECT_GET_CLASS(PowerNVCPUClass, (obj), TYPE_POWERNV_CPU_CORE)
>> +
>> +typedef struct PowerNVCPUCore {
>> +    /*< private >*/
>> +    CPUCore parent_obj;
>> +
>> +    /*< public >*/
>> +    void *threads;
>> +} PowerNVCPUCore;
>> +
>> +typedef struct PowerNVCPUClass {
>> +    DeviceClass parent_class;
>> +    ObjectClass *cpu_oc;
>> +}   PowerNVCPUClass;
>> +
>> +extern char *powernv_cpu_core_typename(const char *model);
>> +
>> +#endif /* _PPC_PNV_CORE_H */
>
Cédric Le Goater Aug. 26, 2016, 5:55 p.m. UTC | #4
On 08/16/2016 07:02 AM, Benjamin Herrenschmidt wrote:
> On Tue, 2016-08-16 at 12:39 +1000, David Gibson wrote:
>> On Fri, Aug 05, 2016 at 11:15:37AM +0200, Cédric Le Goater wrote:
>>>
>>> This is largy inspired by sPAPRCPUCore with some simplification, no
>>> hotplug for instance. But the differences are small and the objects
>>> could possibly be merged.
>>>
>>> A set of PowerNVCPUCore objects is added to the PnvChip and the device
>>> tree is populated looping on these cores. Core ids in the device tree
>>> are still a little fuzy. To be checked.
> 
> What about P9 where cores are in pairs to form EX and in pairs of EX
> (ie, quads) to form EPs ?

Sounds fun ! I need to dig in the P9 specs to have a better idea. I lack 
the knowledge right now.

>>> So, it's not immediately obvious to me if you want an actual core
>> object.  You could potentially create the actual vcpu objects directly
>> from the chip object.  That assumes that any hotplug will only be at
>> chip granularity, not core granularity, but I'm guessing that's the
>> case anyway.
> 
> Well we want to add some of the core XSCOMs so it makes sense to have
> a core object that is a XSCOM device

ok. I have will look into that. 

Thanks,

C. 

>>> That said, if having the intermediate core object is helpful, you're
>> certainly free to have it.
>>
>>>
>>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>> ---
>>>  hw/ppc/Makefile.objs      |   2 +-
>>>  hw/ppc/pnv.c              | 160 ++++++++++++++++++++++++++++++++++++++++++-
>>>  hw/ppc/pnv_core.c         | 171 ++++++++++++++++++++++++++++++++++++++++++++++
>>>  include/hw/ppc/pnv.h      |   7 ++
>>>  include/hw/ppc/pnv_core.h |  47 +++++++++++++
>>>  5 files changed, 383 insertions(+), 4 deletions(-)
>>>  create mode 100644 hw/ppc/pnv_core.c
>>>  create mode 100644 include/hw/ppc/pnv_core.h
>>>
>>> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
>>> index 8105db7d5600..f8c7d1db9ade 100644
>>> --- a/hw/ppc/Makefile.objs
>>> +++ b/hw/ppc/Makefile.objs
>>> @@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
>>>  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
>>>  obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
>>>  # IBM PowerNV
>>> -obj-$(CONFIG_POWERNV) += pnv.o
>>> +obj-$(CONFIG_POWERNV) += pnv.o pnv_core.o
>>>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>>>  obj-y += spapr_pci_vfio.o
>>>  endif
>>> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
>>> index a680780e9dea..1219493c7218 100644
>>> --- a/hw/ppc/pnv.c
>>> +++ b/hw/ppc/pnv.c
>>> @@ -35,6 +35,7 @@
>>>  #include "hw/ppc/fdt.h"
>>>  #include "hw/ppc/ppc.h"
>>>  #include "hw/ppc/pnv.h"
>>> +#include "hw/ppc/pnv_core.h"
>>>  #include "hw/loader.h"
>>>  #include "exec/address-spaces.h"
>>>  #include "qemu/cutils.h"
>>> @@ -112,6 +113,114 @@ static int powernv_populate_memory(void *fdt)
>>>      return 0;
>>>  }
>>>  
>>> +static void powernv_create_core_node(void *fdt, CPUState *cs, uint32_t chip_id)
>>> +{
>>> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
>>> +    int smt_threads = ppc_get_compat_smt_threads(cpu);
>>> +    CPUPPCState *env = &cpu->env;
>>> +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
>>> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
>>> +    uint32_t servers_prop[smt_threads];
>>> +    uint32_t gservers_prop[smt_threads * 2];
>>> +    int i, index = ppc_get_vcpu_dt_id(cpu);
>>> +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
>>> +                       0xffffffff, 0xffffffff};
>>> +    uint32_t tbfreq = PNV_TIMEBASE_FREQ;
>>> +    uint32_t cpufreq = 1000000000;
>>> +    uint32_t page_sizes_prop[64];
>>> +    size_t page_sizes_prop_size;
>>> +    char *nodename;
>>> +
>>> +    nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
>>> +
>>> +    _FDT((fdt_begin_node(fdt, nodename)));
>>> +
>>> +    g_free(nodename);
>>> +
>>> +    _FDT((fdt_property_cell(fdt, "reg", index)));
>>> +    _FDT((fdt_property_string(fdt, "device_type", "cpu")));
>>
>> The handling of dt_id is going to collide with cleanups I want to do
>> in this area (for spapr and the ppc cpu core).  Not sure there's a lot
>> you can do to avoid that at this stage.
>>
>>>
>>> +    _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
>>> +    _FDT((fdt_property_cell(fdt, "d-cache-block-size",
>>> +                            env->dcache_line_size)));
>>> +    _FDT((fdt_property_cell(fdt, "d-cache-line-size",
>>> +                            env->dcache_line_size)));
>>> +    _FDT((fdt_property_cell(fdt, "i-cache-block-size",
>>> +                            env->icache_line_size)));
>>> +    _FDT((fdt_property_cell(fdt, "i-cache-line-size",
>>> +                            env->icache_line_size)));
>>> +
>>> +    if (pcc->l1_dcache_size) {
>>> +        _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
>>> +    } else {
>>> +        error_report("Warning: Unknown L1 dcache size for cpu");
>>> +    }
>>> +    if (pcc->l1_icache_size) {
>>> +        _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
>>> +    } else {
>>> +        error_report("Warning: Unknown L1 icache size for cpu");
>>> +    }
>>> +
>>> +    _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
>>> +    _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
>>> +    _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
>>> +    _FDT((fdt_property_string(fdt, "status", "okay")));
>>> +    _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
>>> +
>>> +    if (env->spr_cb[SPR_PURR].oea_read) {
>>> +        _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
>>> +    }
>>> +
>>> +    if (env->mmu_model & POWERPC_MMU_1TSEG) {
>>> +        _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
>>> +                           segs, sizeof(segs))));
>>> +    }
>>> +
>>> +    /* Advertise VMX/VSX (vector extensions) if available
>>> +     *   0 / no property == no vector extensions
>>> +     *   1               == VMX / Altivec available
>>> +     *   2               == VSX available */
>>> +    if (env->insns_flags & PPC_ALTIVEC) {
>>> +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
>>> +
>>> +        _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
>>> +    }
>>> +
>>> +    /* Advertise DFP (Decimal Floating Point) if available
>>> +     *   0 / no property == no DFP
>>> +     *   1               == DFP available */
>>> +    if (env->insns_flags2 & PPC2_DFP) {
>>> +        _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
>>> +    }
>>> +
>>> +    page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop,
>>> +                                                  sizeof(page_sizes_prop));
>>> +    if (page_sizes_prop_size) {
>>> +        _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
>>> +                           page_sizes_prop, page_sizes_prop_size)));
>>> +    }
>>> +
>>> +    _FDT((fdt_property_cell(fdt, "ibm,chip-id", chip_id)));
>>> +
>>> +    if (cpu->cpu_version) {
>>> +        _FDT((fdt_property_cell(fdt, "cpu-version", cpu->cpu_version)));
>>> +    }
>>> +
>>> +    /* Build interrupt servers and gservers properties */
>>> +    for (i = 0; i < smt_threads; i++) {
>>> +        servers_prop[i] = cpu_to_be32(index + i);
>>> +        /* Hack, direct the group queues back to cpu 0 */
>>> +        gservers_prop[i * 2] = cpu_to_be32(index + i);
>>> +        gservers_prop[i * 2 + 1] = 0;
>>> +    }
>>> +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
>>> +                       servers_prop, sizeof(servers_prop))));
>>> +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
>>> +                       gservers_prop, sizeof(gservers_prop))));
>>> +
>>> +    _FDT((fdt_end_node(fdt)));
>>> +}
>>> +
>>>  static void *powernv_create_fdt(sPowerNVMachineState *pnv,
>>>                                  const char *kernel_cmdline)
>>>  {
>>> @@ -120,6 +229,7 @@ static void *powernv_create_fdt(sPowerNVMachineState *pnv,
>>>      uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size);
>>>      char *buf;
>>>      const char plat_compat[] = "qemu,powernv\0ibm,powernv";
>>> +    int i;
>>>  
>>>      fdt = g_malloc0(FDT_MAX_SIZE);
>>>      _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
>>> @@ -156,6 +266,23 @@ static void *powernv_create_fdt(sPowerNVMachineState *pnv,
>>>      /* Memory */
>>>      _FDT((powernv_populate_memory(fdt)));
>>>  
>>> +    /* cpus */
>>> +    _FDT((fdt_begin_node(fdt, "cpus")));
>>> +    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
>>> +    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
>>> +
>>> +    for (i = 0; i < pnv->num_chips; i++) {
>>> +        PnvChip *chip = &pnv->chips[i];
>>> +        int j;
>>> +
>>> +        for (j = 0; j < chip->num_cores; j++) {
>>> +            PowerNVCPUCore *pc = POWERNV_CPU_CORE(chip->cores[j]);
>>> +            CPUState *cs = CPU(DEVICE(pc->threads));
>>> +            powernv_create_core_node(fdt, cs, chip->chip_id);
>>
>> I think it would be nicer to define the fdt creation function in terms
>> of the core object, and have it retrieve the representative thread itself.
>>
>>>
>>> +        }
>>> +    }
>>> +    _FDT((fdt_end_node(fdt)));
>>> +
>>>      _FDT((fdt_end_node(fdt))); /* close root node */
>>>      _FDT((fdt_finish(fdt)));
>>>  
>>> @@ -168,11 +295,13 @@ static void ppc_powernv_reset(void)
>>>      sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);
>>>      void *fdt;
>>>  
>>> +    pnv->fdt_addr = FDT_ADDR;
>>> +
>>>      qemu_devices_reset();
>>>  
>>>      fdt = powernv_create_fdt(pnv, machine->kernel_cmdline);
>>>  
>>> -    cpu_physical_memory_write(FDT_ADDR, fdt, fdt_totalsize(fdt));
>>> +    cpu_physical_memory_write(pnv->fdt_addr, fdt, fdt_totalsize(fdt));
>>
>> These look like not really related changes, that should be maybe
>> folded into 1/3.
>>
>>>
>>>  }
>>>  
>>>  static void ppc_powernv_init(MachineState *machine)
>>> @@ -252,6 +381,10 @@ static void ppc_powernv_init(MachineState *machine)
>>>  
>>>          object_initialize(chip, sizeof(*chip), TYPE_PNV_CHIP);
>>>          object_property_set_int(OBJECT(chip), i, "chip-id", &error_abort);
>>> +        object_property_set_int(OBJECT(chip), smp_cpus, "num-cores",
>>> +                                &error_abort);
>>> +        object_property_set_str(OBJECT(chip), machine->cpu_model, "cpu-model",
>>> +                                &error_abort);
>>
>> So various drafts of the spapr cores had a cpu-model parameter, but we
>> eventually rejected it in favour of having different core types
>> corresponding to the different, well, core types.  Unless there's a
>> compelling reason otherwise, I think it would be nicer to do the same
>> for the pnvchip objects - a p8pnvchip object will always create p8
>> cores and so forth.
>>
>>>
>>>          object_property_set_bool(OBJECT(chip), true, "realized", &error_abort);
>>>      }
>>>  }
>>> @@ -292,14 +425,35 @@ static const TypeInfo powernv_machine_2_8_info = {
>>>      .class_init    = powernv_machine_2_8_class_init,
>>>  };
>>>  
>>> -
>>>  static void pnv_chip_realize(DeviceState *dev, Error **errp)
>>>  {
>>> -    ;
>>> +    int i;
>>> +    PnvChip *chip = PNV_CHIP(dev);
>>> +    char *typename = powernv_cpu_core_typename(chip->cpu_model);
>>> +
>>> +    if (!object_class_by_name(typename)) {
>>> +        error_setg(errp, "Unable to find PowerNV CPU Core definition");
>>> +        return;
>>> +    }
>>> +
>>> +    chip->cores = g_new0(Object *, chip->num_cores);
>>> +    for (i = 0; i < chip->num_cores; i++) {
>>> +        int core_id = i * smp_threads;
>>> +        chip->cores[i] = object_new(typename);
>>> +        object_property_set_int(chip->cores[i], smp_threads, "nr-threads",
>>> +                                &error_fatal);
>>> +        object_property_set_int(chip->cores[i], core_id, CPU_CORE_PROP_CORE_ID,
>>> +                                &error_fatal);
>>> +        object_property_set_bool(chip->cores[i], true, "realized",
>>> +                                 &error_fatal);
>>> +    }
>>> +    g_free(typename);
>>>  }
>>>  
>>>  static Property pnv_chip_properties[] = {
>>>      DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0),
>>> +    DEFINE_PROP_STRING("cpu-model", PnvChip, cpu_model),
>>> +    DEFINE_PROP_UINT32("num-cores", PnvChip, num_cores, 1),
>>>      DEFINE_PROP_END_OF_LIST(),
>>>  };
>>>  
>>> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
>>> new file mode 100644
>>> index 000000000000..1e36709db993
>>> --- /dev/null
>>> +++ b/hw/ppc/pnv_core.c
>>> @@ -0,0 +1,171 @@
>>> +/*
>>> + * QEMU PowerPC PowerNV CPU model
>>> + *
>>> + * Copyright (c) IBM Corporation.
>>> + *
>>> + * This library is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU Lesser General Public License
>>> + * as published by the Free Software Foundation; either version 2 of
>>> + * the License, or (at your option) any later version.
>>> + *
>>> + * This library is distributed in the hope that it will be useful, but
>>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> + * Lesser General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU Lesser General Public
>>> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
>>> + */
>>> +#include "qemu/osdep.h"
>>> +#include "sysemu/sysemu.h"
>>> +#include "qapi/error.h"
>>> +#include "target-ppc/cpu.h"
>>> +#include "hw/ppc/ppc.h"
>>> +#include "hw/ppc/pnv.h"
>>> +#include "hw/ppc/pnv_core.h"
>>> +
>>> +static void powernv_cpu_reset(void *opaque)
>>> +{
>>> +    PowerPCCPU *cpu = opaque;
>>> +    CPUState *cs = CPU(cpu);
>>> +    CPUPPCState *env = &cpu->env;
>>> +    MachineState *machine = MACHINE(qdev_get_machine());
>>> +    sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);
>>> +
>>> +    cpu_reset(cs);
>>> +
>>> +    env->spr[SPR_PIR] = ppc_get_vcpu_dt_id(cpu);
>>
>> Is the PIR writable?  If not it might be better to move this to init
>> rather than reset time.
>>
>>>
>>> +    env->spr[SPR_HIOR] = 0;
>>> +    env->gpr[3] = pnv->fdt_addr;
>>> +    env->nip = 0x10;
>>> +    env->msr |= MSR_HVB;
>>> +}
>>> +
>>> +static void powernv_cpu_init(PowerPCCPU *cpu, Error **errp)
>>> +{
>>> +    CPUPPCState *env = &cpu->env;
>>> +
>>> +    /* Set time-base frequency to 512 MHz */
>>> +    cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
>>> +
>>> +    /* MSR[IP] doesn't exist nowadays */
>>> +    env->msr_mask &= ~(1 << 6);
>>> +
>>> +    qemu_register_reset(powernv_cpu_reset, cpu);
>>> +    powernv_cpu_reset(cpu);
>>> +}
>>> +
>>> +static void powernv_cpu_core_realize_child(Object *child, Error **errp)
>>> +{
>>> +    Error *local_err = NULL;
>>> +    CPUState *cs = CPU(child);
>>> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
>>> +
>>> +    object_property_set_bool(child, true, "realized", &local_err);
>>> +    if (local_err) {
>>> +        error_propagate(errp, local_err);
>>> +        return;
>>> +    }
>>> +
>>> +    powernv_cpu_init(cpu, &local_err);
>>> +    if (local_err) {
>>> +        error_propagate(errp, local_err);
>>> +        return;
>>> +    }
>>> +}
>>> +
>>> +static void powernv_cpu_core_realize(DeviceState *dev, Error **errp)
>>> +{
>>> +    PowerNVCPUCore *pc = POWERNV_CPU_CORE(OBJECT(dev));
>>> +    CPUCore *cc = CPU_CORE(OBJECT(dev));
>>> +    PowerNVCPUClass *pcc = POWERNV_CPU_GET_CLASS(OBJECT(dev));
>>> +    const char *typename = object_class_get_name(pcc->cpu_oc);
>>> +    size_t size = object_type_get_instance_size(typename);
>>> +    Error *local_err = NULL;
>>> +    void *obj;
>>> +    int i, j;
>>> +
>>> +
>>> +    pc->threads = g_malloc0(size * cc->nr_threads);
>>> +    for (i = 0; i < cc->nr_threads; i++) {
>>> +        char id[32];
>>> +        CPUState *cs;
>>> +
>>> +        obj = pc->threads + i * size;
>>> +
>>> +        object_initialize(obj, size, typename);
>>> +        cs = CPU(obj);
>>> +        cs->cpu_index = cc->core_id + i;
>>> +        snprintf(id, sizeof(id), "thread[%d]", i);
>>> +        object_property_add_child(OBJECT(pc), id, obj, &local_err);
>>> +        if (local_err) {
>>> +            goto err;
>>> +        }
>>> +        object_unref(obj);
>>> +    }
>>> +
>>> +    for (j = 0; j < cc->nr_threads; j++) {
>>> +        obj = pc->threads + j * size;
>>> +
>>> +        powernv_cpu_core_realize_child(obj, &local_err);
>>> +        if (local_err) {
>>> +            goto err;
>>> +        }
>>> +    }
>>> +    return;
>>> +
>>> +err:
>>> +    while (--i >= 0) {
>>> +        obj = pc->threads + i * size;
>>> +        object_unparent(obj);
>>> +    }
>>> +    g_free(pc->threads);
>>> +    error_propagate(errp, local_err);
>>> +}
>>> +
>>> +/*
>>> + * Grow this list or merge with SPAPRCoreInfo which is very similar
>>> + */
>>> +static const char *powernv_core_models[] = { "POWER8" };
>>> +
>>> +static void powernv_cpu_core_class_init(ObjectClass *oc, void *data)
>>> +{
>>> +    DeviceClass *dc = DEVICE_CLASS(oc);
>>> +    PowerNVCPUClass *pcc = POWERNV_CPU_CLASS(oc);
>>> +
>>> +    dc->realize = powernv_cpu_core_realize;
>>> +    pcc->cpu_oc = cpu_class_by_name(TYPE_POWERPC_CPU, data);
>>> +}
>>> +
>>> +static const TypeInfo powernv_cpu_core_info = {
>>> +    .name           = TYPE_POWERNV_CPU_CORE,
>>> +    .parent         = TYPE_CPU_CORE,
>>> +    .instance_size  = sizeof(PowerNVCPUCore),
>>> +    .class_size     = sizeof(PowerNVCPUClass),
>>> +    .abstract       = true,
>>> +};
>>> +
>>> +static void powernv_cpu_core_register_types(void)
>>> +{
>>> +    int i ;
>>> +
>>> +    type_register_static(&powernv_cpu_core_info);
>>> +    for (i = 0; i < ARRAY_SIZE(powernv_core_models); ++i) {
>>> +        TypeInfo ti = {
>>> +            .parent = TYPE_POWERNV_CPU_CORE,
>>> +            .instance_size = sizeof(PowerNVCPUCore),
>>> +            .class_init = powernv_cpu_core_class_init,
>>> +            .class_data = (void *) powernv_core_models[i],
>>> +        };
>>> +        ti.name = powernv_cpu_core_typename(powernv_core_models[i]);
>>> +        type_register(&ti);
>>> +        g_free((void *)ti.name);
>>> +    }
>>> +}
>>> +
>>> +type_init(powernv_cpu_core_register_types)
>>> +
>>> +char *powernv_cpu_core_typename(const char *model)
>>> +{
>>> +    return g_strdup_printf("%s-" TYPE_POWERNV_CPU_CORE, model);
>>> +}
>>> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
>>> index 6907dc9e5c3d..9eac4b34a9b0 100644
>>> --- a/include/hw/ppc/pnv.h
>>> +++ b/include/hw/ppc/pnv.h
>>> @@ -31,6 +31,10 @@ typedef struct PnvChip {
>>>  
>>>      /*< public >*/
>>>      uint32_t     chip_id;
>>> +    uint32_t     num_cores;
>>> +    char *cpu_model;
>>> +
>>> +    Object **cores;
>>
>> So, unlike the chips within the machine, the cores within the chip
>> should probably be done like the threads within the core - a single
>> array with object_initialize() rather than an array of pointers.  AIUI
>> this is because having a (potentially) user instantiable object
>> automatically object_new() other things breaks QOM lifetime rules.  I
>> can't say I understand this point terribly well though, but i know it
>> was something we went several rounds on during the spapr core work
>> though.
>>
>>>
>>>  } PnvChip;
>>>  
>>>  #define TYPE_POWERNV_MACHINE      "powernv-machine"
>>> @@ -43,9 +47,12 @@ typedef struct sPowerNVMachineState {
>>>  
>>>      uint32_t initrd_base;
>>>      long initrd_size;
>>> +    hwaddr fdt_addr;
>>>  
>>>      uint32_t  num_chips;
>>>      PnvChip   *chips;
>>>  } sPowerNVMachineState;
>>>  
>>> +#define PNV_TIMEBASE_FREQ           512000000ULL
>>> +
>>>  #endif /* _PPC_PNV_H */
>>> diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
>>> new file mode 100644
>>> index 000000000000..88a09b0fd1c6
>>> --- /dev/null
>>> +++ b/include/hw/ppc/pnv_core.h
>>> @@ -0,0 +1,47 @@
>>> +/*
>>> + * QEMU PowerPC PowerNV CPU model
>>> + *
>>> + * Copyright (c) 2016 IBM Corporation.
>>> + *
>>> + * This library is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU Lesser General Public License
>>> + * as published by the Free Software Foundation; either version 2 of
>>> + * the License, or (at your option) any later version.
>>> + *
>>> + * This library is distributed in the hope that it will be useful, but
>>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> + * Lesser General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU Lesser General Public
>>> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
>>> + */
>>> +#ifndef _PPC_PNV_CORE_H
>>> +#define _PPC_PNV_CORE_H
>>> +
>>> +#include "hw/cpu/core.h"
>>> +
>>> +#define TYPE_POWERNV_CPU_CORE "powernv-cpu-core"
>>> +#define POWERNV_CPU_CORE(obj) \
>>> +    OBJECT_CHECK(PowerNVCPUCore, (obj), TYPE_POWERNV_CPU_CORE)
>>> +#define POWERNV_CPU_CLASS(klass) \
>>> +     OBJECT_CLASS_CHECK(PowerNVCPUClass, (klass), TYPE_POWERNV_CPU_CORE)
>>> +#define POWERNV_CPU_GET_CLASS(obj) \
>>> +     OBJECT_GET_CLASS(PowerNVCPUClass, (obj), TYPE_POWERNV_CPU_CORE)
>>> +
>>> +typedef struct PowerNVCPUCore {
>>> +    /*< private >*/
>>> +    CPUCore parent_obj;
>>> +
>>> +    /*< public >*/
>>> +    void *threads;
>>> +} PowerNVCPUCore;
>>> +
>>> +typedef struct PowerNVCPUClass {
>>> +    DeviceClass parent_class;
>>> +    ObjectClass *cpu_oc;
>>> +}   PowerNVCPUClass;
>>> +
>>> +extern char *powernv_cpu_core_typename(const char *model);
>>> +
>>> +#endif /* _PPC_PNV_CORE_H */
>>
David Gibson Aug. 29, 2016, 2:30 p.m. UTC | #5
On Fri, Aug 26, 2016 at 07:49:20PM +0200, Cédric Le Goater wrote:
> On 08/16/2016 04:39 AM, David Gibson wrote:
> > On Fri, Aug 05, 2016 at 11:15:37AM +0200, Cédric Le Goater wrote:
> >> This is largy inspired by sPAPRCPUCore with some simplification, no
> >> hotplug for instance. But the differences are small and the objects
> >> could possibly be merged.
> >>
> >> A set of PowerNVCPUCore objects is added to the PnvChip and the device
> >> tree is populated looping on these cores. Core ids in the device tree
> >> are still a little fuzy. To be checked.
> > 
> > So, it's not immediately obvious to me if you want an actual core
> > object.  You could potentially create the actual vcpu objects directly
> > from the chip object.  That assumes that any hotplug will only be at
> > chip granularity, not core granularity, but I'm guessing that's the
> > case anyway.
> > 
> > That said, if having the intermediate core object is helpful, you're
> > certainly free to have it.
> 
> I would like to find some common ground with the spapr core. It should
> be possible but for the sake of simplicity let's keep it that way.

TBH, I don't think there will be any value sharing anything with the
spapr core object specifically.  Note that there is already an
entirely generic 'cpu-core' type, which you should inherit from.

> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>  hw/ppc/Makefile.objs      |   2 +-
> >>  hw/ppc/pnv.c              | 160 ++++++++++++++++++++++++++++++++++++++++++-
> >>  hw/ppc/pnv_core.c         | 171 ++++++++++++++++++++++++++++++++++++++++++++++
> >>  include/hw/ppc/pnv.h      |   7 ++
> >>  include/hw/ppc/pnv_core.h |  47 +++++++++++++
> >>  5 files changed, 383 insertions(+), 4 deletions(-)
> >>  create mode 100644 hw/ppc/pnv_core.c
> >>  create mode 100644 include/hw/ppc/pnv_core.h
> >>
> >> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> >> index 8105db7d5600..f8c7d1db9ade 100644
> >> --- a/hw/ppc/Makefile.objs
> >> +++ b/hw/ppc/Makefile.objs
> >> @@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
> >>  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
> >>  obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
> >>  # IBM PowerNV
> >> -obj-$(CONFIG_POWERNV) += pnv.o
> >> +obj-$(CONFIG_POWERNV) += pnv.o pnv_core.o
> >>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
> >>  obj-y += spapr_pci_vfio.o
> >>  endif
> >> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> >> index a680780e9dea..1219493c7218 100644
> >> --- a/hw/ppc/pnv.c
> >> +++ b/hw/ppc/pnv.c
> >> @@ -35,6 +35,7 @@
> >>  #include "hw/ppc/fdt.h"
> >>  #include "hw/ppc/ppc.h"
> >>  #include "hw/ppc/pnv.h"
> >> +#include "hw/ppc/pnv_core.h"
> >>  #include "hw/loader.h"
> >>  #include "exec/address-spaces.h"
> >>  #include "qemu/cutils.h"
> >> @@ -112,6 +113,114 @@ static int powernv_populate_memory(void *fdt)
> >>      return 0;
> >>  }
> >>  
> >> +static void powernv_create_core_node(void *fdt, CPUState *cs, uint32_t chip_id)
> >> +{
> >> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> >> +    int smt_threads = ppc_get_compat_smt_threads(cpu);
> >> +    CPUPPCState *env = &cpu->env;
> >> +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
> >> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> >> +    uint32_t servers_prop[smt_threads];
> >> +    uint32_t gservers_prop[smt_threads * 2];
> >> +    int i, index = ppc_get_vcpu_dt_id(cpu);
> >> +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> >> +                       0xffffffff, 0xffffffff};
> >> +    uint32_t tbfreq = PNV_TIMEBASE_FREQ;
> >> +    uint32_t cpufreq = 1000000000;
> >> +    uint32_t page_sizes_prop[64];
> >> +    size_t page_sizes_prop_size;
> >> +    char *nodename;
> >> +
> >> +    nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
> >> +
> >> +    _FDT((fdt_begin_node(fdt, nodename)));
> >> +
> >> +    g_free(nodename);
> >> +
> >> +    _FDT((fdt_property_cell(fdt, "reg", index)));
> >> +    _FDT((fdt_property_string(fdt, "device_type", "cpu")));
> > 
> > The handling of dt_id is going to collide with cleanups I want to do
> > in this area (for spapr and the ppc cpu core).  Not sure there's a lot
> > you can do to avoid that at this stage.
> 
> We will adapt. When I reworked the device tree to use the "rw" functions, 
> I took a closer look at the powernv needs. Nothing alarming.

Using "rw" throught will certainly make the conversion easier.
Merging the "sw" and "rw" portions is one of the motivations for this
cleanup on spapr.

> The cores are numbered in the (processor) chip. The maximum number of 
> cores depends on the model (the max of the max is 12 for Venice) and 
> they are not contiguous so we should be activating them with a bitmap 
> per chip. 

Ok, this will need some caution to get sane cpu_index values.
However, this isn't urgent until you get to the point of supporting
migration across versions.

> The core id plus the chip id make a PIR, which is what we use as a dt_id.
> For example :
> 
> 	PowerPC,POWER8@8a8/ibm,chip-id
> 			 00000011 (17)
> 	PowerPC,POWER8@8a8/reg
> 			 000008a8 (2216)
> 	PowerPC,POWER8@8a8/ibm,pir
> 			 000008a8 (2216)	
> 
> Ben provided a good summary in skiboot, here :
> 
> 	https://github.com/open-power/skiboot/blob/master/include/chip.h
> 
> May be we can find a good formula for cpu->cpu_dt_id. to be studied.

Possibly.  In fact, I'm planning to eliminate cpu->cpu_dt_id at some
point, in favour of having the machine type construct the id when it
actually builds the dt.  It's not really a cpu level construct.

> >> +    _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
> >> +    _FDT((fdt_property_cell(fdt, "d-cache-block-size",
> >> +                            env->dcache_line_size)));
> >> +    _FDT((fdt_property_cell(fdt, "d-cache-line-size",
> >> +                            env->dcache_line_size)));
> >> +    _FDT((fdt_property_cell(fdt, "i-cache-block-size",
> >> +                            env->icache_line_size)));
> >> +    _FDT((fdt_property_cell(fdt, "i-cache-line-size",
> >> +                            env->icache_line_size)));
> >> +
> >> +    if (pcc->l1_dcache_size) {
> >> +        _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
> >> +    } else {
> >> +        error_report("Warning: Unknown L1 dcache size for cpu");
> >> +    }
> >> +    if (pcc->l1_icache_size) {
> >> +        _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
> >> +    } else {
> >> +        error_report("Warning: Unknown L1 icache size for cpu");
> >> +    }
> >> +
> >> +    _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
> >> +    _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
> >> +    _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
> >> +    _FDT((fdt_property_string(fdt, "status", "okay")));
> >> +    _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
> >> +
> >> +    if (env->spr_cb[SPR_PURR].oea_read) {
> >> +        _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
> >> +    }
> >> +
> >> +    if (env->mmu_model & POWERPC_MMU_1TSEG) {
> >> +        _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
> >> +                           segs, sizeof(segs))));
> >> +    }
> >> +
> >> +    /* Advertise VMX/VSX (vector extensions) if available
> >> +     *   0 / no property == no vector extensions
> >> +     *   1               == VMX / Altivec available
> >> +     *   2               == VSX available */
> >> +    if (env->insns_flags & PPC_ALTIVEC) {
> >> +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
> >> +
> >> +        _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
> >> +    }
> >> +
> >> +    /* Advertise DFP (Decimal Floating Point) if available
> >> +     *   0 / no property == no DFP
> >> +     *   1               == DFP available */
> >> +    if (env->insns_flags2 & PPC2_DFP) {
> >> +        _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
> >> +    }
> >> +
> >> +    page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop,
> >> +                                                  sizeof(page_sizes_prop));
> >> +    if (page_sizes_prop_size) {
> >> +        _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
> >> +                           page_sizes_prop, page_sizes_prop_size)));
> >> +    }
> >> +
> >> +    _FDT((fdt_property_cell(fdt, "ibm,chip-id", chip_id)));
> >> +
> >> +    if (cpu->cpu_version) {
> >> +        _FDT((fdt_property_cell(fdt, "cpu-version", cpu->cpu_version)));
> >> +    }
> >> +
> >> +    /* Build interrupt servers and gservers properties */
> >> +    for (i = 0; i < smt_threads; i++) {
> >> +        servers_prop[i] = cpu_to_be32(index + i);
> >> +        /* Hack, direct the group queues back to cpu 0 */
> >> +        gservers_prop[i * 2] = cpu_to_be32(index + i);
> >> +        gservers_prop[i * 2 + 1] = 0;
> >> +    }
> >> +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
> >> +                       servers_prop, sizeof(servers_prop))));
> >> +    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
> >> +                       gservers_prop, sizeof(gservers_prop))));
> >> +
> >> +    _FDT((fdt_end_node(fdt)));
> >> +}
> >> +
> >>  static void *powernv_create_fdt(sPowerNVMachineState *pnv,
> >>                                  const char *kernel_cmdline)
> >>  {
> >> @@ -120,6 +229,7 @@ static void *powernv_create_fdt(sPowerNVMachineState *pnv,
> >>      uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size);
> >>      char *buf;
> >>      const char plat_compat[] = "qemu,powernv\0ibm,powernv";
> >> +    int i;
> >>  
> >>      fdt = g_malloc0(FDT_MAX_SIZE);
> >>      _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
> >> @@ -156,6 +266,23 @@ static void *powernv_create_fdt(sPowerNVMachineState *pnv,
> >>      /* Memory */
> >>      _FDT((powernv_populate_memory(fdt)));
> >>  
> >> +    /* cpus */
> >> +    _FDT((fdt_begin_node(fdt, "cpus")));
> >> +    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
> >> +    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
> >> +
> >> +    for (i = 0; i < pnv->num_chips; i++) {
> >> +        PnvChip *chip = &pnv->chips[i];
> >> +        int j;
> >> +
> >> +        for (j = 0; j < chip->num_cores; j++) {
> >> +            PowerNVCPUCore *pc = POWERNV_CPU_CORE(chip->cores[j]);
> >> +            CPUState *cs = CPU(DEVICE(pc->threads));
> >> +            powernv_create_core_node(fdt, cs, chip->chip_id);
> > 
> > I think it would be nicer to define the fdt creation function in terms
> > of the core object, and have it retrieve the representative thread itself.
> 
> ok. yes, I will try to refresh that.
> 
> >> +        }
> >> +    }
> >> +    _FDT((fdt_end_node(fdt)));
> >> +
> >>      _FDT((fdt_end_node(fdt))); /* close root node */
> >>      _FDT((fdt_finish(fdt)));
> >>  
> >> @@ -168,11 +295,13 @@ static void ppc_powernv_reset(void)
> >>      sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);
> >>      void *fdt;
> >>  
> >> +    pnv->fdt_addr = FDT_ADDR;
> >> +
> >>      qemu_devices_reset();
> >>  
> >>      fdt = powernv_create_fdt(pnv, machine->kernel_cmdline);
> >>  
> >> -    cpu_physical_memory_write(FDT_ADDR, fdt, fdt_totalsize(fdt));
> >> +    cpu_physical_memory_write(pnv->fdt_addr, fdt, fdt_totalsize(fdt));
> > 
> > These look like not really related changes, that should be maybe
> > folded into 1/3.
> 
> yes.
> 
> >>  }
> >>  
> >>  static void ppc_powernv_init(MachineState *machine)
> >> @@ -252,6 +381,10 @@ static void ppc_powernv_init(MachineState *machine)
> >>  
> >>          object_initialize(chip, sizeof(*chip), TYPE_PNV_CHIP);
> >>          object_property_set_int(OBJECT(chip), i, "chip-id", &error_abort);
> >> +        object_property_set_int(OBJECT(chip), smp_cpus, "num-cores",
> >> +                                &error_abort);
> >> +        object_property_set_str(OBJECT(chip), machine->cpu_model, "cpu-model",
> >> +                                &error_abort);
> > 
> > So various drafts of the spapr cores had a cpu-model parameter, but we
> > eventually rejected it in favour of having different core types
> > corresponding to the different, well, core types.  Unless there's a
> > compelling reason otherwise, I think it would be nicer to do the same
> > for the pnvchip objects - a p8pnvchip object will always create p8
> > cores and so forth.
> 
> yes. This is fixed in the current patchset. 
> 
> >>          object_property_set_bool(OBJECT(chip), true, "realized", &error_abort);
> >>      }
> >>  }
> >> @@ -292,14 +425,35 @@ static const TypeInfo powernv_machine_2_8_info = {
> >>      .class_init    = powernv_machine_2_8_class_init,
> >>  };
> >>  
> >> -
> >>  static void pnv_chip_realize(DeviceState *dev, Error **errp)
> >>  {
> >> -    ;
> >> +    int i;
> >> +    PnvChip *chip = PNV_CHIP(dev);
> >> +    char *typename = powernv_cpu_core_typename(chip->cpu_model);
> >> +
> >> +    if (!object_class_by_name(typename)) {
> >> +        error_setg(errp, "Unable to find PowerNV CPU Core definition");
> >> +        return;
> >> +    }
> >> +
> >> +    chip->cores = g_new0(Object *, chip->num_cores);
> >> +    for (i = 0; i < chip->num_cores; i++) {
> >> +        int core_id = i * smp_threads;
> >> +        chip->cores[i] = object_new(typename);
> >> +        object_property_set_int(chip->cores[i], smp_threads, "nr-threads",
> >> +                                &error_fatal);
> >> +        object_property_set_int(chip->cores[i], core_id, CPU_CORE_PROP_CORE_ID,
> >> +                                &error_fatal);
> >> +        object_property_set_bool(chip->cores[i], true, "realized",
> >> +                                 &error_fatal);
> >> +    }
> >> +    g_free(typename);
> >>  }
> >>  
> >>  static Property pnv_chip_properties[] = {
> >>      DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0),
> >> +    DEFINE_PROP_STRING("cpu-model", PnvChip, cpu_model),
> >> +    DEFINE_PROP_UINT32("num-cores", PnvChip, num_cores, 1),
> >>      DEFINE_PROP_END_OF_LIST(),
> >>  };
> >>  
> >> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> >> new file mode 100644
> >> index 000000000000..1e36709db993
> >> --- /dev/null
> >> +++ b/hw/ppc/pnv_core.c
> >> @@ -0,0 +1,171 @@
> >> +/*
> >> + * QEMU PowerPC PowerNV CPU model
> >> + *
> >> + * Copyright (c) IBM Corporation.
> >> + *
> >> + * This library is free software; you can redistribute it and/or
> >> + * modify it under the terms of the GNU Lesser General Public License
> >> + * as published by the Free Software Foundation; either version 2 of
> >> + * the License, or (at your option) any later version.
> >> + *
> >> + * This library is distributed in the hope that it will be useful, but
> >> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >> + * Lesser General Public License for more details.
> >> + *
> >> + * You should have received a copy of the GNU Lesser General Public
> >> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> >> + */
> >> +#include "qemu/osdep.h"
> >> +#include "sysemu/sysemu.h"
> >> +#include "qapi/error.h"
> >> +#include "target-ppc/cpu.h"
> >> +#include "hw/ppc/ppc.h"
> >> +#include "hw/ppc/pnv.h"
> >> +#include "hw/ppc/pnv_core.h"
> >> +
> >> +static void powernv_cpu_reset(void *opaque)
> >> +{
> >> +    PowerPCCPU *cpu = opaque;
> >> +    CPUState *cs = CPU(cpu);
> >> +    CPUPPCState *env = &cpu->env;
> >> +    MachineState *machine = MACHINE(qdev_get_machine());
> >> +    sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);
> >> +
> >> +    cpu_reset(cs);
> >> +
> >> +    env->spr[SPR_PIR] = ppc_get_vcpu_dt_id(cpu);
> > 
> > Is the PIR writable?  If not it might be better to move this to init
> > rather than reset time.
> 
> It should not. OK.
> 
> >> +    env->spr[SPR_HIOR] = 0;
> >> +    env->gpr[3] = pnv->fdt_addr;
> >> +    env->nip = 0x10;
> >> +    env->msr |= MSR_HVB;
> >> +}
> >> +
> >> +static void powernv_cpu_init(PowerPCCPU *cpu, Error **errp)
> >> +{
> >> +    CPUPPCState *env = &cpu->env;
> >> +
> >> +    /* Set time-base frequency to 512 MHz */
> >> +    cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
> >> +
> >> +    /* MSR[IP] doesn't exist nowadays */
> >> +    env->msr_mask &= ~(1 << 6);
> >> +
> >> +    qemu_register_reset(powernv_cpu_reset, cpu);
> >> +    powernv_cpu_reset(cpu);
> >> +}
> >> +
> >> +static void powernv_cpu_core_realize_child(Object *child, Error **errp)
> >> +{
> >> +    Error *local_err = NULL;
> >> +    CPUState *cs = CPU(child);
> >> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> >> +
> >> +    object_property_set_bool(child, true, "realized", &local_err);
> >> +    if (local_err) {
> >> +        error_propagate(errp, local_err);
> >> +        return;
> >> +    }
> >> +
> >> +    powernv_cpu_init(cpu, &local_err);
> >> +    if (local_err) {
> >> +        error_propagate(errp, local_err);
> >> +        return;
> >> +    }
> >> +}
> >> +
> >> +static void powernv_cpu_core_realize(DeviceState *dev, Error **errp)
> >> +{
> >> +    PowerNVCPUCore *pc = POWERNV_CPU_CORE(OBJECT(dev));
> >> +    CPUCore *cc = CPU_CORE(OBJECT(dev));
> >> +    PowerNVCPUClass *pcc = POWERNV_CPU_GET_CLASS(OBJECT(dev));
> >> +    const char *typename = object_class_get_name(pcc->cpu_oc);
> >> +    size_t size = object_type_get_instance_size(typename);
> >> +    Error *local_err = NULL;
> >> +    void *obj;
> >> +    int i, j;
> >> +
> >> +
> >> +    pc->threads = g_malloc0(size * cc->nr_threads);
> >> +    for (i = 0; i < cc->nr_threads; i++) {
> >> +        char id[32];
> >> +        CPUState *cs;
> >> +
> >> +        obj = pc->threads + i * size;
> >> +
> >> +        object_initialize(obj, size, typename);
> >> +        cs = CPU(obj);
> >> +        cs->cpu_index = cc->core_id + i;
> >> +        snprintf(id, sizeof(id), "thread[%d]", i);
> >> +        object_property_add_child(OBJECT(pc), id, obj, &local_err);
> >> +        if (local_err) {
> >> +            goto err;
> >> +        }
> >> +        object_unref(obj);
> >> +    }
> >> +
> >> +    for (j = 0; j < cc->nr_threads; j++) {
> >> +        obj = pc->threads + j * size;
> >> +
> >> +        powernv_cpu_core_realize_child(obj, &local_err);
> >> +        if (local_err) {
> >> +            goto err;
> >> +        }
> >> +    }
> >> +    return;
> >> +
> >> +err:
> >> +    while (--i >= 0) {
> >> +        obj = pc->threads + i * size;
> >> +        object_unparent(obj);
> >> +    }
> >> +    g_free(pc->threads);
> >> +    error_propagate(errp, local_err);
> >> +}
> >> +
> >> +/*
> >> + * Grow this list or merge with SPAPRCoreInfo which is very similar
> >> + */
> >> +static const char *powernv_core_models[] = { "POWER8" };
> >> +
> >> +static void powernv_cpu_core_class_init(ObjectClass *oc, void *data)
> >> +{
> >> +    DeviceClass *dc = DEVICE_CLASS(oc);
> >> +    PowerNVCPUClass *pcc = POWERNV_CPU_CLASS(oc);
> >> +
> >> +    dc->realize = powernv_cpu_core_realize;
> >> +    pcc->cpu_oc = cpu_class_by_name(TYPE_POWERPC_CPU, data);
> >> +}
> >> +
> >> +static const TypeInfo powernv_cpu_core_info = {
> >> +    .name           = TYPE_POWERNV_CPU_CORE,
> >> +    .parent         = TYPE_CPU_CORE,
> >> +    .instance_size  = sizeof(PowerNVCPUCore),
> >> +    .class_size     = sizeof(PowerNVCPUClass),
> >> +    .abstract       = true,
> >> +};
> >> +
> >> +static void powernv_cpu_core_register_types(void)
> >> +{
> >> +    int i ;
> >> +
> >> +    type_register_static(&powernv_cpu_core_info);
> >> +    for (i = 0; i < ARRAY_SIZE(powernv_core_models); ++i) {
> >> +        TypeInfo ti = {
> >> +            .parent = TYPE_POWERNV_CPU_CORE,
> >> +            .instance_size = sizeof(PowerNVCPUCore),
> >> +            .class_init = powernv_cpu_core_class_init,
> >> +            .class_data = (void *) powernv_core_models[i],
> >> +        };
> >> +        ti.name = powernv_cpu_core_typename(powernv_core_models[i]);
> >> +        type_register(&ti);
> >> +        g_free((void *)ti.name);
> >> +    }
> >> +}
> >> +
> >> +type_init(powernv_cpu_core_register_types)
> >> +
> >> +char *powernv_cpu_core_typename(const char *model)
> >> +{
> >> +    return g_strdup_printf("%s-" TYPE_POWERNV_CPU_CORE, model);
> >> +}
> >> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
> >> index 6907dc9e5c3d..9eac4b34a9b0 100644
> >> --- a/include/hw/ppc/pnv.h
> >> +++ b/include/hw/ppc/pnv.h
> >> @@ -31,6 +31,10 @@ typedef struct PnvChip {
> >>  
> >>      /*< public >*/
> >>      uint32_t     chip_id;
> >> +    uint32_t     num_cores;
> >> +    char *cpu_model;
> >> +
> >> +    Object **cores;
> > 
> > So, unlike the chips within the machine, the cores within the chip
> > should probably be done like the threads within the core - a single
> > array with object_initialize() rather than an array of pointers.  AIUI
> > this is because having a (potentially) user instantiable object
> > automatically object_new() other things breaks QOM lifetime rules.  I
> > can't say I understand this point terribly well though, but i know it
> > was something we went several rounds on during the spapr core work
> > though.
> 
> ok. I will look into it if this is possible, but I see : 
> 
> 	struct sPAPRMachineState {
> 	    ...
> 	    Object **cores;
> 	};
> 
> So spapr is not instantiating cores in the prefered way ? That does
> mean pnv should do the same.

So, the difference here is that while the spapr machine keeps track of
the cores, they're instantiated directly by the user with -device, and
they can be dynamically added or removed with device_{add,del}.

Well.. in practice for backwards compatibility spapr does instantiate
the initial CPUs.

For the cores in pnv, however, IIUC, the lifetime is locked to the
lifetime of the chips - you can't dynamically add or remove a core
from a chip.

Another way to look at it is that the relationship of pnv cores to pnv
chips is somewhat like the relationship of papr threads to papr
cores.  The relationship of papr cores to the papr machine is instead
mirrored in the relationship of pnv *chips* to the pnv machine.

> 
> Thanks,
> 
> C. 
> 
> 
> >>  } PnvChip;
> >>  
> >>  #define TYPE_POWERNV_MACHINE      "powernv-machine"
> >> @@ -43,9 +47,12 @@ typedef struct sPowerNVMachineState {
> >>  
> >>      uint32_t initrd_base;
> >>      long initrd_size;
> >> +    hwaddr fdt_addr;
> >>  
> >>      uint32_t  num_chips;
> >>      PnvChip   *chips;
> >>  } sPowerNVMachineState;
> >>  
> >> +#define PNV_TIMEBASE_FREQ           512000000ULL
> >> +
> >>  #endif /* _PPC_PNV_H */
> >> diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
> >> new file mode 100644
> >> index 000000000000..88a09b0fd1c6
> >> --- /dev/null
> >> +++ b/include/hw/ppc/pnv_core.h
> >> @@ -0,0 +1,47 @@
> >> +/*
> >> + * QEMU PowerPC PowerNV CPU model
> >> + *
> >> + * Copyright (c) 2016 IBM Corporation.
> >> + *
> >> + * This library is free software; you can redistribute it and/or
> >> + * modify it under the terms of the GNU Lesser General Public License
> >> + * as published by the Free Software Foundation; either version 2 of
> >> + * the License, or (at your option) any later version.
> >> + *
> >> + * This library is distributed in the hope that it will be useful, but
> >> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >> + * Lesser General Public License for more details.
> >> + *
> >> + * You should have received a copy of the GNU Lesser General Public
> >> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> >> + */
> >> +#ifndef _PPC_PNV_CORE_H
> >> +#define _PPC_PNV_CORE_H
> >> +
> >> +#include "hw/cpu/core.h"
> >> +
> >> +#define TYPE_POWERNV_CPU_CORE "powernv-cpu-core"
> >> +#define POWERNV_CPU_CORE(obj) \
> >> +    OBJECT_CHECK(PowerNVCPUCore, (obj), TYPE_POWERNV_CPU_CORE)
> >> +#define POWERNV_CPU_CLASS(klass) \
> >> +     OBJECT_CLASS_CHECK(PowerNVCPUClass, (klass), TYPE_POWERNV_CPU_CORE)
> >> +#define POWERNV_CPU_GET_CLASS(obj) \
> >> +     OBJECT_GET_CLASS(PowerNVCPUClass, (obj), TYPE_POWERNV_CPU_CORE)
> >> +
> >> +typedef struct PowerNVCPUCore {
> >> +    /*< private >*/
> >> +    CPUCore parent_obj;
> >> +
> >> +    /*< public >*/
> >> +    void *threads;
> >> +} PowerNVCPUCore;
> >> +
> >> +typedef struct PowerNVCPUClass {
> >> +    DeviceClass parent_class;
> >> +    ObjectClass *cpu_oc;
> >> +}   PowerNVCPUClass;
> >> +
> >> +extern char *powernv_cpu_core_typename(const char *model);
> >> +
> >> +#endif /* _PPC_PNV_CORE_H */
> > 
>
Benjamin Herrenschmidt Aug. 30, 2016, 6:15 a.m. UTC | #6
On Mon, 2016-08-29 at 10:30 -0400, David Gibson wrote:
> 
> Possibly.  In fact, I'm planning to eliminate cpu->cpu_dt_id at some
> point, in favour of having the machine type construct the id when it
> actually builds the dt.  It's not really a cpu level construct.

On PowerNV it is as it's equal to the PIR, the HW interrupt server,
etc...

Cheers,
Ben.
David Gibson Aug. 30, 2016, 6:28 a.m. UTC | #7
On Tue, Aug 30, 2016 at 04:15:35PM +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2016-08-29 at 10:30 -0400, David Gibson wrote:
> > 
> > Possibly.  In fact, I'm planning to eliminate cpu->cpu_dt_id at some
> > point, in favour of having the machine type construct the id when it
> > actually builds the dt.  It's not really a cpu level construct.
> 
> On PowerNV it is as it's equal to the PIR, the HW interrupt server,
> etc...

No.. the PIR itself is a cpu level construct (and we already have a
place for that in the cpu state).  The DT id as such isn't, although
it happens to have the same value.  The fact it has the same value is
itself a machine type property.

[Aside: removing dt_id from the cpu will require disentangling it from
the kvm vcpu id]
Cédric Le Goater Aug. 30, 2016, 7:23 a.m. UTC | #8
On 08/30/2016 08:15 AM, Benjamin Herrenschmidt wrote:
> On Mon, 2016-08-29 at 10:30 -0400, David Gibson wrote:
>>
>> Possibly.  In fact, I'm planning to eliminate cpu->cpu_dt_id at some
>> point, in favour of having the machine type construct the id when it
>> actually builds the dt.  It's not really a cpu level construct.

From my understanding, cs->cpu_index is becoming the main CPU identifier.
sPAPRCPUCore assigns it :

	cs->cpu_index = cc->core_id + i

which I reused in PnvCPUCore to hold the real HW identifiers. 
ppc_get_vcpu_by_dt_id() can also safely use cs->cpu_index I think. 

So pnv mostly work without ->cpu_dt_id but there is :

> On PowerNV it is as it's equal to the PIR, the HW interrupt server,
> etc...

xics in the way ... Now that pnv uses real hw core ids, it is 
interesting to see how lost it gets without a cpu with index=0 ...

The most obvious issue is the way we look for the ICPState of a cpu :

    ICPState *ss = &xics->ss[cs->cpu_index];

how about introducing a helper like (this one I hacked) :

+ICPState *xics_find_icp(XICSState *xics, int cpu_index)
+{
+    int i;
+
+    for (i = 0 ; i < xics->nr_servers; i++) {
+        ICPState *ss = &xics->ss[i];
+        if (ss->cs && ss->cs->cpu_index == cpu_index)
+            return ss;
+    }
+
+    return NULL;
+}
+

That might have been already discussed on the mailing list ? 

Thanks,

C.
Benjamin Herrenschmidt Aug. 31, 2016, 1:06 a.m. UTC | #9
On Tue, 2016-08-30 at 02:28 -0400, David Gibson wrote:
> No.. the PIR itself is a cpu level construct (and we already have a
> place for that in the cpu state).  The DT id as such isn't, although
> it happens to have the same value.  The fact it has the same value is
> itself a machine type property.
> 
> [Aside: removing dt_id from the cpu will require disentangling it from
> the kvm vcpu id]

On P8 and P9 the PIR of a thread is a chip property, as it encodes
the HW node, chip, core and thread ID (hint: it's not 0 based on P8,
well the core isn't).

So it has to match accordingly for things like core XSCOMs which we
want to start supporting some of.

It also has to match what's in the device-tree as a pretty standard
requirement of all powerpc device-trees.

Finally it also happen to be the target interrupt server on all known
implementations.

Cheers,
Ben.
David Gibson Sept. 5, 2016, 1:45 a.m. UTC | #10
On Tue, Aug 30, 2016 at 09:23:40AM +0200, Cédric Le Goater wrote:
> On 08/30/2016 08:15 AM, Benjamin Herrenschmidt wrote:
> > On Mon, 2016-08-29 at 10:30 -0400, David Gibson wrote:
> >>
> >> Possibly.  In fact, I'm planning to eliminate cpu->cpu_dt_id at some
> >> point, in favour of having the machine type construct the id when it
> >> actually builds the dt.  It's not really a cpu level construct.
> 
> >From my understanding, cs->cpu_index is becoming the main CPU identifier.
> sPAPRCPUCore assigns it :
> 
> 	cs->cpu_index = cc->core_id + i

Uh.. yes and no.  It's the main internal identifier, and it's become
stable at least on platforms which support cpu hotplug.  This means
that it should be possible to derive any other platform specific
identifiers from cpu_index in a consistent way.

However, cpu_index values must still lie in the range 0..max_cpus-1,
which means it's not suitable for directly representing non-contiguous
platform identifiers.

> which I reused in PnvCPUCore to hold the real HW identifiers. 
> ppc_get_vcpu_by_dt_id() can also safely use cs->cpu_index I think. 

Well, there's a different function for getting a cpu by cpu_index.

> So pnv mostly work without ->cpu_dt_id but there is :
> 
> > On PowerNV it is as it's equal to the PIR, the HW interrupt server,
> > etc...
> 
> xics in the way ... Now that pnv uses real hw core ids, it is 
> interesting to see how lost it gets without a cpu with index=0 ...
> 
> The most obvious issue is the way we look for the ICPState of a cpu :
> 
>     ICPState *ss = &xics->ss[cs->cpu_index];
> 
> how about introducing a helper like (this one I hacked) :
> 
> +ICPState *xics_find_icp(XICSState *xics, int cpu_index)
> +{
> +    int i;
> +
> +    for (i = 0 ; i < xics->nr_servers; i++) {
> +        ICPState *ss = &xics->ss[i];
> +        if (ss->cs && ss->cs->cpu_index == cpu_index)
> +            return ss;
> +    }
> +
> +    return NULL;
> +}

That's probably reasonable.

> +
> 
> That might have been already discussed on the mailing list ? 
> 
> Thanks,
> 
> C.
>
Cédric Le Goater Sept. 6, 2016, 7:45 a.m. UTC | #11
On 09/05/2016 03:45 AM, David Gibson wrote:
> On Tue, Aug 30, 2016 at 09:23:40AM +0200, Cédric Le Goater wrote:
>> On 08/30/2016 08:15 AM, Benjamin Herrenschmidt wrote:
>>> On Mon, 2016-08-29 at 10:30 -0400, David Gibson wrote:
>>>>
>>>> Possibly.  In fact, I'm planning to eliminate cpu->cpu_dt_id at some
>>>> point, in favour of having the machine type construct the id when it
>>>> actually builds the dt.  It's not really a cpu level construct.
>>
>> >From my understanding, cs->cpu_index is becoming the main CPU identifier.
>> sPAPRCPUCore assigns it :
>>
>> 	cs->cpu_index = cc->core_id + i
> 
> Uh.. yes and no.  It's the main internal identifier, and it's become
> stable at least on platforms which support cpu hotplug.  This means
> that it should be possible to derive any other platform specific
> identifiers from cpu_index in a consistent way.
> 
> However, cpu_index values must still lie in the range 0..max_cpus-1,
> which means it's not suitable for directly representing non-contiguous
> platform identifiers.

ah ok. So I might be doing something wrong on the pnv platform. As
cc->core_id contains the cpu pir and cs->cpu_index is assigned doing:

	cs->cpu_index = cc->core_id + i

It is useful to compute the threads pir but causes some issues in xics.
These are solved with a couple of helpers to look for the ICPState* 
using the CPUState* as an index. 

I will see if I can adapt pnv to use a contiguous cpu_index. 

Thanks,

C.
diff mbox

Patch

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index 8105db7d5600..f8c7d1db9ade 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -6,7 +6,7 @@  obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
 obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
 # IBM PowerNV
-obj-$(CONFIG_POWERNV) += pnv.o
+obj-$(CONFIG_POWERNV) += pnv.o pnv_core.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index a680780e9dea..1219493c7218 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -35,6 +35,7 @@ 
 #include "hw/ppc/fdt.h"
 #include "hw/ppc/ppc.h"
 #include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_core.h"
 #include "hw/loader.h"
 #include "exec/address-spaces.h"
 #include "qemu/cutils.h"
@@ -112,6 +113,114 @@  static int powernv_populate_memory(void *fdt)
     return 0;
 }
 
+static void powernv_create_core_node(void *fdt, CPUState *cs, uint32_t chip_id)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    int smt_threads = ppc_get_compat_smt_threads(cpu);
+    CPUPPCState *env = &cpu->env;
+    DeviceClass *dc = DEVICE_GET_CLASS(cs);
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+    uint32_t servers_prop[smt_threads];
+    uint32_t gservers_prop[smt_threads * 2];
+    int i, index = ppc_get_vcpu_dt_id(cpu);
+    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
+                       0xffffffff, 0xffffffff};
+    uint32_t tbfreq = PNV_TIMEBASE_FREQ;
+    uint32_t cpufreq = 1000000000;
+    uint32_t page_sizes_prop[64];
+    size_t page_sizes_prop_size;
+    char *nodename;
+
+    nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
+
+    _FDT((fdt_begin_node(fdt, nodename)));
+
+    g_free(nodename);
+
+    _FDT((fdt_property_cell(fdt, "reg", index)));
+    _FDT((fdt_property_string(fdt, "device_type", "cpu")));
+
+    _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
+    _FDT((fdt_property_cell(fdt, "d-cache-block-size",
+                            env->dcache_line_size)));
+    _FDT((fdt_property_cell(fdt, "d-cache-line-size",
+                            env->dcache_line_size)));
+    _FDT((fdt_property_cell(fdt, "i-cache-block-size",
+                            env->icache_line_size)));
+    _FDT((fdt_property_cell(fdt, "i-cache-line-size",
+                            env->icache_line_size)));
+
+    if (pcc->l1_dcache_size) {
+        _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
+    } else {
+        error_report("Warning: Unknown L1 dcache size for cpu");
+    }
+    if (pcc->l1_icache_size) {
+        _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
+    } else {
+        error_report("Warning: Unknown L1 icache size for cpu");
+    }
+
+    _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
+    _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
+    _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
+    _FDT((fdt_property_string(fdt, "status", "okay")));
+    _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
+
+    if (env->spr_cb[SPR_PURR].oea_read) {
+        _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
+    }
+
+    if (env->mmu_model & POWERPC_MMU_1TSEG) {
+        _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
+                           segs, sizeof(segs))));
+    }
+
+    /* Advertise VMX/VSX (vector extensions) if available
+     *   0 / no property == no vector extensions
+     *   1               == VMX / Altivec available
+     *   2               == VSX available */
+    if (env->insns_flags & PPC_ALTIVEC) {
+        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
+
+        _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
+    }
+
+    /* Advertise DFP (Decimal Floating Point) if available
+     *   0 / no property == no DFP
+     *   1               == DFP available */
+    if (env->insns_flags2 & PPC2_DFP) {
+        _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
+    }
+
+    page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop,
+                                                  sizeof(page_sizes_prop));
+    if (page_sizes_prop_size) {
+        _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
+                           page_sizes_prop, page_sizes_prop_size)));
+    }
+
+    _FDT((fdt_property_cell(fdt, "ibm,chip-id", chip_id)));
+
+    if (cpu->cpu_version) {
+        _FDT((fdt_property_cell(fdt, "cpu-version", cpu->cpu_version)));
+    }
+
+    /* Build interrupt servers and gservers properties */
+    for (i = 0; i < smt_threads; i++) {
+        servers_prop[i] = cpu_to_be32(index + i);
+        /* Hack, direct the group queues back to cpu 0 */
+        gservers_prop[i * 2] = cpu_to_be32(index + i);
+        gservers_prop[i * 2 + 1] = 0;
+    }
+    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
+                       servers_prop, sizeof(servers_prop))));
+    _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
+                       gservers_prop, sizeof(gservers_prop))));
+
+    _FDT((fdt_end_node(fdt)));
+}
+
 static void *powernv_create_fdt(sPowerNVMachineState *pnv,
                                 const char *kernel_cmdline)
 {
@@ -120,6 +229,7 @@  static void *powernv_create_fdt(sPowerNVMachineState *pnv,
     uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size);
     char *buf;
     const char plat_compat[] = "qemu,powernv\0ibm,powernv";
+    int i;
 
     fdt = g_malloc0(FDT_MAX_SIZE);
     _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
@@ -156,6 +266,23 @@  static void *powernv_create_fdt(sPowerNVMachineState *pnv,
     /* Memory */
     _FDT((powernv_populate_memory(fdt)));
 
+    /* cpus */
+    _FDT((fdt_begin_node(fdt, "cpus")));
+    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
+    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
+
+    for (i = 0; i < pnv->num_chips; i++) {
+        PnvChip *chip = &pnv->chips[i];
+        int j;
+
+        for (j = 0; j < chip->num_cores; j++) {
+            PowerNVCPUCore *pc = POWERNV_CPU_CORE(chip->cores[j]);
+            CPUState *cs = CPU(DEVICE(pc->threads));
+            powernv_create_core_node(fdt, cs, chip->chip_id);
+        }
+    }
+    _FDT((fdt_end_node(fdt)));
+
     _FDT((fdt_end_node(fdt))); /* close root node */
     _FDT((fdt_finish(fdt)));
 
@@ -168,11 +295,13 @@  static void ppc_powernv_reset(void)
     sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);
     void *fdt;
 
+    pnv->fdt_addr = FDT_ADDR;
+
     qemu_devices_reset();
 
     fdt = powernv_create_fdt(pnv, machine->kernel_cmdline);
 
-    cpu_physical_memory_write(FDT_ADDR, fdt, fdt_totalsize(fdt));
+    cpu_physical_memory_write(pnv->fdt_addr, fdt, fdt_totalsize(fdt));
 }
 
 static void ppc_powernv_init(MachineState *machine)
@@ -252,6 +381,10 @@  static void ppc_powernv_init(MachineState *machine)
 
         object_initialize(chip, sizeof(*chip), TYPE_PNV_CHIP);
         object_property_set_int(OBJECT(chip), i, "chip-id", &error_abort);
+        object_property_set_int(OBJECT(chip), smp_cpus, "num-cores",
+                                &error_abort);
+        object_property_set_str(OBJECT(chip), machine->cpu_model, "cpu-model",
+                                &error_abort);
         object_property_set_bool(OBJECT(chip), true, "realized", &error_abort);
     }
 }
@@ -292,14 +425,35 @@  static const TypeInfo powernv_machine_2_8_info = {
     .class_init    = powernv_machine_2_8_class_init,
 };
 
-
 static void pnv_chip_realize(DeviceState *dev, Error **errp)
 {
-    ;
+    int i;
+    PnvChip *chip = PNV_CHIP(dev);
+    char *typename = powernv_cpu_core_typename(chip->cpu_model);
+
+    if (!object_class_by_name(typename)) {
+        error_setg(errp, "Unable to find PowerNV CPU Core definition");
+        return;
+    }
+
+    chip->cores = g_new0(Object *, chip->num_cores);
+    for (i = 0; i < chip->num_cores; i++) {
+        int core_id = i * smp_threads;
+        chip->cores[i] = object_new(typename);
+        object_property_set_int(chip->cores[i], smp_threads, "nr-threads",
+                                &error_fatal);
+        object_property_set_int(chip->cores[i], core_id, CPU_CORE_PROP_CORE_ID,
+                                &error_fatal);
+        object_property_set_bool(chip->cores[i], true, "realized",
+                                 &error_fatal);
+    }
+    g_free(typename);
 }
 
 static Property pnv_chip_properties[] = {
     DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0),
+    DEFINE_PROP_STRING("cpu-model", PnvChip, cpu_model),
+    DEFINE_PROP_UINT32("num-cores", PnvChip, num_cores, 1),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
new file mode 100644
index 000000000000..1e36709db993
--- /dev/null
+++ b/hw/ppc/pnv_core.c
@@ -0,0 +1,171 @@ 
+/*
+ * QEMU PowerPC PowerNV CPU model
+ *
+ * Copyright (c) IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+#include "sysemu/sysemu.h"
+#include "qapi/error.h"
+#include "target-ppc/cpu.h"
+#include "hw/ppc/ppc.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_core.h"
+
+static void powernv_cpu_reset(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+    MachineState *machine = MACHINE(qdev_get_machine());
+    sPowerNVMachineState *pnv = POWERNV_MACHINE(machine);
+
+    cpu_reset(cs);
+
+    env->spr[SPR_PIR] = ppc_get_vcpu_dt_id(cpu);
+    env->spr[SPR_HIOR] = 0;
+    env->gpr[3] = pnv->fdt_addr;
+    env->nip = 0x10;
+    env->msr |= MSR_HVB;
+}
+
+static void powernv_cpu_init(PowerPCCPU *cpu, Error **errp)
+{
+    CPUPPCState *env = &cpu->env;
+
+    /* Set time-base frequency to 512 MHz */
+    cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
+
+    /* MSR[IP] doesn't exist nowadays */
+    env->msr_mask &= ~(1 << 6);
+
+    qemu_register_reset(powernv_cpu_reset, cpu);
+    powernv_cpu_reset(cpu);
+}
+
+static void powernv_cpu_core_realize_child(Object *child, Error **errp)
+{
+    Error *local_err = NULL;
+    CPUState *cs = CPU(child);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+    object_property_set_bool(child, true, "realized", &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    powernv_cpu_init(cpu, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+}
+
+static void powernv_cpu_core_realize(DeviceState *dev, Error **errp)
+{
+    PowerNVCPUCore *pc = POWERNV_CPU_CORE(OBJECT(dev));
+    CPUCore *cc = CPU_CORE(OBJECT(dev));
+    PowerNVCPUClass *pcc = POWERNV_CPU_GET_CLASS(OBJECT(dev));
+    const char *typename = object_class_get_name(pcc->cpu_oc);
+    size_t size = object_type_get_instance_size(typename);
+    Error *local_err = NULL;
+    void *obj;
+    int i, j;
+
+
+    pc->threads = g_malloc0(size * cc->nr_threads);
+    for (i = 0; i < cc->nr_threads; i++) {
+        char id[32];
+        CPUState *cs;
+
+        obj = pc->threads + i * size;
+
+        object_initialize(obj, size, typename);
+        cs = CPU(obj);
+        cs->cpu_index = cc->core_id + i;
+        snprintf(id, sizeof(id), "thread[%d]", i);
+        object_property_add_child(OBJECT(pc), id, obj, &local_err);
+        if (local_err) {
+            goto err;
+        }
+        object_unref(obj);
+    }
+
+    for (j = 0; j < cc->nr_threads; j++) {
+        obj = pc->threads + j * size;
+
+        powernv_cpu_core_realize_child(obj, &local_err);
+        if (local_err) {
+            goto err;
+        }
+    }
+    return;
+
+err:
+    while (--i >= 0) {
+        obj = pc->threads + i * size;
+        object_unparent(obj);
+    }
+    g_free(pc->threads);
+    error_propagate(errp, local_err);
+}
+
+/*
+ * Grow this list or merge with SPAPRCoreInfo which is very similar
+ */
+static const char *powernv_core_models[] = { "POWER8" };
+
+static void powernv_cpu_core_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    PowerNVCPUClass *pcc = POWERNV_CPU_CLASS(oc);
+
+    dc->realize = powernv_cpu_core_realize;
+    pcc->cpu_oc = cpu_class_by_name(TYPE_POWERPC_CPU, data);
+}
+
+static const TypeInfo powernv_cpu_core_info = {
+    .name           = TYPE_POWERNV_CPU_CORE,
+    .parent         = TYPE_CPU_CORE,
+    .instance_size  = sizeof(PowerNVCPUCore),
+    .class_size     = sizeof(PowerNVCPUClass),
+    .abstract       = true,
+};
+
+static void powernv_cpu_core_register_types(void)
+{
+    int i ;
+
+    type_register_static(&powernv_cpu_core_info);
+    for (i = 0; i < ARRAY_SIZE(powernv_core_models); ++i) {
+        TypeInfo ti = {
+            .parent = TYPE_POWERNV_CPU_CORE,
+            .instance_size = sizeof(PowerNVCPUCore),
+            .class_init = powernv_cpu_core_class_init,
+            .class_data = (void *) powernv_core_models[i],
+        };
+        ti.name = powernv_cpu_core_typename(powernv_core_models[i]);
+        type_register(&ti);
+        g_free((void *)ti.name);
+    }
+}
+
+type_init(powernv_cpu_core_register_types)
+
+char *powernv_cpu_core_typename(const char *model)
+{
+    return g_strdup_printf("%s-" TYPE_POWERNV_CPU_CORE, model);
+}
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index 6907dc9e5c3d..9eac4b34a9b0 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -31,6 +31,10 @@  typedef struct PnvChip {
 
     /*< public >*/
     uint32_t     chip_id;
+    uint32_t     num_cores;
+    char *cpu_model;
+
+    Object **cores;
 } PnvChip;
 
 #define TYPE_POWERNV_MACHINE      "powernv-machine"
@@ -43,9 +47,12 @@  typedef struct sPowerNVMachineState {
 
     uint32_t initrd_base;
     long initrd_size;
+    hwaddr fdt_addr;
 
     uint32_t  num_chips;
     PnvChip   *chips;
 } sPowerNVMachineState;
 
+#define PNV_TIMEBASE_FREQ           512000000ULL
+
 #endif /* _PPC_PNV_H */
diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
new file mode 100644
index 000000000000..88a09b0fd1c6
--- /dev/null
+++ b/include/hw/ppc/pnv_core.h
@@ -0,0 +1,47 @@ 
+/*
+ * QEMU PowerPC PowerNV CPU model
+ *
+ * Copyright (c) 2016 IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _PPC_PNV_CORE_H
+#define _PPC_PNV_CORE_H
+
+#include "hw/cpu/core.h"
+
+#define TYPE_POWERNV_CPU_CORE "powernv-cpu-core"
+#define POWERNV_CPU_CORE(obj) \
+    OBJECT_CHECK(PowerNVCPUCore, (obj), TYPE_POWERNV_CPU_CORE)
+#define POWERNV_CPU_CLASS(klass) \
+     OBJECT_CLASS_CHECK(PowerNVCPUClass, (klass), TYPE_POWERNV_CPU_CORE)
+#define POWERNV_CPU_GET_CLASS(obj) \
+     OBJECT_GET_CLASS(PowerNVCPUClass, (obj), TYPE_POWERNV_CPU_CORE)
+
+typedef struct PowerNVCPUCore {
+    /*< private >*/
+    CPUCore parent_obj;
+
+    /*< public >*/
+    void *threads;
+} PowerNVCPUCore;
+
+typedef struct PowerNVCPUClass {
+    DeviceClass parent_class;
+    ObjectClass *cpu_oc;
+}   PowerNVCPUClass;
+
+extern char *powernv_cpu_core_typename(const char *model);
+
+#endif /* _PPC_PNV_CORE_H */