diff mbox

[RFC,v2,05/23] spapr: Reorganize CPU dt generation code

Message ID 1427117764-23008-6-git-send-email-bharata@linux.vnet.ibm.com
State New
Headers show

Commit Message

Bharata B Rao March 23, 2015, 1:35 p.m. UTC
Reorganize CPU device tree generation code so that it be reused from
hotplug path. CPU dt entries are now generated from spapr_finalize_fdt()
instead of spapr_create_fdt_skel().

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c | 288 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 154 insertions(+), 134 deletions(-)

Comments

David Gibson March 25, 2015, 1:36 a.m. UTC | #1
On Mon, Mar 23, 2015 at 07:05:46PM +0530, Bharata B Rao wrote:
> Reorganize CPU device tree generation code so that it be reused from
> hotplug path. CPU dt entries are now generated from spapr_finalize_fdt()
> instead of spapr_create_fdt_skel().
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c | 288 ++++++++++++++++++++++++++++++---------------------------
>  1 file changed, 154 insertions(+), 134 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 36ff754..1a25cc0 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -206,24 +206,50 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
>      return ret;
>  }
>  
> +static int spapr_fixup_cpu_numa_smt_dt(void *fdt, int offset, CPUState *cs,
> +                                        sPAPREnvironment *spapr)
> +{
> +    int ret;
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    int index = ppc_get_vcpu_dt_id(cpu);
> +    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> +    uint32_t associativity[] = {cpu_to_be32(0x5),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(cs->numa_node),
> +                                cpu_to_be32(index)};
> +
> +    /* Advertise NUMA via ibm,associativity */
> +    if (nb_numa_nodes > 1) {
> +        ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
> +                          sizeof(associativity));
> +        if (ret < 0) {
> +            return ret;
> +        }
> +    }
> +
> +    ret = fdt_setprop(fdt, offset, "ibm,pft-size",
> +                      pft_size_prop, sizeof(pft_size_prop));
> +    if (ret < 0) {
> +        return ret;
> +    }

The pft-size property isn't actually related to NUMA, so it doesn't
really belong in this function.

> +    return spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
> +                                 ppc_get_compat_smt_threads(cpu));

Likewise calling the smt fixup function from the numa fixup function
just seems odd to me; just be explicit and call the two sequentially.

Overall this seems an odd way to split things.  Why not just make a
spapr_fixup_one_cpu_dt() function, or similar, which should do all the
necessary pieces.

> +}
> +
>  static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
>  {
>      int ret = 0, offset, cpus_offset;
>      CPUState *cs;
>      char cpu_model[32];
>      int smt = kvmppc_smt_threads();
> -    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
>  
>      CPU_FOREACH(cs) {
>          PowerPCCPU *cpu = POWERPC_CPU(cs);
>          DeviceClass *dc = DEVICE_GET_CLASS(cs);
>          int index = ppc_get_vcpu_dt_id(cpu);
> -        uint32_t associativity[] = {cpu_to_be32(0x5),
> -                                    cpu_to_be32(0x0),
> -                                    cpu_to_be32(0x0),
> -                                    cpu_to_be32(0x0),
> -                                    cpu_to_be32(cs->numa_node),
> -                                    cpu_to_be32(index)};
>  
>          if ((index % smt) != 0) {
>              continue;
> @@ -247,22 +273,7 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
>              }
>          }
>  
> -        if (nb_numa_nodes > 1) {
> -            ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
> -                              sizeof(associativity));
> -            if (ret < 0) {
> -                return ret;
> -            }
> -        }
> -
> -        ret = fdt_setprop(fdt, offset, "ibm,pft-size",
> -                          pft_size_prop, sizeof(pft_size_prop));
> -        if (ret < 0) {
> -            return ret;
> -        }
> -
> -        ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
> -                                     ppc_get_compat_smt_threads(cpu));
> +        ret = spapr_fixup_cpu_numa_smt_dt(fdt, offset, cs, spapr);
>          if (ret < 0) {
>              return ret;
>          }
> @@ -341,18 +352,13 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
>                                     uint32_t epow_irq)
>  {
>      void *fdt;
> -    CPUState *cs;
>      uint32_t start_prop = cpu_to_be32(initrd_base);
>      uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
>      GString *hypertas = g_string_sized_new(256);
>      GString *qemu_hypertas = g_string_sized_new(256);
>      uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
>      uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)};
> -    int smt = kvmppc_smt_threads();
>      unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
> -    QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
> -    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
> -    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
>      char *buf;
>  
>      add_str(hypertas, "hcall-pft");
> @@ -441,107 +447,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
>  
>      _FDT((fdt_end_node(fdt)));
>  
> -    /* cpus */
> -    _FDT((fdt_begin_node(fdt, "cpus")));
> -
> -    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
> -    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
> -
> -    CPU_FOREACH(cs) {
> -        PowerPCCPU *cpu = POWERPC_CPU(cs);
> -        CPUPPCState *env = &cpu->env;
> -        DeviceClass *dc = DEVICE_GET_CLASS(cs);
> -        PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> -        int index = ppc_get_vcpu_dt_id(cpu);
> -        char *nodename;
> -        uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> -                           0xffffffff, 0xffffffff};
> -        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
> -        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
> -        uint32_t page_sizes_prop[64];
> -        size_t page_sizes_prop_size;
> -
> -        if ((index % smt) != 0) {
> -            continue;
> -        }
> -
> -        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
> -
> -        _FDT((fdt_begin_node(fdt, nodename)));
> -
> -        g_free(nodename);
> -
> -        _FDT((fdt_property_cell(fdt, "reg", index)));
> -        _FDT((fdt_property_string(fdt, "device_type", "cpu")));
> -
> -        _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
> -        _FDT((fdt_property_cell(fdt, "d-cache-block-size",
> -                                env->dcache_line_size)));
> -        _FDT((fdt_property_cell(fdt, "d-cache-line-size",
> -                                env->dcache_line_size)));
> -        _FDT((fdt_property_cell(fdt, "i-cache-block-size",
> -                                env->icache_line_size)));
> -        _FDT((fdt_property_cell(fdt, "i-cache-line-size",
> -                                env->icache_line_size)));
> -
> -        if (pcc->l1_dcache_size) {
> -            _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
> -        } else {
> -            fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
> -        }
> -        if (pcc->l1_icache_size) {
> -            _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
> -        } else {
> -            fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
> -        }
> -
> -        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
> -        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
> -        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
> -        _FDT((fdt_property_string(fdt, "status", "okay")));
> -        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
> -
> -        if (env->spr_cb[SPR_PURR].oea_read) {
> -            _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
> -        }
> -
> -        if (env->mmu_model & POWERPC_MMU_1TSEG) {
> -            _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
> -                               segs, sizeof(segs))));
> -        }
> -
> -        /* Advertise VMX/VSX (vector extensions) if available
> -         *   0 / no property == no vector extensions
> -         *   1               == VMX / Altivec available
> -         *   2               == VSX available */
> -        if (env->insns_flags & PPC_ALTIVEC) {
> -            uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
> -
> -            _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
> -        }
> -
> -        /* Advertise DFP (Decimal Floating Point) if available
> -         *   0 / no property == no DFP
> -         *   1               == DFP available */
> -        if (env->insns_flags2 & PPC2_DFP) {
> -            _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
> -        }
> -
> -        page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
> -                                                      sizeof(page_sizes_prop));
> -        if (page_sizes_prop_size) {
> -            _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
> -                               page_sizes_prop, page_sizes_prop_size)));
> -        }
> -
> -        _FDT((fdt_property_cell(fdt, "ibm,chip-id",
> -                                cs->cpu_index / cpus_per_socket)));
> -
> -        _FDT((fdt_end_node(fdt)));
> -    }
> -
> -    _FDT((fdt_end_node(fdt)));
> -
>      /* RTAS */
>      _FDT((fdt_begin_node(fdt, "rtas")));
>  
> @@ -739,6 +644,124 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
>      return 0;
>  }
>  
> +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    CPUPPCState *env = &cpu->env;
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> +    int index = ppc_get_vcpu_dt_id(cpu);
> +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> +                       0xffffffff, 0xffffffff};
> +    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
> +    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
> +    uint32_t page_sizes_prop[64];
> +    size_t page_sizes_prop_size;
> +    QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
> +    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
> +    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
> +    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
> +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
> +                            env->dcache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
> +                            env->dcache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
> +                            env->icache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
> +                            env->icache_line_size)));
> +
> +    if (pcc->l1_dcache_size) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
> +                             pcc->l1_dcache_size)));
> +    } else {
> +        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
> +    }
> +    if (pcc->l1_icache_size) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
> +                             pcc->l1_icache_size)));
> +    } else {
> +        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
> +    }
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
> +    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
> +    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
> +
> +    if (env->spr_cb[SPR_PURR].oea_read) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
> +    }
> +
> +    if (env->mmu_model & POWERPC_MMU_1TSEG) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
> +                           segs, sizeof(segs))));
> +    }
> +
> +    /* Advertise VMX/VSX (vector extensions) if available
> +     *   0 / no property == no vector extensions
> +     *   1               == VMX / Altivec available
> +     *   2               == VSX available */
> +    if (env->insns_flags & PPC_ALTIVEC) {
> +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
> +
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
> +    }
> +
> +    /* Advertise DFP (Decimal Floating Point) if available
> +     *   0 / no property == no DFP
> +     *   1               == DFP available */
> +    if (env->insns_flags2 & PPC2_DFP) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
> +    }
> +
> +    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
> +                                                  sizeof(page_sizes_prop));
> +    if (page_sizes_prop_size) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
> +                           page_sizes_prop, page_sizes_prop_size)));
> +    }
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
> +                            cs->cpu_index / cpus_per_socket)));
> +
> +    _FDT(spapr_fixup_cpu_numa_smt_dt(fdt, offset, cs, spapr));
> +}
> +
> +static void spapr_populate_cpu_dt_node(void *fdt, sPAPREnvironment *spapr)


I'd suggest s/cpu/cpus/.  If anything "populate_cpu_dt_node" sounds
more like it covers a single cpu than "populate_cpu_dt".

> +{
> +    CPUState *cs;
> +    int cpus_offset;
> +    char *nodename;
> +    int smt = kvmppc_smt_threads();
> +
> +    cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
> +    _FDT(cpus_offset);
> +    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
> +    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
> +
> +    CPU_FOREACH(cs) {
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +        int index = ppc_get_vcpu_dt_id(cpu);
> +        DeviceClass *dc = DEVICE_GET_CLASS(cs);
> +        int offset;
> +
> +        if ((index % smt) != 0) {
> +            continue;
> +        }
> +
> +        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
> +        offset = fdt_add_subnode(fdt, cpus_offset, nodename);
> +        g_free(nodename);
> +        _FDT(offset);
> +        spapr_populate_cpu_dt(cs, fdt, offset);
> +    }
> +
> +}
> +
>  static void spapr_finalize_fdt(sPAPREnvironment *spapr,
>                                 hwaddr fdt_addr,
>                                 hwaddr rtas_addr,
> @@ -782,11 +805,8 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
>          fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
>      }
>  
> -    /* Advertise NUMA via ibm,associativity */
> -    ret = spapr_fixup_cpu_dt(fdt, spapr);
> -    if (ret < 0) {
> -        fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
> -    }
> +    /* cpus */
> +    spapr_populate_cpu_dt_node(fdt, spapr);
>  
>      bootlist = get_boot_devices_list(&cb, true);
>      if (cb && bootlist) {
Bharata B Rao March 25, 2015, 8:26 a.m. UTC | #2
On Wed, Mar 25, 2015 at 12:36:38PM +1100, David Gibson wrote:
> On Mon, Mar 23, 2015 at 07:05:46PM +0530, Bharata B Rao wrote:
> > Reorganize CPU device tree generation code so that it be reused from
> > hotplug path. CPU dt entries are now generated from spapr_finalize_fdt()
> > instead of spapr_create_fdt_skel().
> > 
> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > ---
> >  hw/ppc/spapr.c | 288 ++++++++++++++++++++++++++++++---------------------------
> >  1 file changed, 154 insertions(+), 134 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 36ff754..1a25cc0 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -206,24 +206,50 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
> >      return ret;
> >  }
> >  
> > +static int spapr_fixup_cpu_numa_smt_dt(void *fdt, int offset, CPUState *cs,
> > +                                        sPAPREnvironment *spapr)
> > +{
> > +    int ret;
> > +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> > +    int index = ppc_get_vcpu_dt_id(cpu);
> > +    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> > +    uint32_t associativity[] = {cpu_to_be32(0x5),
> > +                                cpu_to_be32(0x0),
> > +                                cpu_to_be32(0x0),
> > +                                cpu_to_be32(0x0),
> > +                                cpu_to_be32(cs->numa_node),
> > +                                cpu_to_be32(index)};
> > +
> > +    /* Advertise NUMA via ibm,associativity */
> > +    if (nb_numa_nodes > 1) {
> > +        ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
> > +                          sizeof(associativity));
> > +        if (ret < 0) {
> > +            return ret;
> > +        }
> > +    }
> > +
> > +    ret = fdt_setprop(fdt, offset, "ibm,pft-size",
> > +                      pft_size_prop, sizeof(pft_size_prop));
> > +    if (ret < 0) {
> > +        return ret;
> > +    }
> 
> The pft-size property isn't actually related to NUMA, so it doesn't
> really belong in this function.

Right, let me find an appropriate place to set this.

> 
> > +    return spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
> > +                                 ppc_get_compat_smt_threads(cpu));
> 
> Likewise calling the smt fixup function from the numa fixup function
> just seems odd to me; just be explicit and call the two sequentially.

It's just poor naming, I meant numa-and-smt. So essentially it is
fixing up NUMA and SMT related properties.

> 
> Overall this seems an odd way to split things.  Why not just make a
> spapr_fixup_one_cpu_dt() function, or similar, which should do all the
> necessary pieces.

Just one routine to setup everything related to CPU DT will not work
because some parts (NUMA and SMT bits) can be potentially fixed up
later as part of ibm,client-architecture-support call. This is the
reason why I have split up the reorg in this manner. Anyway will take
another stab at this and see if I can improve this.

> 
> > +}
> > +
> >  static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
> >  {
> >      int ret = 0, offset, cpus_offset;
> >      CPUState *cs;
> >      char cpu_model[32];
> >      int smt = kvmppc_smt_threads();
> > -    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> >  
> >      CPU_FOREACH(cs) {
> >          PowerPCCPU *cpu = POWERPC_CPU(cs);
> >          DeviceClass *dc = DEVICE_GET_CLASS(cs);
> >          int index = ppc_get_vcpu_dt_id(cpu);
> > -        uint32_t associativity[] = {cpu_to_be32(0x5),
> > -                                    cpu_to_be32(0x0),
> > -                                    cpu_to_be32(0x0),
> > -                                    cpu_to_be32(0x0),
> > -                                    cpu_to_be32(cs->numa_node),
> > -                                    cpu_to_be32(index)};
> >  
> >          if ((index % smt) != 0) {
> >              continue;
> > @@ -247,22 +273,7 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
> >              }
> >          }
> >  
> > -        if (nb_numa_nodes > 1) {
> > -            ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
> > -                              sizeof(associativity));
> > -            if (ret < 0) {
> > -                return ret;
> > -            }
> > -        }
> > -
> > -        ret = fdt_setprop(fdt, offset, "ibm,pft-size",
> > -                          pft_size_prop, sizeof(pft_size_prop));
> > -        if (ret < 0) {
> > -            return ret;
> > -        }
> > -
> > -        ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
> > -                                     ppc_get_compat_smt_threads(cpu));
> > +        ret = spapr_fixup_cpu_numa_smt_dt(fdt, offset, cs, spapr);
> >          if (ret < 0) {
> >              return ret;
> >          }
> > @@ -341,18 +352,13 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
> >                                     uint32_t epow_irq)
> >  {
> >      void *fdt;
> > -    CPUState *cs;
> >      uint32_t start_prop = cpu_to_be32(initrd_base);
> >      uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
> >      GString *hypertas = g_string_sized_new(256);
> >      GString *qemu_hypertas = g_string_sized_new(256);
> >      uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
> >      uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)};
> > -    int smt = kvmppc_smt_threads();
> >      unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
> > -    QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
> > -    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
> > -    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
> >      char *buf;
> >  
> >      add_str(hypertas, "hcall-pft");
> > @@ -441,107 +447,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
> >  
> >      _FDT((fdt_end_node(fdt)));
> >  
> > -    /* cpus */
> > -    _FDT((fdt_begin_node(fdt, "cpus")));
> > -
> > -    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
> > -    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
> > -
> > -    CPU_FOREACH(cs) {
> > -        PowerPCCPU *cpu = POWERPC_CPU(cs);
> > -        CPUPPCState *env = &cpu->env;
> > -        DeviceClass *dc = DEVICE_GET_CLASS(cs);
> > -        PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> > -        int index = ppc_get_vcpu_dt_id(cpu);
> > -        char *nodename;
> > -        uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> > -                           0xffffffff, 0xffffffff};
> > -        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
> > -        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
> > -        uint32_t page_sizes_prop[64];
> > -        size_t page_sizes_prop_size;
> > -
> > -        if ((index % smt) != 0) {
> > -            continue;
> > -        }
> > -
> > -        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
> > -
> > -        _FDT((fdt_begin_node(fdt, nodename)));
> > -
> > -        g_free(nodename);
> > -
> > -        _FDT((fdt_property_cell(fdt, "reg", index)));
> > -        _FDT((fdt_property_string(fdt, "device_type", "cpu")));
> > -
> > -        _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
> > -        _FDT((fdt_property_cell(fdt, "d-cache-block-size",
> > -                                env->dcache_line_size)));
> > -        _FDT((fdt_property_cell(fdt, "d-cache-line-size",
> > -                                env->dcache_line_size)));
> > -        _FDT((fdt_property_cell(fdt, "i-cache-block-size",
> > -                                env->icache_line_size)));
> > -        _FDT((fdt_property_cell(fdt, "i-cache-line-size",
> > -                                env->icache_line_size)));
> > -
> > -        if (pcc->l1_dcache_size) {
> > -            _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
> > -        } else {
> > -            fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
> > -        }
> > -        if (pcc->l1_icache_size) {
> > -            _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
> > -        } else {
> > -            fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
> > -        }
> > -
> > -        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
> > -        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
> > -        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
> > -        _FDT((fdt_property_string(fdt, "status", "okay")));
> > -        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
> > -
> > -        if (env->spr_cb[SPR_PURR].oea_read) {
> > -            _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
> > -        }
> > -
> > -        if (env->mmu_model & POWERPC_MMU_1TSEG) {
> > -            _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
> > -                               segs, sizeof(segs))));
> > -        }
> > -
> > -        /* Advertise VMX/VSX (vector extensions) if available
> > -         *   0 / no property == no vector extensions
> > -         *   1               == VMX / Altivec available
> > -         *   2               == VSX available */
> > -        if (env->insns_flags & PPC_ALTIVEC) {
> > -            uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
> > -
> > -            _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
> > -        }
> > -
> > -        /* Advertise DFP (Decimal Floating Point) if available
> > -         *   0 / no property == no DFP
> > -         *   1               == DFP available */
> > -        if (env->insns_flags2 & PPC2_DFP) {
> > -            _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
> > -        }
> > -
> > -        page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
> > -                                                      sizeof(page_sizes_prop));
> > -        if (page_sizes_prop_size) {
> > -            _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
> > -                               page_sizes_prop, page_sizes_prop_size)));
> > -        }
> > -
> > -        _FDT((fdt_property_cell(fdt, "ibm,chip-id",
> > -                                cs->cpu_index / cpus_per_socket)));
> > -
> > -        _FDT((fdt_end_node(fdt)));
> > -    }
> > -
> > -    _FDT((fdt_end_node(fdt)));
> > -
> >      /* RTAS */
> >      _FDT((fdt_begin_node(fdt, "rtas")));
> >  
> > @@ -739,6 +644,124 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
> >      return 0;
> >  }
> >  
> > +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset)
> > +{
> > +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> > +    CPUPPCState *env = &cpu->env;
> > +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> > +    int index = ppc_get_vcpu_dt_id(cpu);
> > +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> > +                       0xffffffff, 0xffffffff};
> > +    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
> > +    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
> > +    uint32_t page_sizes_prop[64];
> > +    size_t page_sizes_prop_size;
> > +    QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
> > +    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
> > +    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
> > +
> > +    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
> > +    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
> > +
> > +    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
> > +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
> > +                            env->dcache_line_size)));
> > +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
> > +                            env->dcache_line_size)));
> > +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
> > +                            env->icache_line_size)));
> > +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
> > +                            env->icache_line_size)));
> > +
> > +    if (pcc->l1_dcache_size) {
> > +        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
> > +                             pcc->l1_dcache_size)));
> > +    } else {
> > +        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
> > +    }
> > +    if (pcc->l1_icache_size) {
> > +        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
> > +                             pcc->l1_icache_size)));
> > +    } else {
> > +        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
> > +    }
> > +
> > +    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
> > +    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
> > +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
> > +    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
> > +    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
> > +
> > +    if (env->spr_cb[SPR_PURR].oea_read) {
> > +        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
> > +    }
> > +
> > +    if (env->mmu_model & POWERPC_MMU_1TSEG) {
> > +        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
> > +                           segs, sizeof(segs))));
> > +    }
> > +
> > +    /* Advertise VMX/VSX (vector extensions) if available
> > +     *   0 / no property == no vector extensions
> > +     *   1               == VMX / Altivec available
> > +     *   2               == VSX available */
> > +    if (env->insns_flags & PPC_ALTIVEC) {
> > +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
> > +
> > +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
> > +    }
> > +
> > +    /* Advertise DFP (Decimal Floating Point) if available
> > +     *   0 / no property == no DFP
> > +     *   1               == DFP available */
> > +    if (env->insns_flags2 & PPC2_DFP) {
> > +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
> > +    }
> > +
> > +    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
> > +                                                  sizeof(page_sizes_prop));
> > +    if (page_sizes_prop_size) {
> > +        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
> > +                           page_sizes_prop, page_sizes_prop_size)));
> > +    }
> > +
> > +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
> > +                            cs->cpu_index / cpus_per_socket)));
> > +
> > +    _FDT(spapr_fixup_cpu_numa_smt_dt(fdt, offset, cs, spapr));
> > +}
> > +
> > +static void spapr_populate_cpu_dt_node(void *fdt, sPAPREnvironment *spapr)
> 
> 
> I'd suggest s/cpu/cpus/.  If anything "populate_cpu_dt_node" sounds
> more like it covers a single cpu than "populate_cpu_dt".

Sure.
David Gibson March 26, 2015, 1:40 a.m. UTC | #3
On Wed, Mar 25, 2015 at 01:56:17PM +0530, Bharata B Rao wrote:
> On Wed, Mar 25, 2015 at 12:36:38PM +1100, David Gibson wrote:
> > On Mon, Mar 23, 2015 at 07:05:46PM +0530, Bharata B Rao wrote:
> > > Reorganize CPU device tree generation code so that it be reused from
> > > hotplug path. CPU dt entries are now generated from spapr_finalize_fdt()
> > > instead of spapr_create_fdt_skel().
> > > 
> > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > > ---
> > >  hw/ppc/spapr.c | 288 ++++++++++++++++++++++++++++++---------------------------
> > >  1 file changed, 154 insertions(+), 134 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index 36ff754..1a25cc0 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -206,24 +206,50 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
> > >      return ret;
> > >  }
> > >  
> > > +static int spapr_fixup_cpu_numa_smt_dt(void *fdt, int offset, CPUState *cs,
> > > +                                        sPAPREnvironment *spapr)
> > > +{
> > > +    int ret;
> > > +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> > > +    int index = ppc_get_vcpu_dt_id(cpu);
> > > +    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> > > +    uint32_t associativity[] = {cpu_to_be32(0x5),
> > > +                                cpu_to_be32(0x0),
> > > +                                cpu_to_be32(0x0),
> > > +                                cpu_to_be32(0x0),
> > > +                                cpu_to_be32(cs->numa_node),
> > > +                                cpu_to_be32(index)};
> > > +
> > > +    /* Advertise NUMA via ibm,associativity */
> > > +    if (nb_numa_nodes > 1) {
> > > +        ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
> > > +                          sizeof(associativity));
> > > +        if (ret < 0) {
> > > +            return ret;
> > > +        }
> > > +    }
> > > +
> > > +    ret = fdt_setprop(fdt, offset, "ibm,pft-size",
> > > +                      pft_size_prop, sizeof(pft_size_prop));
> > > +    if (ret < 0) {
> > > +        return ret;
> > > +    }
> > 
> > The pft-size property isn't actually related to NUMA, so it doesn't
> > really belong in this function.
> 
> Right, let me find an appropriate place to set this.

The top level cpu_fixup function sounds right to me.

> 
> > 
> > > +    return spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
> > > +                                 ppc_get_compat_smt_threads(cpu));
> > 
> > Likewise calling the smt fixup function from the numa fixup function
> > just seems odd to me; just be explicit and call the two sequentially.
> 
> It's just poor naming, I meant numa-and-smt. So essentially it is
> fixing up NUMA and SMT related properties.

I realise that it's NUMA and SMT properties - but "NUMA & SMT" seems a
very arbitrary distinction.  It's not clear why that's a useful subset
of things to be handled by their own function.

> > Overall this seems an odd way to split things.  Why not just make a
> > spapr_fixup_one_cpu_dt() function, or similar, which should do all the
> > necessary pieces.
> 
> Just one routine to setup everything related to CPU DT will not work
> because some parts (NUMA and SMT bits) can be potentially fixed up
> later as part of ibm,client-architecture-support call. This is the
> reason why I have split up the reorg in this manner. Anyway will take
> another stab at this and see if I can improve this.

Ok.
diff mbox

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 36ff754..1a25cc0 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -206,24 +206,50 @@  static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
     return ret;
 }
 
+static int spapr_fixup_cpu_numa_smt_dt(void *fdt, int offset, CPUState *cs,
+                                        sPAPREnvironment *spapr)
+{
+    int ret;
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    int index = ppc_get_vcpu_dt_id(cpu);
+    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
+    uint32_t associativity[] = {cpu_to_be32(0x5),
+                                cpu_to_be32(0x0),
+                                cpu_to_be32(0x0),
+                                cpu_to_be32(0x0),
+                                cpu_to_be32(cs->numa_node),
+                                cpu_to_be32(index)};
+
+    /* Advertise NUMA via ibm,associativity */
+    if (nb_numa_nodes > 1) {
+        ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
+                          sizeof(associativity));
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    ret = fdt_setprop(fdt, offset, "ibm,pft-size",
+                      pft_size_prop, sizeof(pft_size_prop));
+    if (ret < 0) {
+        return ret;
+    }
+
+    return spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
+                                 ppc_get_compat_smt_threads(cpu));
+}
+
 static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
 {
     int ret = 0, offset, cpus_offset;
     CPUState *cs;
     char cpu_model[32];
     int smt = kvmppc_smt_threads();
-    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
 
     CPU_FOREACH(cs) {
         PowerPCCPU *cpu = POWERPC_CPU(cs);
         DeviceClass *dc = DEVICE_GET_CLASS(cs);
         int index = ppc_get_vcpu_dt_id(cpu);
-        uint32_t associativity[] = {cpu_to_be32(0x5),
-                                    cpu_to_be32(0x0),
-                                    cpu_to_be32(0x0),
-                                    cpu_to_be32(0x0),
-                                    cpu_to_be32(cs->numa_node),
-                                    cpu_to_be32(index)};
 
         if ((index % smt) != 0) {
             continue;
@@ -247,22 +273,7 @@  static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
             }
         }
 
-        if (nb_numa_nodes > 1) {
-            ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
-                              sizeof(associativity));
-            if (ret < 0) {
-                return ret;
-            }
-        }
-
-        ret = fdt_setprop(fdt, offset, "ibm,pft-size",
-                          pft_size_prop, sizeof(pft_size_prop));
-        if (ret < 0) {
-            return ret;
-        }
-
-        ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
-                                     ppc_get_compat_smt_threads(cpu));
+        ret = spapr_fixup_cpu_numa_smt_dt(fdt, offset, cs, spapr);
         if (ret < 0) {
             return ret;
         }
@@ -341,18 +352,13 @@  static void *spapr_create_fdt_skel(hwaddr initrd_base,
                                    uint32_t epow_irq)
 {
     void *fdt;
-    CPUState *cs;
     uint32_t start_prop = cpu_to_be32(initrd_base);
     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
     GString *hypertas = g_string_sized_new(256);
     GString *qemu_hypertas = g_string_sized_new(256);
     uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
     uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)};
-    int smt = kvmppc_smt_threads();
     unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
-    QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
-    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
-    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
     char *buf;
 
     add_str(hypertas, "hcall-pft");
@@ -441,107 +447,6 @@  static void *spapr_create_fdt_skel(hwaddr initrd_base,
 
     _FDT((fdt_end_node(fdt)));
 
-    /* cpus */
-    _FDT((fdt_begin_node(fdt, "cpus")));
-
-    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
-    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
-
-    CPU_FOREACH(cs) {
-        PowerPCCPU *cpu = POWERPC_CPU(cs);
-        CPUPPCState *env = &cpu->env;
-        DeviceClass *dc = DEVICE_GET_CLASS(cs);
-        PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
-        int index = ppc_get_vcpu_dt_id(cpu);
-        char *nodename;
-        uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
-                           0xffffffff, 0xffffffff};
-        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
-        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
-        uint32_t page_sizes_prop[64];
-        size_t page_sizes_prop_size;
-
-        if ((index % smt) != 0) {
-            continue;
-        }
-
-        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
-
-        _FDT((fdt_begin_node(fdt, nodename)));
-
-        g_free(nodename);
-
-        _FDT((fdt_property_cell(fdt, "reg", index)));
-        _FDT((fdt_property_string(fdt, "device_type", "cpu")));
-
-        _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
-        _FDT((fdt_property_cell(fdt, "d-cache-block-size",
-                                env->dcache_line_size)));
-        _FDT((fdt_property_cell(fdt, "d-cache-line-size",
-                                env->dcache_line_size)));
-        _FDT((fdt_property_cell(fdt, "i-cache-block-size",
-                                env->icache_line_size)));
-        _FDT((fdt_property_cell(fdt, "i-cache-line-size",
-                                env->icache_line_size)));
-
-        if (pcc->l1_dcache_size) {
-            _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
-        } else {
-            fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
-        }
-        if (pcc->l1_icache_size) {
-            _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
-        } else {
-            fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
-        }
-
-        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
-        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
-        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
-        _FDT((fdt_property_string(fdt, "status", "okay")));
-        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
-
-        if (env->spr_cb[SPR_PURR].oea_read) {
-            _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
-        }
-
-        if (env->mmu_model & POWERPC_MMU_1TSEG) {
-            _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
-                               segs, sizeof(segs))));
-        }
-
-        /* Advertise VMX/VSX (vector extensions) if available
-         *   0 / no property == no vector extensions
-         *   1               == VMX / Altivec available
-         *   2               == VSX available */
-        if (env->insns_flags & PPC_ALTIVEC) {
-            uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
-
-            _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
-        }
-
-        /* Advertise DFP (Decimal Floating Point) if available
-         *   0 / no property == no DFP
-         *   1               == DFP available */
-        if (env->insns_flags2 & PPC2_DFP) {
-            _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
-        }
-
-        page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
-                                                      sizeof(page_sizes_prop));
-        if (page_sizes_prop_size) {
-            _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
-                               page_sizes_prop, page_sizes_prop_size)));
-        }
-
-        _FDT((fdt_property_cell(fdt, "ibm,chip-id",
-                                cs->cpu_index / cpus_per_socket)));
-
-        _FDT((fdt_end_node(fdt)));
-    }
-
-    _FDT((fdt_end_node(fdt)));
-
     /* RTAS */
     _FDT((fdt_begin_node(fdt, "rtas")));
 
@@ -739,6 +644,124 @@  static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
     return 0;
 }
 
+static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+    int index = ppc_get_vcpu_dt_id(cpu);
+    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
+                       0xffffffff, 0xffffffff};
+    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
+    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
+    uint32_t page_sizes_prop[64];
+    size_t page_sizes_prop_size;
+    QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
+    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
+    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
+
+    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
+    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
+                            env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
+                            env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
+                            env->icache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
+                            env->icache_line_size)));
+
+    if (pcc->l1_dcache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
+                             pcc->l1_dcache_size)));
+    } else {
+        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
+    }
+    if (pcc->l1_icache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
+                             pcc->l1_icache_size)));
+    } else {
+        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
+    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
+    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
+
+    if (env->spr_cb[SPR_PURR].oea_read) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
+    }
+
+    if (env->mmu_model & POWERPC_MMU_1TSEG) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
+                           segs, sizeof(segs))));
+    }
+
+    /* Advertise VMX/VSX (vector extensions) if available
+     *   0 / no property == no vector extensions
+     *   1               == VMX / Altivec available
+     *   2               == VSX available */
+    if (env->insns_flags & PPC_ALTIVEC) {
+        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
+
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
+    }
+
+    /* Advertise DFP (Decimal Floating Point) if available
+     *   0 / no property == no DFP
+     *   1               == DFP available */
+    if (env->insns_flags2 & PPC2_DFP) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
+    }
+
+    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
+                                                  sizeof(page_sizes_prop));
+    if (page_sizes_prop_size) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
+                           page_sizes_prop, page_sizes_prop_size)));
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
+                            cs->cpu_index / cpus_per_socket)));
+
+    _FDT(spapr_fixup_cpu_numa_smt_dt(fdt, offset, cs, spapr));
+}
+
+static void spapr_populate_cpu_dt_node(void *fdt, sPAPREnvironment *spapr)
+{
+    CPUState *cs;
+    int cpus_offset;
+    char *nodename;
+    int smt = kvmppc_smt_threads();
+
+    cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
+    _FDT(cpus_offset);
+    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
+    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
+
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        int index = ppc_get_vcpu_dt_id(cpu);
+        DeviceClass *dc = DEVICE_GET_CLASS(cs);
+        int offset;
+
+        if ((index % smt) != 0) {
+            continue;
+        }
+
+        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
+        offset = fdt_add_subnode(fdt, cpus_offset, nodename);
+        g_free(nodename);
+        _FDT(offset);
+        spapr_populate_cpu_dt(cs, fdt, offset);
+    }
+
+}
+
 static void spapr_finalize_fdt(sPAPREnvironment *spapr,
                                hwaddr fdt_addr,
                                hwaddr rtas_addr,
@@ -782,11 +805,8 @@  static void spapr_finalize_fdt(sPAPREnvironment *spapr,
         fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
     }
 
-    /* Advertise NUMA via ibm,associativity */
-    ret = spapr_fixup_cpu_dt(fdt, spapr);
-    if (ret < 0) {
-        fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
-    }
+    /* cpus */
+    spapr_populate_cpu_dt_node(fdt, spapr);
 
     bootlist = get_boot_devices_list(&cb, true);
     if (cb && bootlist) {