Message ID | 1420697420-16053-7-git-send-email-bharata@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
Quoting Bharata B Rao (2015-01-08 00:10:13) > Support CPU hotplug via device-add command. Use the exising EPOW event > infrastructure to send CPU hotplug notification to the guest. > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > --- > hw/ppc/spapr.c | 205 +++++++++++++++++++++++++++++++++++++++++++- > hw/ppc/spapr_events.c | 8 +- > target-ppc/translate_init.c | 6 ++ > 3 files changed, 215 insertions(+), 4 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 515d770..a293a59 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -330,6 +330,8 @@ static void add_str(GString *s, const gchar *s1) > g_string_append_len(s, s1, strlen(s1) + 1); > } > > +uint32_t cpus_per_socket; > + > static void *spapr_create_fdt_skel(hwaddr initrd_base, > hwaddr initrd_size, > hwaddr kernel_size, > @@ -350,9 +352,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; > QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); > unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; > - uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; > char *buf; > > + cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; > add_str(hypertas, "hcall-pft"); > add_str(hypertas, "hcall-term"); > add_str(hypertas, "hcall-dabr"); > @@ -1744,12 +1746,209 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp) > } > } > > +/* TODO: Duplicates code from spapr_create_fdt_skel(), Fix this */ > +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, > + int drc_index) > +{ > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + CPUPPCState *env = &cpu->env; > + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); > + int index = ppc_get_vcpu_dt_id(cpu); > + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), > + 0xffffffff, 0xffffffff}; > + uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; > + uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; > + uint32_t page_sizes_prop[64]; > + size_t page_sizes_prop_size; > + int smpt = ppc_get_compat_smt_threads(cpu); > + uint32_t servers_prop[smpt]; > + uint32_t gservers_prop[smpt * 2]; > + int i; > + uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; > + uint32_t associativity[] = {cpu_to_be32(0x5), > + cpu_to_be32(0x0), > + cpu_to_be32(0x0), > + cpu_to_be32(0x0), > + cpu_to_be32(cs->numa_node), > + cpu_to_be32(index)}; > + > + _FDT((fdt_setprop_cell(fdt, offset, "reg", index))); > + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); > + > + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); > + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", > + env->dcache_line_size))); > + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", > + env->dcache_line_size))); > + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", > + env->icache_line_size))); > + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", > + env->icache_line_size))); > + > + if (pcc->l1_dcache_size) { > + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", > + pcc->l1_dcache_size))); > + } else { > + fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n"); > + } > + if (pcc->l1_icache_size) { > + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", > + pcc->l1_icache_size))); > + } else { > + fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n"); > + } > + > + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); > + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr))); > + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); > + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); > + > + if (env->spr_cb[SPR_PURR].oea_read) { > + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); > + } > + > + if (env->mmu_model & POWERPC_MMU_1TSEG) { > + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", > + segs, sizeof(segs)))); > + } > + > + /* Advertise VMX/VSX (vector extensions) if available > + * 0 / no property == no vector extensions > + * 1 == VMX / Altivec available > + * 2 == VSX available */ > + if (env->insns_flags & PPC_ALTIVEC) { > + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; > + > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); > + } > + > + /* Advertise DFP (Decimal Floating Point) if available > + * 0 / no property == no DFP > + * 1 == DFP available */ > + if (env->insns_flags2 & PPC2_DFP) { > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); > + } > + > + page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, > + sizeof(page_sizes_prop)); > + if (page_sizes_prop_size) { > + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", > + page_sizes_prop, page_sizes_prop_size))); > + } > + > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", > + cs->cpu_index / cpus_per_socket))); > + > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index))); > + > + /* Build interrupt servers and gservers properties */ > + for (i = 0; i < smpt; i++) { > + servers_prop[i] = cpu_to_be32(index + i); > + /* Hack, direct the group queues back to cpu 0 */ > + gservers_prop[i*2] = cpu_to_be32(index + i); > + gservers_prop[i*2 + 1] = 0; > + } > + _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", > + servers_prop, sizeof(servers_prop))); > + _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s", > + gservers_prop, sizeof(gservers_prop))); > + _FDT(fdt_setprop(fdt, offset, "ibm,pft-size", > + pft_size_prop, sizeof(pft_size_prop))); > + > + if (nb_numa_nodes > 1) { > + _FDT(fdt_setprop(fdt, offset, "ibm,associativity", associativity, > + sizeof(associativity))); > + } > +} > + > +static void spapr_cpu_hotplug_add(DeviceState *dev, CPUState *cs) > +{ > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + DeviceClass *dc = DEVICE_GET_CLASS(cs); > + int id = ppc_get_vcpu_dt_id(cpu); > + sPAPRDRConnector *drc = > + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id); > + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > + int drc_index = drck->get_index(drc); > + void *fdt, *fdt_orig; > + int offset, i; > + char *nodename; > + > + /* add OF node for CPU and required OF DT properties */ > + fdt_orig = g_malloc0(FDT_MAX_SIZE); > + offset = fdt_create(fdt_orig, FDT_MAX_SIZE); > + fdt_begin_node(fdt_orig, ""); > + fdt_end_node(fdt_orig); > + fdt_finish(fdt_orig); > + > + fdt = g_malloc0(FDT_MAX_SIZE); > + fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE); > + > + nodename = g_strdup_printf("%s@%x", dc->fw_name, id); > + > + offset = fdt_add_subnode(fdt, offset, nodename); > + > + /* Set NUMA node for the added CPU */ > + for (i = 0; i < nb_numa_nodes; i++) { > + if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) { > + cs->numa_node = i; > + break; > + } > + } > + > + spapr_populate_cpu_dt(cs, fdt, offset, drc_index); > + g_free(fdt_orig); > + g_free(nodename); > + > + drck->attach(drc, dev, fdt, offset, false); > +} > + > +static void spapr_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > + Error **errp) > +{ > + Error *local_err = NULL; > + CPUState *cs = CPU(dev); > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + sPAPRDRConnector *drc = > + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cpu->cpu_dt_id); > + > + /* TODO: Check if DR is enabled ? */ Er, I take back my comment from earlier, you do also need to check for spapr->dr_cpu_enabled below, since in the case of PCI it's enabled on a per-PHB basis, whereas here it's a machine-wide option... > + g_assert(drc); > + > + spapr_cpu_reset(POWERPC_CPU(CPU(dev))); > + spapr_cpu_hotplug_add(dev, cs); > + spapr_hotplug_req_add_event(drc); > + error_propagate(errp, local_err); > + return; > +} > + > +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, > + DeviceState *dev, Error **errp) > +{ > + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { > + if (dev->hotplugged) { Maybe just if (dev->hotplugged && spapr->dr_cpu_enabled) { ... Would do it > + spapr_cpu_plug(hotplug_dev, dev, errp); > + } > + } > +} > + > +static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine, > + DeviceState *dev) > +{ > + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { > + return HOTPLUG_HANDLER(machine); > + } > + return NULL; > +} > + > static void spapr_machine_class_init(ObjectClass *oc, void *data) > { > MachineClass *mc = MACHINE_CLASS(oc); > sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc); > FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc); > NMIClass *nc = NMI_CLASS(oc); > + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); > > mc->init = ppc_spapr_init; > mc->reset = ppc_spapr_reset; > @@ -1759,6 +1958,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) > mc->default_boot_order = NULL; > mc->kvm_type = spapr_kvm_type; > mc->has_dynamic_sysbus = true; > + mc->get_hotplug_handler = spapr_get_hotpug_handler; > + > + hc->plug = spapr_machine_device_plug; > smc->dr_phb_enabled = false; > smc->dr_cpu_enabled = false; > smc->dr_lmb_enabled = false; > @@ -1778,6 +1980,7 @@ static const TypeInfo spapr_machine_info = { > .interfaces = (InterfaceInfo[]) { > { TYPE_FW_PATH_PROVIDER }, > { TYPE_NMI }, > + { TYPE_HOTPLUG_HANDLER }, > { } > }, > }; > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index 434a75d..035d8c9 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -364,14 +364,16 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) > hp->hdr.section_length = cpu_to_be16(sizeof(*hp)); > hp->hdr.section_version = 1; /* includes extended modifier */ > hp->hotplug_action = hp_action; > - > + hp->drc.index = cpu_to_be32(drck->get_index(drc)); > + hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; > > switch (drc_type) { > case SPAPR_DR_CONNECTOR_TYPE_PCI: > - hp->drc.index = cpu_to_be32(drck->get_index(drc)); > - hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; > hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI; > break; > + case SPAPR_DR_CONNECTOR_TYPE_CPU: > + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU; > + break; > default: > /* skip notification for unknown connector types */ > g_free(new_hp); > diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c > index 9c642a5..cf9d8d3 100644 > --- a/target-ppc/translate_init.c > +++ b/target-ppc/translate_init.c > @@ -32,6 +32,7 @@ > #include "hw/qdev-properties.h" > #include "hw/ppc/spapr.h" > #include "hw/ppc/ppc.h" > +#include "sysemu/sysemu.h" > > //#define PPC_DUMP_CPU > //#define PPC_DEBUG_SPR > @@ -8909,6 +8910,11 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp) > return; > } > > + if (cs->cpu_index >= max_cpus) { > + error_setg(errp, "Can't have more CPUs, maxcpus limit reached"); > + return; > + } > + > cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt > + (cs->cpu_index % smp_threads); > #endif > -- > 2.1.0
On Thu, 8 Jan 2015 11:40:13 +0530 Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > Support CPU hotplug via device-add command. Use the exising EPOW event > infrastructure to send CPU hotplug notification to the guest. > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > --- > hw/ppc/spapr.c | 205 +++++++++++++++++++++++++++++++++++++++++++- > hw/ppc/spapr_events.c | 8 +- > target-ppc/translate_init.c | 6 ++ > 3 files changed, 215 insertions(+), 4 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 515d770..a293a59 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -330,6 +330,8 @@ static void add_str(GString *s, const gchar *s1) > g_string_append_len(s, s1, strlen(s1) + 1); > } > > +uint32_t cpus_per_socket; static ??? > + > static void *spapr_create_fdt_skel(hwaddr initrd_base, > hwaddr initrd_size, > hwaddr kernel_size, > @@ -350,9 +352,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; > QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); > unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; > - uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; > char *buf; > > + cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; > add_str(hypertas, "hcall-pft"); > add_str(hypertas, "hcall-term"); > add_str(hypertas, "hcall-dabr"); > @@ -1744,12 +1746,209 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp) > } > } > > +/* TODO: Duplicates code from spapr_create_fdt_skel(), Fix this */ > +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, > + int drc_index) > +{ > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + CPUPPCState *env = &cpu->env; > + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); > + int index = ppc_get_vcpu_dt_id(cpu); > + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), > + 0xffffffff, 0xffffffff}; > + uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; > + uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; > + uint32_t page_sizes_prop[64]; > + size_t page_sizes_prop_size; > + int smpt = ppc_get_compat_smt_threads(cpu); > + uint32_t servers_prop[smpt]; > + uint32_t gservers_prop[smpt * 2]; > + int i; > + uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; > + uint32_t associativity[] = {cpu_to_be32(0x5), > + cpu_to_be32(0x0), > + cpu_to_be32(0x0), > + cpu_to_be32(0x0), > + cpu_to_be32(cs->numa_node), > + cpu_to_be32(index)}; > + > + _FDT((fdt_setprop_cell(fdt, offset, "reg", index))); > + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); > + > + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); > + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", > + env->dcache_line_size))); > + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", > + env->dcache_line_size))); > + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", > + env->icache_line_size))); > + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", > + env->icache_line_size))); > + > + if (pcc->l1_dcache_size) { > + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", > + pcc->l1_dcache_size))); > + } else { > + fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n"); > + } > + if (pcc->l1_icache_size) { > + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", > + pcc->l1_icache_size))); > + } else { > + fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n"); > + } > + > + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); > + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr))); > + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); > + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); > + > + if (env->spr_cb[SPR_PURR].oea_read) { > + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); > + } > + > + if (env->mmu_model & POWERPC_MMU_1TSEG) { > + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", > + segs, sizeof(segs)))); > + } > + > + /* Advertise VMX/VSX (vector extensions) if available > + * 0 / no property == no vector extensions > + * 1 == VMX / Altivec available > + * 2 == VSX available */ > + if (env->insns_flags & PPC_ALTIVEC) { > + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; > + > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); > + } > + > + /* Advertise DFP (Decimal Floating Point) if available > + * 0 / no property == no DFP > + * 1 == DFP available */ > + if (env->insns_flags2 & PPC2_DFP) { > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); > + } > + > + page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, > + sizeof(page_sizes_prop)); > + if (page_sizes_prop_size) { > + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", > + page_sizes_prop, page_sizes_prop_size))); > + } > + > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", > + cs->cpu_index / cpus_per_socket))); > + > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index))); > + > + /* Build interrupt servers and gservers properties */ > + for (i = 0; i < smpt; i++) { > + servers_prop[i] = cpu_to_be32(index + i); > + /* Hack, direct the group queues back to cpu 0 */ > + gservers_prop[i*2] = cpu_to_be32(index + i); > + gservers_prop[i*2 + 1] = 0; > + } > + _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", > + servers_prop, sizeof(servers_prop))); > + _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s", > + gservers_prop, sizeof(gservers_prop))); > + _FDT(fdt_setprop(fdt, offset, "ibm,pft-size", > + pft_size_prop, sizeof(pft_size_prop))); > + > + if (nb_numa_nodes > 1) { > + _FDT(fdt_setprop(fdt, offset, "ibm,associativity", associativity, > + sizeof(associativity))); > + } > +} > + > +static void spapr_cpu_hotplug_add(DeviceState *dev, CPUState *cs) > +{ > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + DeviceClass *dc = DEVICE_GET_CLASS(cs); > + int id = ppc_get_vcpu_dt_id(cpu); > + sPAPRDRConnector *drc = > + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id); > + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > + int drc_index = drck->get_index(drc); > + void *fdt, *fdt_orig; > + int offset, i; > + char *nodename; > + > + /* add OF node for CPU and required OF DT properties */ > + fdt_orig = g_malloc0(FDT_MAX_SIZE); > + offset = fdt_create(fdt_orig, FDT_MAX_SIZE); > + fdt_begin_node(fdt_orig, ""); > + fdt_end_node(fdt_orig); > + fdt_finish(fdt_orig); > + > + fdt = g_malloc0(FDT_MAX_SIZE); > + fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE); > + > + nodename = g_strdup_printf("%s@%x", dc->fw_name, id); > + > + offset = fdt_add_subnode(fdt, offset, nodename); > + > + /* Set NUMA node for the added CPU */ > + for (i = 0; i < nb_numa_nodes; i++) { > + if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) { > + cs->numa_node = i; > + break; > + } > + } > + > + spapr_populate_cpu_dt(cs, fdt, offset, drc_index); > + g_free(fdt_orig); > + g_free(nodename); > + > + drck->attach(drc, dev, fdt, offset, false); > +} > + > +static void spapr_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > + Error **errp) > +{ > + Error *local_err = NULL; > + CPUState *cs = CPU(dev); > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + sPAPRDRConnector *drc = > + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cpu->cpu_dt_id); just rant: does this have any relation to hotplug_dev, the point here is to get these data from hotplug_dev object/some child of it rather then via direct adhoc call. > + > + /* TODO: Check if DR is enabled ? */ > + g_assert(drc); > + > + spapr_cpu_reset(POWERPC_CPU(CPU(dev))); reset probably should be don at realize time, see x86_cpu_realizefn() for example. > + spapr_cpu_hotplug_add(dev, cs); > + spapr_hotplug_req_add_event(drc); > + error_propagate(errp, local_err); > + return; > +} > + > +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, > + DeviceState *dev, Error **errp) > +{ > + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { > + if (dev->hotplugged) { > + spapr_cpu_plug(hotplug_dev, dev, errp); Would be nicer if this could do cold-plugged CPUs wiring too. > + } > + } > +} > + > +static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine, > + DeviceState *dev) > +{ > + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { > + return HOTPLUG_HANDLER(machine); > + } > + return NULL; > +} > + > static void spapr_machine_class_init(ObjectClass *oc, void *data) > { > MachineClass *mc = MACHINE_CLASS(oc); > sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc); > FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc); > NMIClass *nc = NMI_CLASS(oc); > + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); > > mc->init = ppc_spapr_init; > mc->reset = ppc_spapr_reset; > @@ -1759,6 +1958,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) > mc->default_boot_order = NULL; > mc->kvm_type = spapr_kvm_type; > mc->has_dynamic_sysbus = true; > + mc->get_hotplug_handler = spapr_get_hotpug_handler; > + > + hc->plug = spapr_machine_device_plug; > smc->dr_phb_enabled = false; > smc->dr_cpu_enabled = false; > smc->dr_lmb_enabled = false; > @@ -1778,6 +1980,7 @@ static const TypeInfo spapr_machine_info = { > .interfaces = (InterfaceInfo[]) { > { TYPE_FW_PATH_PROVIDER }, > { TYPE_NMI }, > + { TYPE_HOTPLUG_HANDLER }, > { } > }, > }; > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index 434a75d..035d8c9 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -364,14 +364,16 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) > hp->hdr.section_length = cpu_to_be16(sizeof(*hp)); > hp->hdr.section_version = 1; /* includes extended modifier */ > hp->hotplug_action = hp_action; > - > + hp->drc.index = cpu_to_be32(drck->get_index(drc)); > + hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; > > switch (drc_type) { > case SPAPR_DR_CONNECTOR_TYPE_PCI: > - hp->drc.index = cpu_to_be32(drck->get_index(drc)); > - hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; > hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI; > break; > + case SPAPR_DR_CONNECTOR_TYPE_CPU: > + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU; > + break; > default: > /* skip notification for unknown connector types */ > g_free(new_hp); > diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c > index 9c642a5..cf9d8d3 100644 > --- a/target-ppc/translate_init.c > +++ b/target-ppc/translate_init.c > @@ -32,6 +32,7 @@ > #include "hw/qdev-properties.h" > #include "hw/ppc/spapr.h" > #include "hw/ppc/ppc.h" > +#include "sysemu/sysemu.h" > > //#define PPC_DUMP_CPU > //#define PPC_DEBUG_SPR > @@ -8909,6 +8910,11 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp) > return; > } > > + if (cs->cpu_index >= max_cpus) { pls note that cpu_index is monotonically increases, so when you do unplug and then plug it will go above max_cpus or the same will happen if one device_add fails in the middle, the next CPU add will fail because of cs->cpu_index goes overboard. I'd suggest not to rely/use cpu_index for any purposes and use other means to identify where cpu is plugged in. On x68 we slowly getting rid of this dependency in favor of apic_id (topology information), eventually it could become: -device cpu_foo,socket=X,core=Y[,thread=Z][,node=N] you probably could do the same. It doesn't have to be in this series, just be aware of potential issues. > + error_setg(errp, "Can't have more CPUs, maxcpus limit reached"); > + return; > + } > + > cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt > + (cs->cpu_index % smp_threads); > #endif
On Thu, Jan 22, 2015 at 04:16:01PM -0600, Michael Roth wrote: > Quoting Bharata B Rao (2015-01-08 00:10:13) > > +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, > > + DeviceState *dev, Error **errp) > > +{ > > + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { > > + if (dev->hotplugged) { > > Maybe just > > if (dev->hotplugged && spapr->dr_cpu_enabled) { > ... > > Would do it This is a common ->plug() handler and would be used for memory too. Hence there is a need to identify the type of object (CPU or memory) and handle it differently. Regards, Bharata.
Quoting Bharata B Rao (2015-01-27 22:19:56) > On Thu, Jan 22, 2015 at 04:16:01PM -0600, Michael Roth wrote: > > Quoting Bharata B Rao (2015-01-08 00:10:13) > > > +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, > > > + DeviceState *dev, Error **errp) > > > +{ > > > + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { > > > + if (dev->hotplugged) { > > > > Maybe just > > > > if (dev->hotplugged && spapr->dr_cpu_enabled) { > > ... > > > > Would do it > > This is a common ->plug() handler and would be used for memory too. Hence > there is a need to identify the type of object (CPU or memory) and handle it > differently. I mean in terms of the "/* TODO: Check if DR is enabled ? */". Adding this check here, as well as during spapr_dr_connector_new(), should cover all the cases where the value needs to be checked AFAICT. > > Regards, > Bharata.
On Thu, Jan 08, 2015 at 11:40:13AM +0530, Bharata B Rao wrote: > Support CPU hotplug via device-add command. Use the exising EPOW event > infrastructure to send CPU hotplug notification to the guest. > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > --- > hw/ppc/spapr.c | 205 +++++++++++++++++++++++++++++++++++++++++++- > hw/ppc/spapr_events.c | 8 +- > target-ppc/translate_init.c | 6 ++ > 3 files changed, 215 insertions(+), 4 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 515d770..a293a59 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -330,6 +330,8 @@ static void add_str(GString *s, const gchar *s1) > g_string_append_len(s, s1, strlen(s1) + 1); > } > > +uint32_t cpus_per_socket; > + > static void *spapr_create_fdt_skel(hwaddr initrd_base, > hwaddr initrd_size, > hwaddr kernel_size, > @@ -350,9 +352,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; > QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); > unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; > - uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; > char *buf; > > + cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; > add_str(hypertas, "hcall-pft"); > add_str(hypertas, "hcall-term"); > add_str(hypertas, "hcall-dabr"); > @@ -1744,12 +1746,209 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp) > } > } > > +/* TODO: Duplicates code from spapr_create_fdt_skel(), Fix this */ Uh, yeah, you should fix this. I think you probably want to move the filling out of the cpu dt information from create_fdt_skel() to finalize_fdt(), then you should be able to use a common helper function. > +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, > + int drc_index) > +{ > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + CPUPPCState *env = &cpu->env; > + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); > + int index = ppc_get_vcpu_dt_id(cpu); > + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), > + 0xffffffff, 0xffffffff}; > + uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; > + uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; > + uint32_t page_sizes_prop[64]; > + size_t page_sizes_prop_size; > + int smpt = ppc_get_compat_smt_threads(cpu); > + uint32_t servers_prop[smpt]; > + uint32_t gservers_prop[smpt * 2]; > + int i; > + uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; > + uint32_t associativity[] = {cpu_to_be32(0x5), > + cpu_to_be32(0x0), > + cpu_to_be32(0x0), > + cpu_to_be32(0x0), > + cpu_to_be32(cs->numa_node), > + cpu_to_be32(index)}; > + > + _FDT((fdt_setprop_cell(fdt, offset, "reg", index))); > + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); > + > + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); > + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", > + env->dcache_line_size))); > + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", > + env->dcache_line_size))); > + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", > + env->icache_line_size))); > + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", > + env->icache_line_size))); > + > + if (pcc->l1_dcache_size) { > + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", > + pcc->l1_dcache_size))); > + } else { > + fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n"); > + } > + if (pcc->l1_icache_size) { > + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", > + pcc->l1_icache_size))); > + } else { > + fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n"); > + } > + > + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); > + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr))); > + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); > + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); > + > + if (env->spr_cb[SPR_PURR].oea_read) { > + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); > + } > + > + if (env->mmu_model & POWERPC_MMU_1TSEG) { > + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", > + segs, sizeof(segs)))); > + } > + > + /* Advertise VMX/VSX (vector extensions) if available > + * 0 / no property == no vector extensions > + * 1 == VMX / Altivec available > + * 2 == VSX available */ > + if (env->insns_flags & PPC_ALTIVEC) { > + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; > + > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); > + } > + > + /* Advertise DFP (Decimal Floating Point) if available > + * 0 / no property == no DFP > + * 1 == DFP available */ > + if (env->insns_flags2 & PPC2_DFP) { > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); > + } > + > + page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, > + sizeof(page_sizes_prop)); > + if (page_sizes_prop_size) { > + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", > + page_sizes_prop, page_sizes_prop_size))); > + } > + > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", > + cs->cpu_index / cpus_per_socket))); > + > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index))); > + > + /* Build interrupt servers and gservers properties */ > + for (i = 0; i < smpt; i++) { > + servers_prop[i] = cpu_to_be32(index + i); > + /* Hack, direct the group queues back to cpu 0 */ > + gservers_prop[i*2] = cpu_to_be32(index + i); > + gservers_prop[i*2 + 1] = 0; > + } > + _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", > + servers_prop, sizeof(servers_prop))); > + _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s", > + gservers_prop, sizeof(gservers_prop))); > + _FDT(fdt_setprop(fdt, offset, "ibm,pft-size", > + pft_size_prop, sizeof(pft_size_prop))); > + > + if (nb_numa_nodes > 1) { > + _FDT(fdt_setprop(fdt, offset, "ibm,associativity", associativity, > + sizeof(associativity))); > + } > +} > + > +static void spapr_cpu_hotplug_add(DeviceState *dev, CPUState *cs) > +{ > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + DeviceClass *dc = DEVICE_GET_CLASS(cs); > + int id = ppc_get_vcpu_dt_id(cpu); > + sPAPRDRConnector *drc = > + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id); > + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > + int drc_index = drck->get_index(drc); > + void *fdt, *fdt_orig; > + int offset, i; > + char *nodename; > + > + /* add OF node for CPU and required OF DT properties */ Misleading comment, it's not really adding a node to anything but rather creating an fdt fragment which the DR code can send to the guest. > + fdt_orig = g_malloc0(FDT_MAX_SIZE); > + offset = fdt_create(fdt_orig, FDT_MAX_SIZE); > + fdt_begin_node(fdt_orig, ""); > + fdt_end_node(fdt_orig); > + fdt_finish(fdt_orig); So both the PCI and cpu hotplug code now have this idiom. I think we want a common helper that allocates some memory and populates it with an empty fdt fragment. That will mean there's only one place to fix up when we pull in a newer fdt which has a built in to do most of that. > + fdt = g_malloc0(FDT_MAX_SIZE); > + fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE); And like the PCI version, you don't need second malloc() - fdt_open_into() will work in place. > + nodename = g_strdup_printf("%s@%x", dc->fw_name, id); > + > + offset = fdt_add_subnode(fdt, offset, nodename); > + > + /* Set NUMA node for the added CPU */ > + for (i = 0; i < nb_numa_nodes; i++) { > + if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) { > + cs->numa_node = i; > + break; > + } > + } > + > + spapr_populate_cpu_dt(cs, fdt, offset, drc_index); > + g_free(fdt_orig); > + g_free(nodename); > + > + drck->attach(drc, dev, fdt, offset, false); > +} > + > +static void spapr_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > + Error **errp) > +{ > + Error *local_err = NULL; > + CPUState *cs = CPU(dev); > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + sPAPRDRConnector *drc = > + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cpu->cpu_dt_id); > + > + /* TODO: Check if DR is enabled ? */ Again, that's a TODO that shouldn't be postponed. > + g_assert(drc); Especially since I think this could cause an assertion crash if DRC is disabled. > + > + spapr_cpu_reset(POWERPC_CPU(CPU(dev))); > + spapr_cpu_hotplug_add(dev, cs); > + spapr_hotplug_req_add_event(drc); > + error_propagate(errp, local_err); Nothing in the code uses local_err, so I think this is redundant. > + return; > +} > + > +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, > + DeviceState *dev, Error **errp) > +{ > + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { > + if (dev->hotplugged) { > + spapr_cpu_plug(hotplug_dev, dev, errp); > + } > + } > +} > + > +static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine, > + DeviceState *dev) > +{ > + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { > + return HOTPLUG_HANDLER(machine); > + } > + return NULL; > +} > + > static void spapr_machine_class_init(ObjectClass *oc, void *data) > { > MachineClass *mc = MACHINE_CLASS(oc); > sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc); > FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc); > NMIClass *nc = NMI_CLASS(oc); > + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); > > mc->init = ppc_spapr_init; > mc->reset = ppc_spapr_reset; > @@ -1759,6 +1958,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) > mc->default_boot_order = NULL; > mc->kvm_type = spapr_kvm_type; > mc->has_dynamic_sysbus = true; > + mc->get_hotplug_handler = spapr_get_hotpug_handler; > + > + hc->plug = spapr_machine_device_plug; > smc->dr_phb_enabled = false; > smc->dr_cpu_enabled = false; > smc->dr_lmb_enabled = false; > @@ -1778,6 +1980,7 @@ static const TypeInfo spapr_machine_info = { > .interfaces = (InterfaceInfo[]) { > { TYPE_FW_PATH_PROVIDER }, > { TYPE_NMI }, > + { TYPE_HOTPLUG_HANDLER }, > { } > }, > }; > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index 434a75d..035d8c9 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -364,14 +364,16 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) > hp->hdr.section_length = cpu_to_be16(sizeof(*hp)); > hp->hdr.section_version = 1; /* includes extended modifier */ > hp->hotplug_action = hp_action; > - > + hp->drc.index = cpu_to_be32(drck->get_index(drc)); > + hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; > > switch (drc_type) { > case SPAPR_DR_CONNECTOR_TYPE_PCI: > - hp->drc.index = cpu_to_be32(drck->get_index(drc)); > - hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; > hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI; > break; > + case SPAPR_DR_CONNECTOR_TYPE_CPU: > + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU; > + break; > default: > /* skip notification for unknown connector types */ > g_free(new_hp); > diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c > index 9c642a5..cf9d8d3 100644 > --- a/target-ppc/translate_init.c > +++ b/target-ppc/translate_init.c > @@ -32,6 +32,7 @@ > #include "hw/qdev-properties.h" > #include "hw/ppc/spapr.h" > #include "hw/ppc/ppc.h" > +#include "sysemu/sysemu.h" > > //#define PPC_DUMP_CPU > //#define PPC_DEBUG_SPR > @@ -8909,6 +8910,11 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp) > return; > } > > + if (cs->cpu_index >= max_cpus) { > + error_setg(errp, "Can't have more CPUs, maxcpus limit reached"); > + return; > + } > + > cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt > + (cs->cpu_index % smp_threads); > #endif
On Fri, Jan 23, 2015 at 01:41:38PM +0100, Igor Mammedov wrote: > On Thu, 8 Jan 2015 11:40:13 +0530 > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > Support CPU hotplug via device-add command. Use the exising EPOW event > > infrastructure to send CPU hotplug notification to the guest. > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > > --- > > hw/ppc/spapr.c | 205 +++++++++++++++++++++++++++++++++++++++++++- > > hw/ppc/spapr_events.c | 8 +- > > target-ppc/translate_init.c | 6 ++ > > 3 files changed, 215 insertions(+), 4 deletions(-) > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > index 515d770..a293a59 100644 > > --- a/hw/ppc/spapr.c > > +++ b/hw/ppc/spapr.c > > @@ -330,6 +330,8 @@ static void add_str(GString *s, const gchar *s1) > > g_string_append_len(s, s1, strlen(s1) + 1); > > } > > > > +uint32_t cpus_per_socket; > static ??? Sure. > > + > > +static void spapr_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > > + Error **errp) > > +{ > > + Error *local_err = NULL; > > + CPUState *cs = CPU(dev); > > + PowerPCCPU *cpu = POWERPC_CPU(cs); > > + sPAPRDRConnector *drc = > > + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cpu->cpu_dt_id); > just rant: does this have any relation to hotplug_dev, the point here is to get > these data from hotplug_dev object/some child of it rather then via direct adhoc call. I see how hotplug_dev is being used to pass on the plug request to ACPI, but have to check how hotplug_dev can be used more meaningfully here. > > > + > > + /* TODO: Check if DR is enabled ? */ > > + g_assert(drc); > > + > > + spapr_cpu_reset(POWERPC_CPU(CPU(dev))); > reset probably should be don at realize time, see x86_cpu_realizefn() for example. Yes, can be done. > > > + spapr_cpu_hotplug_add(dev, cs); > > + spapr_hotplug_req_add_event(drc); > > + error_propagate(errp, local_err); > > + return; > > +} > > + > > +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, > > + DeviceState *dev, Error **errp) > > +{ > > + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { > > + if (dev->hotplugged) { > > + spapr_cpu_plug(hotplug_dev, dev, errp); > Would be nicer if this could do cold-plugged CPUs wiring too. Yes, will check and see how intrusive change that would be. > > diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c > > index 9c642a5..cf9d8d3 100644 > > --- a/target-ppc/translate_init.c > > +++ b/target-ppc/translate_init.c > > @@ -32,6 +32,7 @@ > > #include "hw/qdev-properties.h" > > #include "hw/ppc/spapr.h" > > #include "hw/ppc/ppc.h" > > +#include "sysemu/sysemu.h" > > > > //#define PPC_DUMP_CPU > > //#define PPC_DEBUG_SPR > > @@ -8909,6 +8910,11 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp) > > return; > > } > > > > + if (cs->cpu_index >= max_cpus) { > pls note that cpu_index is monotonically increases, so when you do unplug > and then plug it will go above max_cpus or the same will happen if > one device_add fails in the middle, the next CPU add will fail because of > cs->cpu_index goes overboard. > > I'd suggest not to rely/use cpu_index for any purposes and use other means > to identify where cpu is plugged in. On x68 we slowly getting rid of this > dependency in favor of apic_id (topology information), eventually it could > become: > -device cpu_foo,socket=X,core=Y[,thread=Z][,node=N] > > you probably could do the same. > It doesn't have to be in this series, just be aware of potential issues. I see your point and this needs to be fixed as I see this causing problems with CPU removal (from the middle) and subsequent addition (which makes use of "vcpu fd parking and reuse" mechanism). Thanks for your review. Regards, Bharata.
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 515d770..a293a59 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -330,6 +330,8 @@ static void add_str(GString *s, const gchar *s1) g_string_append_len(s, s1, strlen(s1) + 1); } +uint32_t cpus_per_socket; + static void *spapr_create_fdt_skel(hwaddr initrd_base, hwaddr initrd_size, hwaddr kernel_size, @@ -350,9 +352,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; - uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; char *buf; + cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; add_str(hypertas, "hcall-pft"); add_str(hypertas, "hcall-term"); add_str(hypertas, "hcall-dabr"); @@ -1744,12 +1746,209 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp) } } +/* TODO: Duplicates code from spapr_create_fdt_skel(), Fix this */ +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, + int drc_index) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); + int index = ppc_get_vcpu_dt_id(cpu); + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), + 0xffffffff, 0xffffffff}; + uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; + uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; + uint32_t page_sizes_prop[64]; + size_t page_sizes_prop_size; + int smpt = ppc_get_compat_smt_threads(cpu); + uint32_t servers_prop[smpt]; + uint32_t gservers_prop[smpt * 2]; + int i; + uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; + uint32_t associativity[] = {cpu_to_be32(0x5), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(cs->numa_node), + cpu_to_be32(index)}; + + _FDT((fdt_setprop_cell(fdt, offset, "reg", index))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); + + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", + env->icache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", + env->icache_line_size))); + + if (pcc->l1_dcache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", + pcc->l1_dcache_size))); + } else { + fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n"); + } + if (pcc->l1_icache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", + pcc->l1_icache_size))); + } else { + fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n"); + } + + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr))); + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); + + if (env->spr_cb[SPR_PURR].oea_read) { + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); + } + + if (env->mmu_model & POWERPC_MMU_1TSEG) { + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", + segs, sizeof(segs)))); + } + + /* Advertise VMX/VSX (vector extensions) if available + * 0 / no property == no vector extensions + * 1 == VMX / Altivec available + * 2 == VSX available */ + if (env->insns_flags & PPC_ALTIVEC) { + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); + } + + /* Advertise DFP (Decimal Floating Point) if available + * 0 / no property == no DFP + * 1 == DFP available */ + if (env->insns_flags2 & PPC2_DFP) { + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); + } + + page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, + sizeof(page_sizes_prop)); + if (page_sizes_prop_size) { + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", + page_sizes_prop, page_sizes_prop_size))); + } + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", + cs->cpu_index / cpus_per_socket))); + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index))); + + /* Build interrupt servers and gservers properties */ + for (i = 0; i < smpt; i++) { + servers_prop[i] = cpu_to_be32(index + i); + /* Hack, direct the group queues back to cpu 0 */ + gservers_prop[i*2] = cpu_to_be32(index + i); + gservers_prop[i*2 + 1] = 0; + } + _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", + servers_prop, sizeof(servers_prop))); + _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s", + gservers_prop, sizeof(gservers_prop))); + _FDT(fdt_setprop(fdt, offset, "ibm,pft-size", + pft_size_prop, sizeof(pft_size_prop))); + + if (nb_numa_nodes > 1) { + _FDT(fdt_setprop(fdt, offset, "ibm,associativity", associativity, + sizeof(associativity))); + } +} + +static void spapr_cpu_hotplug_add(DeviceState *dev, CPUState *cs) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + int id = ppc_get_vcpu_dt_id(cpu); + sPAPRDRConnector *drc = + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + int drc_index = drck->get_index(drc); + void *fdt, *fdt_orig; + int offset, i; + char *nodename; + + /* add OF node for CPU and required OF DT properties */ + fdt_orig = g_malloc0(FDT_MAX_SIZE); + offset = fdt_create(fdt_orig, FDT_MAX_SIZE); + fdt_begin_node(fdt_orig, ""); + fdt_end_node(fdt_orig); + fdt_finish(fdt_orig); + + fdt = g_malloc0(FDT_MAX_SIZE); + fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE); + + nodename = g_strdup_printf("%s@%x", dc->fw_name, id); + + offset = fdt_add_subnode(fdt, offset, nodename); + + /* Set NUMA node for the added CPU */ + for (i = 0; i < nb_numa_nodes; i++) { + if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) { + cs->numa_node = i; + break; + } + } + + spapr_populate_cpu_dt(cs, fdt, offset, drc_index); + g_free(fdt_orig); + g_free(nodename); + + drck->attach(drc, dev, fdt, offset, false); +} + +static void spapr_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + Error *local_err = NULL; + CPUState *cs = CPU(dev); + PowerPCCPU *cpu = POWERPC_CPU(cs); + sPAPRDRConnector *drc = + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cpu->cpu_dt_id); + + /* TODO: Check if DR is enabled ? */ + g_assert(drc); + + spapr_cpu_reset(POWERPC_CPU(CPU(dev))); + spapr_cpu_hotplug_add(dev, cs); + spapr_hotplug_req_add_event(drc); + error_propagate(errp, local_err); + return; +} + +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + if (dev->hotplugged) { + spapr_cpu_plug(hotplug_dev, dev, errp); + } + } +} + +static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine, + DeviceState *dev) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + return HOTPLUG_HANDLER(machine); + } + return NULL; +} + static void spapr_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc); FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc); NMIClass *nc = NMI_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); mc->init = ppc_spapr_init; mc->reset = ppc_spapr_reset; @@ -1759,6 +1958,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->default_boot_order = NULL; mc->kvm_type = spapr_kvm_type; mc->has_dynamic_sysbus = true; + mc->get_hotplug_handler = spapr_get_hotpug_handler; + + hc->plug = spapr_machine_device_plug; smc->dr_phb_enabled = false; smc->dr_cpu_enabled = false; smc->dr_lmb_enabled = false; @@ -1778,6 +1980,7 @@ static const TypeInfo spapr_machine_info = { .interfaces = (InterfaceInfo[]) { { TYPE_FW_PATH_PROVIDER }, { TYPE_NMI }, + { TYPE_HOTPLUG_HANDLER }, { } }, }; diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 434a75d..035d8c9 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -364,14 +364,16 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) hp->hdr.section_length = cpu_to_be16(sizeof(*hp)); hp->hdr.section_version = 1; /* includes extended modifier */ hp->hotplug_action = hp_action; - + hp->drc.index = cpu_to_be32(drck->get_index(drc)); + hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; switch (drc_type) { case SPAPR_DR_CONNECTOR_TYPE_PCI: - hp->drc.index = cpu_to_be32(drck->get_index(drc)); - hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI; break; + case SPAPR_DR_CONNECTOR_TYPE_CPU: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU; + break; default: /* skip notification for unknown connector types */ g_free(new_hp); diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 9c642a5..cf9d8d3 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -32,6 +32,7 @@ #include "hw/qdev-properties.h" #include "hw/ppc/spapr.h" #include "hw/ppc/ppc.h" +#include "sysemu/sysemu.h" //#define PPC_DUMP_CPU //#define PPC_DEBUG_SPR @@ -8909,6 +8910,11 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp) return; } + if (cs->cpu_index >= max_cpus) { + error_setg(errp, "Can't have more CPUs, maxcpus limit reached"); + return; + } + cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt + (cs->cpu_index % smp_threads); #endif
Support CPU hotplug via device-add command. Use the exising EPOW event infrastructure to send CPU hotplug notification to the guest. Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> --- hw/ppc/spapr.c | 205 +++++++++++++++++++++++++++++++++++++++++++- hw/ppc/spapr_events.c | 8 +- target-ppc/translate_init.c | 6 ++ 3 files changed, 215 insertions(+), 4 deletions(-)