diff mbox

[RFC,v1,06/13] spapr: CPU hotplug support

Message ID 1420697420-16053-7-git-send-email-bharata@linux.vnet.ibm.com
State New
Headers show

Commit Message

Bharata B Rao Jan. 8, 2015, 6:10 a.m. UTC
Support CPU hotplug via device-add command. Use the exising EPOW event
infrastructure to send CPU hotplug notification to the guest.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c              | 205 +++++++++++++++++++++++++++++++++++++++++++-
 hw/ppc/spapr_events.c       |   8 +-
 target-ppc/translate_init.c |   6 ++
 3 files changed, 215 insertions(+), 4 deletions(-)

Comments

Michael Roth Jan. 22, 2015, 10:16 p.m. UTC | #1
Quoting Bharata B Rao (2015-01-08 00:10:13)
> Support CPU hotplug via device-add command. Use the exising EPOW event
> infrastructure to send CPU hotplug notification to the guest.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c              | 205 +++++++++++++++++++++++++++++++++++++++++++-
>  hw/ppc/spapr_events.c       |   8 +-
>  target-ppc/translate_init.c |   6 ++
>  3 files changed, 215 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 515d770..a293a59 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -330,6 +330,8 @@ static void add_str(GString *s, const gchar *s1)
>      g_string_append_len(s, s1, strlen(s1) + 1);
>  }
> 
> +uint32_t cpus_per_socket;
> +
>  static void *spapr_create_fdt_skel(hwaddr initrd_base,
>                                     hwaddr initrd_size,
>                                     hwaddr kernel_size,
> @@ -350,9 +352,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
>      unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
>      QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
>      unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
> -    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
>      char *buf;
> 
> +    cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
>      add_str(hypertas, "hcall-pft");
>      add_str(hypertas, "hcall-term");
>      add_str(hypertas, "hcall-dabr");
> @@ -1744,12 +1746,209 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
>      }
>  }
> 
> +/* TODO: Duplicates code from spapr_create_fdt_skel(), Fix this */
> +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
> +            int drc_index)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    CPUPPCState *env = &cpu->env;
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> +    int index = ppc_get_vcpu_dt_id(cpu);
> +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> +                       0xffffffff, 0xffffffff};
> +    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
> +    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
> +    uint32_t page_sizes_prop[64];
> +    size_t page_sizes_prop_size;
> +    int smpt = ppc_get_compat_smt_threads(cpu);
> +    uint32_t servers_prop[smpt];
> +    uint32_t gservers_prop[smpt * 2];
> +    int i;
> +    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> +    uint32_t associativity[] = {cpu_to_be32(0x5),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(cs->numa_node),
> +                                cpu_to_be32(index)};
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
> +    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
> +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
> +                        env->dcache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
> +                        env->dcache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
> +                        env->icache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
> +                            env->icache_line_size)));
> +
> +    if (pcc->l1_dcache_size) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
> +            pcc->l1_dcache_size)));
> +    } else {
> +        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
> +    }
> +    if (pcc->l1_icache_size) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
> +            pcc->l1_icache_size)));
> +    } else {
> +        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
> +    }
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
> +    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
> +    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
> +
> +    if (env->spr_cb[SPR_PURR].oea_read) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
> +    }
> +
> +    if (env->mmu_model & POWERPC_MMU_1TSEG) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
> +                           segs, sizeof(segs))));
> +    }
> +
> +    /* Advertise VMX/VSX (vector extensions) if available
> +     *   0 / no property == no vector extensions
> +     *   1               == VMX / Altivec available
> +     *   2               == VSX available */
> +    if (env->insns_flags & PPC_ALTIVEC) {
> +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
> +
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
> +    }
> +
> +    /* Advertise DFP (Decimal Floating Point) if available
> +     *   0 / no property == no DFP
> +     *   1               == DFP available */
> +    if (env->insns_flags2 & PPC2_DFP) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
> +    }
> +
> +    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
> +                                                  sizeof(page_sizes_prop));
> +    if (page_sizes_prop_size) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
> +                           page_sizes_prop, page_sizes_prop_size)));
> +    }
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
> +                                cs->cpu_index / cpus_per_socket)));
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
> +
> +    /* Build interrupt servers and gservers properties */
> +    for (i = 0; i < smpt; i++) {
> +        servers_prop[i] = cpu_to_be32(index + i);
> +        /* Hack, direct the group queues back to cpu 0 */
> +        gservers_prop[i*2] = cpu_to_be32(index + i);
> +        gservers_prop[i*2 + 1] = 0;
> +    }
> +    _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
> +                       servers_prop, sizeof(servers_prop)));
> +    _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
> +                      gservers_prop, sizeof(gservers_prop)));
> +    _FDT(fdt_setprop(fdt, offset, "ibm,pft-size",
> +                          pft_size_prop, sizeof(pft_size_prop)));
> +
> +    if (nb_numa_nodes > 1) {
> +        _FDT(fdt_setprop(fdt, offset, "ibm,associativity", associativity,
> +                          sizeof(associativity)));
> +    }
> +}
> +
> +static void spapr_cpu_hotplug_add(DeviceState *dev, CPUState *cs)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
> +    int id = ppc_get_vcpu_dt_id(cpu);
> +    sPAPRDRConnector *drc =
> +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
> +    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +    int drc_index = drck->get_index(drc);
> +    void *fdt, *fdt_orig;
> +    int offset, i;
> +    char *nodename;
> +
> +    /* add OF node for CPU and required OF DT properties */
> +    fdt_orig = g_malloc0(FDT_MAX_SIZE);
> +    offset = fdt_create(fdt_orig, FDT_MAX_SIZE);
> +    fdt_begin_node(fdt_orig, "");
> +    fdt_end_node(fdt_orig);
> +    fdt_finish(fdt_orig);
> +
> +    fdt = g_malloc0(FDT_MAX_SIZE);
> +    fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE);
> +
> +    nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
> +
> +    offset = fdt_add_subnode(fdt, offset, nodename);
> +
> +    /* Set NUMA node for the added CPU */
> +    for (i = 0; i < nb_numa_nodes; i++) {
> +        if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
> +            cs->numa_node = i;
> +            break;
> +        }
> +    }
> +
> +    spapr_populate_cpu_dt(cs, fdt, offset, drc_index);
> +    g_free(fdt_orig);
> +    g_free(nodename);
> +
> +    drck->attach(drc, dev, fdt, offset, false);
> +}
> +
> +static void spapr_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> +                            Error **errp)
> +{
> +    Error *local_err = NULL;
> +    CPUState *cs = CPU(dev);
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    sPAPRDRConnector *drc =
> +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cpu->cpu_dt_id);
> +
> +    /* TODO: Check if DR is enabled ? */

Er, I take back my comment from earlier, you do also need to check for
spapr->dr_cpu_enabled below, since in the case of PCI it's enabled on a per-PHB
basis, whereas here it's a machine-wide option...

> +    g_assert(drc);
> +
> +    spapr_cpu_reset(POWERPC_CPU(CPU(dev)));
> +    spapr_cpu_hotplug_add(dev, cs);
> +    spapr_hotplug_req_add_event(drc);
> +    error_propagate(errp, local_err);
> +    return;
> +}
> +
> +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
> +                                      DeviceState *dev, Error **errp)
> +{
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> +        if (dev->hotplugged) {

Maybe just

           if (dev->hotplugged && spapr->dr_cpu_enabled) {
               ...

Would do it

> +            spapr_cpu_plug(hotplug_dev, dev, errp);
> +        }
> +    }
> +}
> +
> +static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine,
> +                                             DeviceState *dev)
> +{
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> +        return HOTPLUG_HANDLER(machine);
> +    }
> +    return NULL;
> +}
> +
>  static void spapr_machine_class_init(ObjectClass *oc, void *data)
>  {
>      MachineClass *mc = MACHINE_CLASS(oc);
>      sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc);
>      FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
>      NMIClass *nc = NMI_CLASS(oc);
> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
> 
>      mc->init = ppc_spapr_init;
>      mc->reset = ppc_spapr_reset;
> @@ -1759,6 +1958,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      mc->default_boot_order = NULL;
>      mc->kvm_type = spapr_kvm_type;
>      mc->has_dynamic_sysbus = true;
> +    mc->get_hotplug_handler = spapr_get_hotpug_handler;
> +
> +    hc->plug = spapr_machine_device_plug;
>      smc->dr_phb_enabled = false;
>      smc->dr_cpu_enabled = false;
>      smc->dr_lmb_enabled = false;
> @@ -1778,6 +1980,7 @@ static const TypeInfo spapr_machine_info = {
>      .interfaces = (InterfaceInfo[]) {
>          { TYPE_FW_PATH_PROVIDER },
>          { TYPE_NMI },
> +        { TYPE_HOTPLUG_HANDLER },
>          { }
>      },
>  };
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 434a75d..035d8c9 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -364,14 +364,16 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action)
>      hp->hdr.section_length = cpu_to_be16(sizeof(*hp));
>      hp->hdr.section_version = 1; /* includes extended modifier */
>      hp->hotplug_action = hp_action;
> -
> +    hp->drc.index = cpu_to_be32(drck->get_index(drc));
> +    hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX;
> 
>      switch (drc_type) {
>      case SPAPR_DR_CONNECTOR_TYPE_PCI:
> -        hp->drc.index = cpu_to_be32(drck->get_index(drc));
> -        hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX;
>          hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI;
>          break;
> +    case SPAPR_DR_CONNECTOR_TYPE_CPU:
> +        hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU;
> +        break;
>      default:
>          /* skip notification for unknown connector types */
>          g_free(new_hp);
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 9c642a5..cf9d8d3 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -32,6 +32,7 @@
>  #include "hw/qdev-properties.h"
>  #include "hw/ppc/spapr.h"
>  #include "hw/ppc/ppc.h"
> +#include "sysemu/sysemu.h"
> 
>  //#define PPC_DUMP_CPU
>  //#define PPC_DEBUG_SPR
> @@ -8909,6 +8910,11 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
>          return;
>      }
> 
> +    if (cs->cpu_index >= max_cpus) {
> +        error_setg(errp, "Can't have more CPUs, maxcpus limit reached");
> +        return;
> +    }
> +
>      cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
>          + (cs->cpu_index % smp_threads);
>  #endif
> -- 
> 2.1.0
Igor Mammedov Jan. 23, 2015, 12:41 p.m. UTC | #2
On Thu,  8 Jan 2015 11:40:13 +0530
Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:

> Support CPU hotplug via device-add command. Use the exising EPOW event
> infrastructure to send CPU hotplug notification to the guest.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c              | 205 +++++++++++++++++++++++++++++++++++++++++++-
>  hw/ppc/spapr_events.c       |   8 +-
>  target-ppc/translate_init.c |   6 ++
>  3 files changed, 215 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 515d770..a293a59 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -330,6 +330,8 @@ static void add_str(GString *s, const gchar *s1)
>      g_string_append_len(s, s1, strlen(s1) + 1);
>  }
>  
> +uint32_t cpus_per_socket;
static ???

> +
>  static void *spapr_create_fdt_skel(hwaddr initrd_base,
>                                     hwaddr initrd_size,
>                                     hwaddr kernel_size,
> @@ -350,9 +352,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
>      unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
>      QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
>      unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
> -    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
>      char *buf;
>  
> +    cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
>      add_str(hypertas, "hcall-pft");
>      add_str(hypertas, "hcall-term");
>      add_str(hypertas, "hcall-dabr");
> @@ -1744,12 +1746,209 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
>      }
>  }
>  
> +/* TODO: Duplicates code from spapr_create_fdt_skel(), Fix this */
> +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
> +            int drc_index)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    CPUPPCState *env = &cpu->env;
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> +    int index = ppc_get_vcpu_dt_id(cpu);
> +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> +                       0xffffffff, 0xffffffff};
> +    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
> +    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
> +    uint32_t page_sizes_prop[64];
> +    size_t page_sizes_prop_size;
> +    int smpt = ppc_get_compat_smt_threads(cpu);
> +    uint32_t servers_prop[smpt];
> +    uint32_t gservers_prop[smpt * 2];
> +    int i;
> +    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> +    uint32_t associativity[] = {cpu_to_be32(0x5),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(cs->numa_node),
> +                                cpu_to_be32(index)};
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
> +    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
> +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
> +                        env->dcache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
> +                        env->dcache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
> +                        env->icache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
> +                            env->icache_line_size)));
> +
> +    if (pcc->l1_dcache_size) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
> +            pcc->l1_dcache_size)));
> +    } else {
> +        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
> +    }
> +    if (pcc->l1_icache_size) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
> +            pcc->l1_icache_size)));
> +    } else {
> +        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
> +    }
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
> +    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
> +    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
> +
> +    if (env->spr_cb[SPR_PURR].oea_read) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
> +    }
> +
> +    if (env->mmu_model & POWERPC_MMU_1TSEG) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
> +                           segs, sizeof(segs))));
> +    }
> +
> +    /* Advertise VMX/VSX (vector extensions) if available
> +     *   0 / no property == no vector extensions
> +     *   1               == VMX / Altivec available
> +     *   2               == VSX available */
> +    if (env->insns_flags & PPC_ALTIVEC) {
> +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
> +
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
> +    }
> +
> +    /* Advertise DFP (Decimal Floating Point) if available
> +     *   0 / no property == no DFP
> +     *   1               == DFP available */
> +    if (env->insns_flags2 & PPC2_DFP) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
> +    }
> +
> +    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
> +                                                  sizeof(page_sizes_prop));
> +    if (page_sizes_prop_size) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
> +                           page_sizes_prop, page_sizes_prop_size)));
> +    }
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
> +                                cs->cpu_index / cpus_per_socket)));
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
> +
> +    /* Build interrupt servers and gservers properties */
> +    for (i = 0; i < smpt; i++) {
> +        servers_prop[i] = cpu_to_be32(index + i);
> +        /* Hack, direct the group queues back to cpu 0 */
> +        gservers_prop[i*2] = cpu_to_be32(index + i);
> +        gservers_prop[i*2 + 1] = 0;
> +    }
> +    _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
> +                       servers_prop, sizeof(servers_prop)));
> +    _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
> +                      gservers_prop, sizeof(gservers_prop)));
> +    _FDT(fdt_setprop(fdt, offset, "ibm,pft-size",
> +                          pft_size_prop, sizeof(pft_size_prop)));
> +
> +    if (nb_numa_nodes > 1) {
> +        _FDT(fdt_setprop(fdt, offset, "ibm,associativity", associativity,
> +                          sizeof(associativity)));
> +    }
> +}
> +
> +static void spapr_cpu_hotplug_add(DeviceState *dev, CPUState *cs)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
> +    int id = ppc_get_vcpu_dt_id(cpu);
> +    sPAPRDRConnector *drc =
> +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
> +    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +    int drc_index = drck->get_index(drc);
> +    void *fdt, *fdt_orig;
> +    int offset, i;
> +    char *nodename;
> +
> +    /* add OF node for CPU and required OF DT properties */
> +    fdt_orig = g_malloc0(FDT_MAX_SIZE);
> +    offset = fdt_create(fdt_orig, FDT_MAX_SIZE);
> +    fdt_begin_node(fdt_orig, "");
> +    fdt_end_node(fdt_orig);
> +    fdt_finish(fdt_orig);
> +
> +    fdt = g_malloc0(FDT_MAX_SIZE);
> +    fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE);
> +
> +    nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
> +
> +    offset = fdt_add_subnode(fdt, offset, nodename);
> +
> +    /* Set NUMA node for the added CPU */
> +    for (i = 0; i < nb_numa_nodes; i++) {
> +        if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
> +            cs->numa_node = i;
> +            break;
> +        }
> +    }
> +
> +    spapr_populate_cpu_dt(cs, fdt, offset, drc_index);
> +    g_free(fdt_orig);
> +    g_free(nodename);
> +
> +    drck->attach(drc, dev, fdt, offset, false);
> +}
> +
> +static void spapr_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> +                            Error **errp)
> +{
> +    Error *local_err = NULL;
> +    CPUState *cs = CPU(dev);
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    sPAPRDRConnector *drc =
> +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cpu->cpu_dt_id);
just rant: does this have any relation to hotplug_dev, the point here is to get
these data from hotplug_dev object/some child of it rather then via direct adhoc call.

> +
> +    /* TODO: Check if DR is enabled ? */
> +    g_assert(drc);
> +
> +    spapr_cpu_reset(POWERPC_CPU(CPU(dev)));
reset probably should be don at realize time, see x86_cpu_realizefn() for example.

> +    spapr_cpu_hotplug_add(dev, cs);
> +    spapr_hotplug_req_add_event(drc);
> +    error_propagate(errp, local_err);
> +    return;
> +}
> +
> +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
> +                                      DeviceState *dev, Error **errp)
> +{
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> +        if (dev->hotplugged) {
> +            spapr_cpu_plug(hotplug_dev, dev, errp);
Would be nicer if this could do cold-plugged CPUs wiring too.

> +        }
> +    }
> +}
> +
> +static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine,
> +                                             DeviceState *dev)
> +{
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> +        return HOTPLUG_HANDLER(machine);
> +    }
> +    return NULL;
> +}
> +
>  static void spapr_machine_class_init(ObjectClass *oc, void *data)
>  {
>      MachineClass *mc = MACHINE_CLASS(oc);
>      sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc);
>      FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
>      NMIClass *nc = NMI_CLASS(oc);
> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
>  
>      mc->init = ppc_spapr_init;
>      mc->reset = ppc_spapr_reset;
> @@ -1759,6 +1958,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      mc->default_boot_order = NULL;
>      mc->kvm_type = spapr_kvm_type;
>      mc->has_dynamic_sysbus = true;
> +    mc->get_hotplug_handler = spapr_get_hotpug_handler;
> +
> +    hc->plug = spapr_machine_device_plug;
>      smc->dr_phb_enabled = false;
>      smc->dr_cpu_enabled = false;
>      smc->dr_lmb_enabled = false;
> @@ -1778,6 +1980,7 @@ static const TypeInfo spapr_machine_info = {
>      .interfaces = (InterfaceInfo[]) {
>          { TYPE_FW_PATH_PROVIDER },
>          { TYPE_NMI },
> +        { TYPE_HOTPLUG_HANDLER },
>          { }
>      },
>  };
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 434a75d..035d8c9 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -364,14 +364,16 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action)
>      hp->hdr.section_length = cpu_to_be16(sizeof(*hp));
>      hp->hdr.section_version = 1; /* includes extended modifier */
>      hp->hotplug_action = hp_action;
> -
> +    hp->drc.index = cpu_to_be32(drck->get_index(drc));
> +    hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX;
>  
>      switch (drc_type) {
>      case SPAPR_DR_CONNECTOR_TYPE_PCI:
> -        hp->drc.index = cpu_to_be32(drck->get_index(drc));
> -        hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX;
>          hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI;
>          break;
> +    case SPAPR_DR_CONNECTOR_TYPE_CPU:
> +        hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU;
> +        break;
>      default:
>          /* skip notification for unknown connector types */
>          g_free(new_hp);
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 9c642a5..cf9d8d3 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -32,6 +32,7 @@
>  #include "hw/qdev-properties.h"
>  #include "hw/ppc/spapr.h"
>  #include "hw/ppc/ppc.h"
> +#include "sysemu/sysemu.h"
>  
>  //#define PPC_DUMP_CPU
>  //#define PPC_DEBUG_SPR
> @@ -8909,6 +8910,11 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
>          return;
>      }
>  
> +    if (cs->cpu_index >= max_cpus) {
pls note that cpu_index is monotonically increases, so when you do unplug
and then plug it will go above max_cpus or the same will happen if
one device_add fails in the middle, the next CPU add will fail because of
cs->cpu_index goes overboard.

I'd suggest not to rely/use cpu_index for any purposes and use other means
to identify where cpu is plugged in. On x68 we slowly getting rid of this
dependency in favor of apic_id (topology information), eventually it could
become:
  -device cpu_foo,socket=X,core=Y[,thread=Z][,node=N]

you probably could do the same.
It doesn't have to be in this series, just be aware of potential issues.


> +        error_setg(errp, "Can't have more CPUs, maxcpus limit reached");
> +        return;
> +    }
> +
>      cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
>          + (cs->cpu_index % smp_threads);
>  #endif
Bharata B Rao Jan. 28, 2015, 4:19 a.m. UTC | #3
On Thu, Jan 22, 2015 at 04:16:01PM -0600, Michael Roth wrote:
> Quoting Bharata B Rao (2015-01-08 00:10:13)
> > +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
> > +                                      DeviceState *dev, Error **errp)
> > +{
> > +    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> > +        if (dev->hotplugged) {
> 
> Maybe just
> 
>            if (dev->hotplugged && spapr->dr_cpu_enabled) {
>                ...
> 
> Would do it

This is a common ->plug() handler and would be used for memory too. Hence
there is a need to identify the type of object (CPU or memory) and handle it
differently.

Regards,
Bharata.
Michael Roth Jan. 28, 2015, 5:41 a.m. UTC | #4
Quoting Bharata B Rao (2015-01-27 22:19:56)
> On Thu, Jan 22, 2015 at 04:16:01PM -0600, Michael Roth wrote:
> > Quoting Bharata B Rao (2015-01-08 00:10:13)
> > > +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
> > > +                                      DeviceState *dev, Error **errp)
> > > +{
> > > +    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> > > +        if (dev->hotplugged) {
> > 
> > Maybe just
> > 
> >            if (dev->hotplugged && spapr->dr_cpu_enabled) {
> >                ...
> > 
> > Would do it
> 
> This is a common ->plug() handler and would be used for memory too. Hence
> there is a need to identify the type of object (CPU or memory) and handle it
> differently.

I mean in terms of the "/* TODO: Check if DR is enabled ? */". Adding this
check here, as well as during spapr_dr_connector_new(), should cover all
the cases where the value needs to be checked AFAICT.

> 
> Regards,
> Bharata.
David Gibson Jan. 29, 2015, 1:31 a.m. UTC | #5
On Thu, Jan 08, 2015 at 11:40:13AM +0530, Bharata B Rao wrote:
> Support CPU hotplug via device-add command. Use the exising EPOW event
> infrastructure to send CPU hotplug notification to the guest.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c              | 205 +++++++++++++++++++++++++++++++++++++++++++-
>  hw/ppc/spapr_events.c       |   8 +-
>  target-ppc/translate_init.c |   6 ++
>  3 files changed, 215 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 515d770..a293a59 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -330,6 +330,8 @@ static void add_str(GString *s, const gchar *s1)
>      g_string_append_len(s, s1, strlen(s1) + 1);
>  }
>  
> +uint32_t cpus_per_socket;
> +
>  static void *spapr_create_fdt_skel(hwaddr initrd_base,
>                                     hwaddr initrd_size,
>                                     hwaddr kernel_size,
> @@ -350,9 +352,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
>      unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
>      QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
>      unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
> -    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
>      char *buf;
>  
> +    cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
>      add_str(hypertas, "hcall-pft");
>      add_str(hypertas, "hcall-term");
>      add_str(hypertas, "hcall-dabr");
> @@ -1744,12 +1746,209 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
>      }
>  }
>  
> +/* TODO: Duplicates code from spapr_create_fdt_skel(), Fix this */

Uh, yeah, you should fix this.  I think you probably want to move the
filling out of the cpu dt information from create_fdt_skel() to
finalize_fdt(), then you should be able to use a common helper function.

> +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
> +            int drc_index)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    CPUPPCState *env = &cpu->env;
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> +    int index = ppc_get_vcpu_dt_id(cpu);
> +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> +                       0xffffffff, 0xffffffff};
> +    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
> +    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
> +    uint32_t page_sizes_prop[64];
> +    size_t page_sizes_prop_size;
> +    int smpt = ppc_get_compat_smt_threads(cpu);
> +    uint32_t servers_prop[smpt];
> +    uint32_t gservers_prop[smpt * 2];
> +    int i;
> +    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> +    uint32_t associativity[] = {cpu_to_be32(0x5),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(cs->numa_node),
> +                                cpu_to_be32(index)};
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
> +    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
> +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
> +                        env->dcache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
> +                        env->dcache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
> +                        env->icache_line_size)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
> +                            env->icache_line_size)));
> +
> +    if (pcc->l1_dcache_size) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
> +            pcc->l1_dcache_size)));
> +    } else {
> +        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
> +    }
> +    if (pcc->l1_icache_size) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
> +            pcc->l1_icache_size)));
> +    } else {
> +        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
> +    }
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
> +    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
> +    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
> +
> +    if (env->spr_cb[SPR_PURR].oea_read) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
> +    }
> +
> +    if (env->mmu_model & POWERPC_MMU_1TSEG) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
> +                           segs, sizeof(segs))));
> +    }
> +
> +    /* Advertise VMX/VSX (vector extensions) if available
> +     *   0 / no property == no vector extensions
> +     *   1               == VMX / Altivec available
> +     *   2               == VSX available */
> +    if (env->insns_flags & PPC_ALTIVEC) {
> +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
> +
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
> +    }
> +
> +    /* Advertise DFP (Decimal Floating Point) if available
> +     *   0 / no property == no DFP
> +     *   1               == DFP available */
> +    if (env->insns_flags2 & PPC2_DFP) {
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
> +    }
> +
> +    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
> +                                                  sizeof(page_sizes_prop));
> +    if (page_sizes_prop_size) {
> +        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
> +                           page_sizes_prop, page_sizes_prop_size)));
> +    }
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
> +                                cs->cpu_index / cpus_per_socket)));
> +
> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
> +
> +    /* Build interrupt servers and gservers properties */
> +    for (i = 0; i < smpt; i++) {
> +        servers_prop[i] = cpu_to_be32(index + i);
> +        /* Hack, direct the group queues back to cpu 0 */
> +        gservers_prop[i*2] = cpu_to_be32(index + i);
> +        gservers_prop[i*2 + 1] = 0;
> +    }
> +    _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
> +                       servers_prop, sizeof(servers_prop)));
> +    _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
> +                      gservers_prop, sizeof(gservers_prop)));
> +    _FDT(fdt_setprop(fdt, offset, "ibm,pft-size",
> +                          pft_size_prop, sizeof(pft_size_prop)));
> +
> +    if (nb_numa_nodes > 1) {
> +        _FDT(fdt_setprop(fdt, offset, "ibm,associativity", associativity,
> +                          sizeof(associativity)));
> +    }
> +}
> +
> +static void spapr_cpu_hotplug_add(DeviceState *dev, CPUState *cs)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
> +    int id = ppc_get_vcpu_dt_id(cpu);
> +    sPAPRDRConnector *drc =
> +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
> +    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +    int drc_index = drck->get_index(drc);
> +    void *fdt, *fdt_orig;
> +    int offset, i;
> +    char *nodename;
> +
> +    /* add OF node for CPU and required OF DT properties */

Misleading comment, it's not really adding a node to anything but
rather creating an fdt fragment which the DR code can send to the
guest.

> +    fdt_orig = g_malloc0(FDT_MAX_SIZE);
> +    offset = fdt_create(fdt_orig, FDT_MAX_SIZE);
> +    fdt_begin_node(fdt_orig, "");
> +    fdt_end_node(fdt_orig);
> +    fdt_finish(fdt_orig);

So both the PCI and cpu hotplug code now have this idiom.  I think we
want a common helper that allocates some memory and populates it with
an empty fdt fragment.  That will mean there's only one place to fix
up when we pull in a newer fdt which has a built in to do most of
that.

> +    fdt = g_malloc0(FDT_MAX_SIZE);
> +    fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE);

And like the PCI version, you don't need second malloc() -
fdt_open_into() will work in place.

> +    nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
> +
> +    offset = fdt_add_subnode(fdt, offset, nodename);
> +
> +    /* Set NUMA node for the added CPU */
> +    for (i = 0; i < nb_numa_nodes; i++) {
> +        if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
> +            cs->numa_node = i;
> +            break;
> +        }
> +    }
> +
> +    spapr_populate_cpu_dt(cs, fdt, offset, drc_index);
> +    g_free(fdt_orig);
> +    g_free(nodename);
> +
> +    drck->attach(drc, dev, fdt, offset, false);
> +}
> +
> +static void spapr_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> +                            Error **errp)
> +{
> +    Error *local_err = NULL;
> +    CPUState *cs = CPU(dev);
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    sPAPRDRConnector *drc =
> +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cpu->cpu_dt_id);
> +
> +    /* TODO: Check if DR is enabled ? */

Again, that's a TODO that shouldn't be postponed.

> +    g_assert(drc);

Especially since I think this could cause an assertion crash if DRC is disabled.

> +
> +    spapr_cpu_reset(POWERPC_CPU(CPU(dev)));
> +    spapr_cpu_hotplug_add(dev, cs);
> +    spapr_hotplug_req_add_event(drc);
> +    error_propagate(errp, local_err);

Nothing in the code uses local_err, so I think this is redundant.

> +    return;
> +}
> +
> +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
> +                                      DeviceState *dev, Error **errp)
> +{
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> +        if (dev->hotplugged) {
> +            spapr_cpu_plug(hotplug_dev, dev, errp);
> +        }
> +    }
> +}
> +
> +static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine,
> +                                             DeviceState *dev)
> +{
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> +        return HOTPLUG_HANDLER(machine);
> +    }
> +    return NULL;
> +}
> +
>  static void spapr_machine_class_init(ObjectClass *oc, void *data)
>  {
>      MachineClass *mc = MACHINE_CLASS(oc);
>      sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc);
>      FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
>      NMIClass *nc = NMI_CLASS(oc);
> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
>  
>      mc->init = ppc_spapr_init;
>      mc->reset = ppc_spapr_reset;
> @@ -1759,6 +1958,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      mc->default_boot_order = NULL;
>      mc->kvm_type = spapr_kvm_type;
>      mc->has_dynamic_sysbus = true;
> +    mc->get_hotplug_handler = spapr_get_hotpug_handler;
> +
> +    hc->plug = spapr_machine_device_plug;
>      smc->dr_phb_enabled = false;
>      smc->dr_cpu_enabled = false;
>      smc->dr_lmb_enabled = false;
> @@ -1778,6 +1980,7 @@ static const TypeInfo spapr_machine_info = {
>      .interfaces = (InterfaceInfo[]) {
>          { TYPE_FW_PATH_PROVIDER },
>          { TYPE_NMI },
> +        { TYPE_HOTPLUG_HANDLER },
>          { }
>      },
>  };
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 434a75d..035d8c9 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -364,14 +364,16 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action)
>      hp->hdr.section_length = cpu_to_be16(sizeof(*hp));
>      hp->hdr.section_version = 1; /* includes extended modifier */
>      hp->hotplug_action = hp_action;
> -
> +    hp->drc.index = cpu_to_be32(drck->get_index(drc));
> +    hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX;
>  
>      switch (drc_type) {
>      case SPAPR_DR_CONNECTOR_TYPE_PCI:
> -        hp->drc.index = cpu_to_be32(drck->get_index(drc));
> -        hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX;
>          hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI;
>          break;
> +    case SPAPR_DR_CONNECTOR_TYPE_CPU:
> +        hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU;
> +        break;
>      default:
>          /* skip notification for unknown connector types */
>          g_free(new_hp);
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 9c642a5..cf9d8d3 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -32,6 +32,7 @@
>  #include "hw/qdev-properties.h"
>  #include "hw/ppc/spapr.h"
>  #include "hw/ppc/ppc.h"
> +#include "sysemu/sysemu.h"
>  
>  //#define PPC_DUMP_CPU
>  //#define PPC_DEBUG_SPR
> @@ -8909,6 +8910,11 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
>          return;
>      }
>  
> +    if (cs->cpu_index >= max_cpus) {
> +        error_setg(errp, "Can't have more CPUs, maxcpus limit reached");
> +        return;
> +    }
> +
>      cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
>          + (cs->cpu_index % smp_threads);
>  #endif
Bharata B Rao Jan. 30, 2015, 6:59 a.m. UTC | #6
On Fri, Jan 23, 2015 at 01:41:38PM +0100, Igor Mammedov wrote:
> On Thu,  8 Jan 2015 11:40:13 +0530
> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> 
> > Support CPU hotplug via device-add command. Use the exising EPOW event
> > infrastructure to send CPU hotplug notification to the guest.
> > 
> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > ---
> >  hw/ppc/spapr.c              | 205 +++++++++++++++++++++++++++++++++++++++++++-
> >  hw/ppc/spapr_events.c       |   8 +-
> >  target-ppc/translate_init.c |   6 ++
> >  3 files changed, 215 insertions(+), 4 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 515d770..a293a59 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -330,6 +330,8 @@ static void add_str(GString *s, const gchar *s1)
> >      g_string_append_len(s, s1, strlen(s1) + 1);
> >  }
> >  
> > +uint32_t cpus_per_socket;
> static ???

Sure.

> > +
> > +static void spapr_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> > +                            Error **errp)
> > +{
> > +    Error *local_err = NULL;
> > +    CPUState *cs = CPU(dev);
> > +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> > +    sPAPRDRConnector *drc =
> > +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cpu->cpu_dt_id);
> just rant: does this have any relation to hotplug_dev, the point here is to get
> these data from hotplug_dev object/some child of it rather then via direct adhoc call.

I see how hotplug_dev is being used to pass on the plug request to ACPI, but
have to check how hotplug_dev can be used more meaningfully here.

> 
> > +
> > +    /* TODO: Check if DR is enabled ? */
> > +    g_assert(drc);
> > +
> > +    spapr_cpu_reset(POWERPC_CPU(CPU(dev)));
> reset probably should be don at realize time, see x86_cpu_realizefn() for example.

Yes, can be done.

> 
> > +    spapr_cpu_hotplug_add(dev, cs);
> > +    spapr_hotplug_req_add_event(drc);
> > +    error_propagate(errp, local_err);
> > +    return;
> > +}
> > +
> > +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
> > +                                      DeviceState *dev, Error **errp)
> > +{
> > +    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> > +        if (dev->hotplugged) {
> > +            spapr_cpu_plug(hotplug_dev, dev, errp);
> Would be nicer if this could do cold-plugged CPUs wiring too.

Yes, will check and see how intrusive change that would be.

> > diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> > index 9c642a5..cf9d8d3 100644
> > --- a/target-ppc/translate_init.c
> > +++ b/target-ppc/translate_init.c
> > @@ -32,6 +32,7 @@
> >  #include "hw/qdev-properties.h"
> >  #include "hw/ppc/spapr.h"
> >  #include "hw/ppc/ppc.h"
> > +#include "sysemu/sysemu.h"
> >  
> >  //#define PPC_DUMP_CPU
> >  //#define PPC_DEBUG_SPR
> > @@ -8909,6 +8910,11 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
> >          return;
> >      }
> >  
> > +    if (cs->cpu_index >= max_cpus) {
> pls note that cpu_index is monotonically increases, so when you do unplug
> and then plug it will go above max_cpus or the same will happen if
> one device_add fails in the middle, the next CPU add will fail because of
> cs->cpu_index goes overboard.
> 
> I'd suggest not to rely/use cpu_index for any purposes and use other means
> to identify where cpu is plugged in. On x68 we slowly getting rid of this
> dependency in favor of apic_id (topology information), eventually it could
> become:
>   -device cpu_foo,socket=X,core=Y[,thread=Z][,node=N]
> 
> you probably could do the same.
> It doesn't have to be in this series, just be aware of potential issues.

I see your point and this needs to be fixed as I see this causing problems
with CPU removal (from the middle) and subsequent addition (which makes
use of "vcpu fd parking and reuse" mechanism).

Thanks for your review.

Regards,
Bharata.
diff mbox

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 515d770..a293a59 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -330,6 +330,8 @@  static void add_str(GString *s, const gchar *s1)
     g_string_append_len(s, s1, strlen(s1) + 1);
 }
 
+uint32_t cpus_per_socket;
+
 static void *spapr_create_fdt_skel(hwaddr initrd_base,
                                    hwaddr initrd_size,
                                    hwaddr kernel_size,
@@ -350,9 +352,9 @@  static void *spapr_create_fdt_skel(hwaddr initrd_base,
     unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
     QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
     unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
-    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
     char *buf;
 
+    cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
     add_str(hypertas, "hcall-pft");
     add_str(hypertas, "hcall-term");
     add_str(hypertas, "hcall-dabr");
@@ -1744,12 +1746,209 @@  static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
     }
 }
 
+/* TODO: Duplicates code from spapr_create_fdt_skel(), Fix this */
+static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
+            int drc_index)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+    int index = ppc_get_vcpu_dt_id(cpu);
+    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
+                       0xffffffff, 0xffffffff};
+    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
+    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
+    uint32_t page_sizes_prop[64];
+    size_t page_sizes_prop_size;
+    int smpt = ppc_get_compat_smt_threads(cpu);
+    uint32_t servers_prop[smpt];
+    uint32_t gservers_prop[smpt * 2];
+    int i;
+    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
+    uint32_t associativity[] = {cpu_to_be32(0x5),
+                                cpu_to_be32(0x0),
+                                cpu_to_be32(0x0),
+                                cpu_to_be32(0x0),
+                                cpu_to_be32(cs->numa_node),
+                                cpu_to_be32(index)};
+
+    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
+    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
+                        env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
+                        env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
+                        env->icache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
+                            env->icache_line_size)));
+
+    if (pcc->l1_dcache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
+            pcc->l1_dcache_size)));
+    } else {
+        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
+    }
+    if (pcc->l1_icache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
+            pcc->l1_icache_size)));
+    } else {
+        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
+    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
+    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
+
+    if (env->spr_cb[SPR_PURR].oea_read) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
+    }
+
+    if (env->mmu_model & POWERPC_MMU_1TSEG) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
+                           segs, sizeof(segs))));
+    }
+
+    /* Advertise VMX/VSX (vector extensions) if available
+     *   0 / no property == no vector extensions
+     *   1               == VMX / Altivec available
+     *   2               == VSX available */
+    if (env->insns_flags & PPC_ALTIVEC) {
+        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
+
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
+    }
+
+    /* Advertise DFP (Decimal Floating Point) if available
+     *   0 / no property == no DFP
+     *   1               == DFP available */
+    if (env->insns_flags2 & PPC2_DFP) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
+    }
+
+    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
+                                                  sizeof(page_sizes_prop));
+    if (page_sizes_prop_size) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
+                           page_sizes_prop, page_sizes_prop_size)));
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
+                                cs->cpu_index / cpus_per_socket)));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
+
+    /* Build interrupt servers and gservers properties */
+    for (i = 0; i < smpt; i++) {
+        servers_prop[i] = cpu_to_be32(index + i);
+        /* Hack, direct the group queues back to cpu 0 */
+        gservers_prop[i*2] = cpu_to_be32(index + i);
+        gservers_prop[i*2 + 1] = 0;
+    }
+    _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
+                       servers_prop, sizeof(servers_prop)));
+    _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
+                      gservers_prop, sizeof(gservers_prop)));
+    _FDT(fdt_setprop(fdt, offset, "ibm,pft-size",
+                          pft_size_prop, sizeof(pft_size_prop)));
+
+    if (nb_numa_nodes > 1) {
+        _FDT(fdt_setprop(fdt, offset, "ibm,associativity", associativity,
+                          sizeof(associativity)));
+    }
+}
+
+static void spapr_cpu_hotplug_add(DeviceState *dev, CPUState *cs)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    DeviceClass *dc = DEVICE_GET_CLASS(cs);
+    int id = ppc_get_vcpu_dt_id(cpu);
+    sPAPRDRConnector *drc =
+        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
+    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+    int drc_index = drck->get_index(drc);
+    void *fdt, *fdt_orig;
+    int offset, i;
+    char *nodename;
+
+    /* add OF node for CPU and required OF DT properties */
+    fdt_orig = g_malloc0(FDT_MAX_SIZE);
+    offset = fdt_create(fdt_orig, FDT_MAX_SIZE);
+    fdt_begin_node(fdt_orig, "");
+    fdt_end_node(fdt_orig);
+    fdt_finish(fdt_orig);
+
+    fdt = g_malloc0(FDT_MAX_SIZE);
+    fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE);
+
+    nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
+
+    offset = fdt_add_subnode(fdt, offset, nodename);
+
+    /* Set NUMA node for the added CPU */
+    for (i = 0; i < nb_numa_nodes; i++) {
+        if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
+            cs->numa_node = i;
+            break;
+        }
+    }
+
+    spapr_populate_cpu_dt(cs, fdt, offset, drc_index);
+    g_free(fdt_orig);
+    g_free(nodename);
+
+    drck->attach(drc, dev, fdt, offset, false);
+}
+
+static void spapr_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+                            Error **errp)
+{
+    Error *local_err = NULL;
+    CPUState *cs = CPU(dev);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    sPAPRDRConnector *drc =
+        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, cpu->cpu_dt_id);
+
+    /* TODO: Check if DR is enabled ? */
+    g_assert(drc);
+
+    spapr_cpu_reset(POWERPC_CPU(CPU(dev)));
+    spapr_cpu_hotplug_add(dev, cs);
+    spapr_hotplug_req_add_event(drc);
+    error_propagate(errp, local_err);
+    return;
+}
+
+static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
+                                      DeviceState *dev, Error **errp)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+        if (dev->hotplugged) {
+            spapr_cpu_plug(hotplug_dev, dev, errp);
+        }
+    }
+}
+
+static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine,
+                                             DeviceState *dev)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+        return HOTPLUG_HANDLER(machine);
+    }
+    return NULL;
+}
+
 static void spapr_machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
     sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc);
     FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
     NMIClass *nc = NMI_CLASS(oc);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
 
     mc->init = ppc_spapr_init;
     mc->reset = ppc_spapr_reset;
@@ -1759,6 +1958,9 @@  static void spapr_machine_class_init(ObjectClass *oc, void *data)
     mc->default_boot_order = NULL;
     mc->kvm_type = spapr_kvm_type;
     mc->has_dynamic_sysbus = true;
+    mc->get_hotplug_handler = spapr_get_hotpug_handler;
+
+    hc->plug = spapr_machine_device_plug;
     smc->dr_phb_enabled = false;
     smc->dr_cpu_enabled = false;
     smc->dr_lmb_enabled = false;
@@ -1778,6 +1980,7 @@  static const TypeInfo spapr_machine_info = {
     .interfaces = (InterfaceInfo[]) {
         { TYPE_FW_PATH_PROVIDER },
         { TYPE_NMI },
+        { TYPE_HOTPLUG_HANDLER },
         { }
     },
 };
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 434a75d..035d8c9 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -364,14 +364,16 @@  static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action)
     hp->hdr.section_length = cpu_to_be16(sizeof(*hp));
     hp->hdr.section_version = 1; /* includes extended modifier */
     hp->hotplug_action = hp_action;
-
+    hp->drc.index = cpu_to_be32(drck->get_index(drc));
+    hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX;
 
     switch (drc_type) {
     case SPAPR_DR_CONNECTOR_TYPE_PCI:
-        hp->drc.index = cpu_to_be32(drck->get_index(drc));
-        hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX;
         hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI;
         break;
+    case SPAPR_DR_CONNECTOR_TYPE_CPU:
+        hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU;
+        break;
     default:
         /* skip notification for unknown connector types */
         g_free(new_hp);
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 9c642a5..cf9d8d3 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -32,6 +32,7 @@ 
 #include "hw/qdev-properties.h"
 #include "hw/ppc/spapr.h"
 #include "hw/ppc/ppc.h"
+#include "sysemu/sysemu.h"
 
 //#define PPC_DUMP_CPU
 //#define PPC_DEBUG_SPR
@@ -8909,6 +8910,11 @@  static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
         return;
     }
 
+    if (cs->cpu_index >= max_cpus) {
+        error_setg(errp, "Can't have more CPUs, maxcpus limit reached");
+        return;
+    }
+
     cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
         + (cs->cpu_index % smp_threads);
 #endif