diff mbox

[v6,10/11] spapr: CPU hotplug support

Message ID 1452236119-24452-11-git-send-email-bharata@linux.vnet.ibm.com
State New
Headers show

Commit Message

Bharata B Rao Jan. 8, 2016, 6:55 a.m. UTC
Support CPU hotplug via the device_add command like this:

(qemu) device_add powerpc64-cpu-core,id=core2

In response to device_add, a CPU core device will be created. The CPU core
device creates and realizes CPU thread devices. If the machine type
supports CPU hotplug, boot-time CPUs are created as CPU core devices;
otherwise they continue to be created as individual CPU devices.

Set up device tree entries for the hotplugged CPU core and use the
existing EPOW event infrastructure to send CPU hotplug notification to
the guest.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c              | 183 ++++++++++++++++++++++++++++++++++++++++++--
 hw/ppc/spapr_events.c       |   3 +
 hw/ppc/spapr_rtas.c         |  24 ++++++
 include/hw/ppc/spapr.h      |   5 ++
 target-ppc/translate_init.c |   8 ++
 5 files changed, 216 insertions(+), 7 deletions(-)

Comments

David Gibson Jan. 12, 2016, 5:58 a.m. UTC | #1
On Fri, Jan 08, 2016 at 12:25:18PM +0530, Bharata B Rao wrote:
> Support CPU hotplug via device-add command like this:
> 
> (qemu) device_add powerpc64-cpu-core,id=core2
> 
> In response to device_add, CPU core device will be created. CPU core
> device creates and realizes CPU thread devices. If the machine type
> supports CPU hotplug, boot-time CPUs are created as CPU core devices
> otherwise they continue to be created as individual CPU devices.
> 
> Set up device tree entries for the hotplugged CPU core and use the
> exising EPOW event infrastructure to send CPU hotplug notification to
> the guest.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c              | 183 ++++++++++++++++++++++++++++++++++++++++++--
>  hw/ppc/spapr_events.c       |   3 +
>  hw/ppc/spapr_rtas.c         |  24 ++++++
>  include/hw/ppc/spapr.h      |   5 ++
>  target-ppc/translate_init.c |   8 ++
>  5 files changed, 216 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index a3ce1db..c2af9ca 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -63,6 +63,7 @@
>  
>  #include "hw/compat.h"
>  #include "qemu-common.h"
> +#include "hw/ppc/cpu-core.h"
>  
>  #include <libfdt.h>
>  
> @@ -600,6 +601,18 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
>      size_t page_sizes_prop_size;
>      uint32_t vcpus_per_socket = smp_threads * smp_cores;
>      uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> +    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> +    sPAPRDRConnector *drc;
> +    sPAPRDRConnectorClass *drck;
> +    int drc_index;
> +
> +    if (smc->dr_cpu_enabled) {
> +        drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index);
> +        g_assert(drc);
> +        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +        drc_index = drck->get_index(drc);
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
> +    }
>  
>      /* Note: we keep CI large pages off for now because a 64K capable guest
>       * provisioned with large pages might otherwise try to map a qemu
> @@ -1743,6 +1756,8 @@ static void ppc_spapr_init(MachineState *machine)
>      char *filename;
>      int smt = kvmppc_smt_threads();
>      int smp_max_cores = max_cpus/smp_threads;
> +    int spapr_smp_cores = smp_cpus/smp_threads;
> +    Object *core;
>  
>      msi_supported = true;
>  
> @@ -1822,13 +1837,22 @@ static void ppc_spapr_init(MachineState *machine)
>      if (machine->cpu_model == NULL) {
>          machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
>      }
> -    for (i = 0; i < smp_cpus; i++) {
> -        cpu = cpu_ppc_init(machine->cpu_model);
> -        if (cpu == NULL) {
> -            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
> -            exit(1);
> +
> +    if (smc->dr_cpu_enabled) {
> +        for (i = 0; i < spapr_smp_cores; i++) {
> +            core = object_new(TYPE_POWERPC_CPU_CORE);
> +            object_property_set_bool(core, true, "realized", &error_abort);
> +        }
> +    } else {
> +        for (i = 0; i < smp_cpus; i++) {
> +            cpu = cpu_ppc_init(machine->cpu_model);
> +            if (cpu == NULL) {
> +                fprintf(stderr, "Unable to find PowerPC CPU definition\n");
> +                exit(1);
> +            }
> +            object_property_set_bool(OBJECT(cpu), true, "realized",
> +                                     &error_abort);
>          }
> -        spapr_cpu_init(spapr, cpu);
>      }
>  
>      if (kvm_enabled()) {
> @@ -2222,10 +2246,125 @@ out:
>      error_propagate(errp, local_err);
>  }
>  
> +static void *spapr_populate_hotplug_cpu_dt(DeviceState *dev, CPUState *cs,
> +                                           int *fdt_offset,
> +                                           sPAPRMachineState *spapr)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
> +    int id = ppc_get_vcpu_dt_id(cpu);
> +    void *fdt;
> +    int offset, fdt_size;
> +    char *nodename;
> +
> +    fdt = create_device_tree(&fdt_size);
> +    nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
> +    offset = fdt_add_subnode(fdt, 0, nodename);
> +
> +    spapr_populate_cpu_dt(cs, fdt, offset, spapr);
> +    g_free(nodename);
> +
> +    *fdt_offset = offset;
> +    return fdt;
> +}
> +
> +static int spapr_core_attach(Object *obj, void *opaque)
> +{
> +    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> +    sPAPRMachineState *ms = SPAPR_MACHINE(qdev_get_machine());
> +    sPAPRCoreState *core = opaque;
> +    DeviceState *dev = DEVICE(obj);
> +    CPUState *cs = CPU(dev);
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    int id = ppc_get_vcpu_dt_id(cpu);
> +    sPAPRDRConnector *drc =
> +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
> +    sPAPRDRConnectorClass *drck;
> +    int smt = kvmppc_smt_threads();
> +    Error *local_err = NULL;
> +    void *fdt = NULL;
> +    int fdt_offset = 0;
> +
> +    /*
> +     * Only main SMT thread (thread 0) will continue and signal the
> +     * hotplug event to the guest. Other threads of the core will
> +     * return from here.
> +     */
> +    if ((id % smt) != 0) {
> +        return 0;
> +    }
> +
> +    if (!smc->dr_cpu_enabled) {
> +        /*
> +         * This is a cold plugged CPU but the machine doesn't support
> +         * DR. So skip the hotplug path ensuring that the CPU is brought
> +         * up online with out an associated DR connector.
> +         */
> +        return 0;
> +    }
> +
> +    g_assert(drc);
> +
> +    /*
> +     * Setup CPU DT entries only for hotplugged CPUs. For boot time or
> +     * coldplugged CPUs DT entries are setup in spapr_finalize_fdt().
> +     */
> +    if (dev->hotplugged) {
> +        fdt = spapr_populate_hotplug_cpu_dt(dev, cs, &fdt_offset, ms);
> +    }
> +
> +    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +    drck->attach(drc, core->dev, fdt, fdt_offset, !dev->hotplugged, &local_err);
> +    if (local_err) {
> +        g_free(fdt);
> +        error_propagate(core->errp, local_err);
> +        return 1;
> +    }
> +
> +    /*
> +     * We send hotplug notification interrupt to the guest only in case
> +     * of hotplugged CPUs.
> +     */
> +    if (dev->hotplugged) {
> +        spapr_hotplug_req_add_by_index(drc);
> +    } else {
> +        /*
> +         * HACK to support removal of hotplugged CPU after VM migration:
> +         *
> +         * Since we want to be able to hot-remove those coldplugged CPUs
> +         * started at boot time using -device option at the target VM, we set
> +         * the right allocation_state and isolation_state for them, which for
> +         * the hotplugged CPUs would be set via RTAS calls done from the
> +         * guest during hotplug.
> +         *
> +         * This allows the coldplugged CPUs started using -device option to
> +         * have the right isolation and allocation states as expected by the
> +         * CPU hot removal code.
> +         *
> +         * This hack will be removed once we have DRC states migrated as part
> +         * of VM migration.
> +         */
> +        drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE);
> +        drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED);

I'm not fully understanding why this is a hack.  Aren't those the
right allocation and isolation states for a cpu that was present at
boot?

> +    }
> +    return 0;
> +}
> +
> +static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> +                            Error **errp)
> +{
> +    sPAPRCoreState core;
> +
> +    core.dev = dev;
> +    core.errp = errp;
> +    object_child_foreach(OBJECT(dev), spapr_core_attach, &core);
> +}
> +
>  static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
>                                        DeviceState *dev, Error **errp)
>  {
>      sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> +    sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
>  
>      if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
>          int node;
> @@ -2262,6 +2401,34 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
>          }
>  
>          spapr_memory_plug(hotplug_dev, dev, node, errp);
> +    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> +        CPUState *cs = CPU(dev);
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +        int i;
> +
> +        /* Set NUMA node for the added CPUs  */
> +        for (i = 0; i < nb_numa_nodes; i++) {
> +            if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
> +                cs->numa_node = i;
> +                break;
> +            }
> +        }
> +
> +        if (!smc->dr_cpu_enabled) {
> +            if (dev->hotplugged) {
> +                error_setg(errp, "CPU hotplug not supported for this machine");
> +                cpu_remove_sync(cs);
> +                return;
> +            } else {
> +                spapr_cpu_init(ms, cpu);

You could just continue onto the code below, yes?  The cpu_reset()
would be unnecessary but harmless IIUC.

> +                return;
> +            }
> +        }
> +
> +        spapr_cpu_init(ms, cpu);
> +        spapr_cpu_reset(cpu);
> +    } else if (object_dynamic_cast(OBJECT(dev), TYPE_POWERPC_CPU_CORE)) {
> +        spapr_core_plug(hotplug_dev, dev, errp);

So, I see that there are branches here for both individual vcpu
objects and for cpu core objects.  I'm assuming it's only intended
that the user add core objects, and the vcpu path is for the vcpus
constructed by the core object.  Is that right?

Does anything enforce that the user can't directly device_add a vcpu
object?

>      }
>  }
>  
> @@ -2276,7 +2443,9 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
>  static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine,
>                                               DeviceState *dev)
>  {
> -    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
> +        object_dynamic_cast(OBJECT(dev), TYPE_CPU) ||
> +        object_dynamic_cast(OBJECT(dev), TYPE_POWERPC_CPU_CORE)) {
>          return HOTPLUG_HANDLER(machine);
>      }
>      return NULL;
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 744ea62..1063036 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -436,6 +436,9 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
>      case SPAPR_DR_CONNECTOR_TYPE_LMB:
>          hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_MEMORY;
>          break;
> +    case SPAPR_DR_CONNECTOR_TYPE_CPU:
> +        hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU;
> +        break;
>      default:
>          /* we shouldn't be signaling hotplug events for resources
>           * that don't support them
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index 34b12a3..7baa862 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -33,6 +33,7 @@
>  
>  #include "hw/ppc/spapr.h"
>  #include "hw/ppc/spapr_vio.h"
> +#include "hw/ppc/ppc.h"
>  #include "qapi-event.h"
>  #include "hw/boards.h"
>  
> @@ -159,6 +160,27 @@ static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_,
>      rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>  }
>  
> +/*
> + * Set the timebase offset of the CPU to that of first CPU.
> + * This helps hotplugged CPU to have the correct timebase offset.
> + */
> +static void spapr_cpu_update_tb_offset(PowerPCCPU *cpu)
> +{
> +    PowerPCCPU *fcpu = POWERPC_CPU(first_cpu);
> +
> +    cpu->env.tb_env->tb_offset = fcpu->env.tb_env->tb_offset;
> +}
> +
> +static void spapr_cpu_set_endianness(PowerPCCPU *cpu)
> +{
> +    PowerPCCPU *fcpu = POWERPC_CPU(first_cpu);
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(fcpu);
> +
> +    if (!pcc->interrupts_big_endian(fcpu)) {
> +        cpu->env.spr[SPR_LPCR] |= LPCR_ILE;
> +    }
> +}
> +
>  static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr,
>                             uint32_t token, uint32_t nargs,
>                             target_ulong args,
> @@ -195,6 +217,8 @@ static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr,
>          env->nip = start;
>          env->gpr[3] = r3;
>          cs->halted = 0;
> +        spapr_cpu_set_endianness(cpu);
> +        spapr_cpu_update_tb_offset(cpu);
>  
>          qemu_cpu_kick(cs);
>  
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 739f9ba..68d51d6 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -83,6 +83,11 @@ struct sPAPRMachineState {
>      MemoryHotplugState hotplug_memory;
>  };
>  
> +typedef struct sPAPRCoreState {
> +    DeviceState *dev;
> +    Error **errp;
> +} sPAPRCoreState;
> +
>  #define H_SUCCESS         0
>  #define H_BUSY            1        /* Hardware busy -- retry later */
>  #define H_CLOSED          2        /* Resource closed */
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index d5ae53e..651dd41 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -30,6 +30,9 @@
>  #include "qemu/error-report.h"
>  #include "qapi/visitor.h"
>  #include "hw/qdev-properties.h"
> +#if !defined(CONFIG_USER_ONLY)
> +#include "sysemu/sysemu.h"
> +#endif
>  
>  //#define PPC_DUMP_CPU
>  //#define PPC_DEBUG_SPR
> @@ -8933,6 +8936,11 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
>      }
>  
>  #if !defined(CONFIG_USER_ONLY)
> +    if (cs->cpu_index >= max_cpus) {
> +        error_setg(errp, "Cannot have more than %d CPUs", max_cpus);
> +        return;
> +    }
> +
>      cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
>          + (cs->cpu_index % smp_threads);
>  #endif
Alexey Kardashevskiy Jan. 12, 2016, 11:58 p.m. UTC | #2
On 01/08/2016 05:55 PM, Bharata B Rao wrote:
> Support CPU hotplug via device-add command like this:
>
> (qemu) device_add powerpc64-cpu-core,id=core2
>
> In response to device_add, CPU core device will be created. CPU core
> device creates and realizes CPU thread devices. If the machine type
> supports CPU hotplug, boot-time CPUs are created as CPU core devices
> otherwise they continue to be created as individual CPU devices.
>
> Set up device tree entries for the hotplugged CPU core and use the
> exising EPOW event infrastructure to send CPU hotplug notification to
> the guest.
>
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>   hw/ppc/spapr.c              | 183 ++++++++++++++++++++++++++++++++++++++++++--
>   hw/ppc/spapr_events.c       |   3 +
>   hw/ppc/spapr_rtas.c         |  24 ++++++
>   include/hw/ppc/spapr.h      |   5 ++
>   target-ppc/translate_init.c |   8 ++
>   5 files changed, 216 insertions(+), 7 deletions(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index a3ce1db..c2af9ca 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -63,6 +63,7 @@
>
>   #include "hw/compat.h"
>   #include "qemu-common.h"
> +#include "hw/ppc/cpu-core.h"
>
>   #include <libfdt.h>
>
> @@ -600,6 +601,18 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
>       size_t page_sizes_prop_size;
>       uint32_t vcpus_per_socket = smp_threads * smp_cores;
>       uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> +    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> +    sPAPRDRConnector *drc;
> +    sPAPRDRConnectorClass *drck;
> +    int drc_index;
> +
> +    if (smc->dr_cpu_enabled) {
> +        drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index);
> +        g_assert(drc);
> +        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +        drc_index = drck->get_index(drc);
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
> +    }
>
>       /* Note: we keep CI large pages off for now because a 64K capable guest
>        * provisioned with large pages might otherwise try to map a qemu
> @@ -1743,6 +1756,8 @@ static void ppc_spapr_init(MachineState *machine)
>       char *filename;
>       int smt = kvmppc_smt_threads();
>       int smp_max_cores = max_cpus/smp_threads;
> +    int spapr_smp_cores = smp_cpus/smp_threads;
> +    Object *core;
>
>       msi_supported = true;
>
> @@ -1822,13 +1837,22 @@ static void ppc_spapr_init(MachineState *machine)
>       if (machine->cpu_model == NULL) {
>           machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
>       }
> -    for (i = 0; i < smp_cpus; i++) {
> -        cpu = cpu_ppc_init(machine->cpu_model);
> -        if (cpu == NULL) {
> -            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
> -            exit(1);
> +
> +    if (smc->dr_cpu_enabled) {
> +        for (i = 0; i < spapr_smp_cores; i++) {
> +            core = object_new(TYPE_POWERPC_CPU_CORE);
> +            object_property_set_bool(core, true, "realized", &error_abort);
> +        }
> +    } else {
> +        for (i = 0; i < smp_cpus; i++) {
> +            cpu = cpu_ppc_init(machine->cpu_model);
> +            if (cpu == NULL) {
> +                fprintf(stderr, "Unable to find PowerPC CPU definition\n");
> +                exit(1);
> +            }
> +            object_property_set_bool(OBJECT(cpu), true, "realized",
> +                                     &error_abort);
>           }
> -        spapr_cpu_init(spapr, cpu);
>       }
>
>       if (kvm_enabled()) {
> @@ -2222,10 +2246,125 @@ out:
>       error_propagate(errp, local_err);
>   }
>
> +static void *spapr_populate_hotplug_cpu_dt(DeviceState *dev, CPUState *cs,
> +                                           int *fdt_offset,
> +                                           sPAPRMachineState *spapr)
> +{
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
> +    int id = ppc_get_vcpu_dt_id(cpu);
> +    void *fdt;
> +    int offset, fdt_size;
> +    char *nodename;
> +
> +    fdt = create_device_tree(&fdt_size);
> +    nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
> +    offset = fdt_add_subnode(fdt, 0, nodename);
> +
> +    spapr_populate_cpu_dt(cs, fdt, offset, spapr);
> +    g_free(nodename);
> +
> +    *fdt_offset = offset;
> +    return fdt;
> +}
> +
> +static int spapr_core_attach(Object *obj, void *opaque)
> +{
> +    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> +    sPAPRMachineState *ms = SPAPR_MACHINE(qdev_get_machine());
> +    sPAPRCoreState *core = opaque;
> +    DeviceState *dev = DEVICE(obj);
> +    CPUState *cs = CPU(dev);
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +    int id = ppc_get_vcpu_dt_id(cpu);
> +    sPAPRDRConnector *drc =
> +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
> +    sPAPRDRConnectorClass *drck;
> +    int smt = kvmppc_smt_threads();
> +    Error *local_err = NULL;
> +    void *fdt = NULL;
> +    int fdt_offset = 0;
> +
> +    /*
> +     * Only main SMT thread (thread 0) will continue and signal the
> +     * hotplug event to the guest. Other threads of the core will
> +     * return from here.
> +     */
> +    if ((id % smt) != 0) {
> +        return 0;
> +    }
> +
> +    if (!smc->dr_cpu_enabled) {
> +        /*
> +         * This is a cold plugged CPU but the machine doesn't support
> +         * DR. So skip the hotplug path ensuring that the CPU is brought
> +         * up online with out an associated DR connector.
> +         */
> +        return 0;
> +    }
> +
> +    g_assert(drc);
> +
> +    /*
> +     * Setup CPU DT entries only for hotplugged CPUs. For boot time or
> +     * coldplugged CPUs DT entries are setup in spapr_finalize_fdt().
> +     */
> +    if (dev->hotplugged) {
> +        fdt = spapr_populate_hotplug_cpu_dt(dev, cs, &fdt_offset, ms);
> +    }


spapr_core_attach() is only called from spapr_core_plug(), which is only
called from spapr_machine_device_plug(), which is a hotplug handler, so the
check for dev->hotplugged seems redundant here and below, no? Or is this
called at boot time for cold-plugged devices?



> +
> +    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +    drck->attach(drc, core->dev, fdt, fdt_offset, !dev->hotplugged, &local_err);
> +    if (local_err) {
> +        g_free(fdt);
> +        error_propagate(core->errp, local_err);
> +        return 1;
> +    }
> +
> +    /*
> +     * We send hotplug notification interrupt to the guest only in case
> +     * of hotplugged CPUs.
> +     */
> +    if (dev->hotplugged) {
> +        spapr_hotplug_req_add_by_index(drc);
> +    } else {
> +        /*
> +         * HACK to support removal of hotplugged CPU after VM migration:
> +         *
> +         * Since we want to be able to hot-remove those coldplugged CPUs
> +         * started at boot time using -device option at the target VM, we set
> +         * the right allocation_state and isolation_state for them, which for
> +         * the hotplugged CPUs would be set via RTAS calls done from the
> +         * guest during hotplug.
> +         *
> +         * This allows the coldplugged CPUs started using -device option to
> +         * have the right isolation and allocation states as expected by the
> +         * CPU hot removal code.
> +         *
> +         * This hack will be removed once we have DRC states migrated as part
> +         * of VM migration.
> +         */
> +        drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE);
> +        drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED);
> +    }
> +    return 0;
> +}
> +
> +static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> +                            Error **errp)
> +{
> +    sPAPRCoreState core;
> +
> +    core.dev = dev;
> +    core.errp = errp;
> +    object_child_foreach(OBJECT(dev), spapr_core_attach, &core);
> +}
> +
>   static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
>                                         DeviceState *dev, Error **errp)
>   {
>       sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> +    sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
>
>       if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
>           int node;
> @@ -2262,6 +2401,34 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
>           }
>
>           spapr_memory_plug(hotplug_dev, dev, node, errp);
> +    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> +        CPUState *cs = CPU(dev);
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +        int i;
> +
> +        /* Set NUMA node for the added CPUs  */
> +        for (i = 0; i < nb_numa_nodes; i++) {
> +            if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
> +                cs->numa_node = i;
> +                break;
> +            }
> +        }
> +
> +        if (!smc->dr_cpu_enabled) {
> +            if (dev->hotplugged) {
> +                error_setg(errp, "CPU hotplug not supported for this machine");
> +                cpu_remove_sync(cs);
> +                return;
> +            } else {
> +                spapr_cpu_init(ms, cpu);
> +                return;
> +            }
> +        }
> +
> +        spapr_cpu_init(ms, cpu);
> +        spapr_cpu_reset(cpu);
> +    } else if (object_dynamic_cast(OBJECT(dev), TYPE_POWERPC_CPU_CORE)) {
> +        spapr_core_plug(hotplug_dev, dev, errp);
>       }
>   }
>
> @@ -2276,7 +2443,9 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
>   static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine,
>                                                DeviceState *dev)
>   {
> -    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
> +        object_dynamic_cast(OBJECT(dev), TYPE_CPU) ||
> +        object_dynamic_cast(OBJECT(dev), TYPE_POWERPC_CPU_CORE)) {
>           return HOTPLUG_HANDLER(machine);
>       }
>       return NULL;
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 744ea62..1063036 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -436,6 +436,9 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
>       case SPAPR_DR_CONNECTOR_TYPE_LMB:
>           hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_MEMORY;
>           break;
> +    case SPAPR_DR_CONNECTOR_TYPE_CPU:
> +        hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU;
> +        break;
>       default:
>           /* we shouldn't be signaling hotplug events for resources
>            * that don't support them
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index 34b12a3..7baa862 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -33,6 +33,7 @@
>
>   #include "hw/ppc/spapr.h"
>   #include "hw/ppc/spapr_vio.h"
> +#include "hw/ppc/ppc.h"
>   #include "qapi-event.h"
>   #include "hw/boards.h"
>
> @@ -159,6 +160,27 @@ static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_,
>       rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>   }
>
> +/*
> + * Set the timebase offset of the CPU to that of first CPU.
> + * This helps hotplugged CPU to have the correct timebase offset.
> + */
> +static void spapr_cpu_update_tb_offset(PowerPCCPU *cpu)
> +{
> +    PowerPCCPU *fcpu = POWERPC_CPU(first_cpu);
> +
> +    cpu->env.tb_env->tb_offset = fcpu->env.tb_env->tb_offset;
> +}
> +
> +static void spapr_cpu_set_endianness(PowerPCCPU *cpu)
> +{
> +    PowerPCCPU *fcpu = POWERPC_CPU(first_cpu);
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(fcpu);
> +
> +    if (!pcc->interrupts_big_endian(fcpu)) {
> +        cpu->env.spr[SPR_LPCR] |= LPCR_ILE;
> +    }
> +}
> +
>   static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr,
>                              uint32_t token, uint32_t nargs,
>                              target_ulong args,
> @@ -195,6 +217,8 @@ static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr,
>           env->nip = start;
>           env->gpr[3] = r3;
>           cs->halted = 0;
> +        spapr_cpu_set_endianness(cpu);
> +        spapr_cpu_update_tb_offset(cpu);
>
>           qemu_cpu_kick(cs);
>
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 739f9ba..68d51d6 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -83,6 +83,11 @@ struct sPAPRMachineState {
>       MemoryHotplugState hotplug_memory;
>   };
>
> +typedef struct sPAPRCoreState {
> +    DeviceState *dev;
> +    Error **errp;
> +} sPAPRCoreState;
> +
>   #define H_SUCCESS         0
>   #define H_BUSY            1        /* Hardware busy -- retry later */
>   #define H_CLOSED          2        /* Resource closed */
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index d5ae53e..651dd41 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -30,6 +30,9 @@
>   #include "qemu/error-report.h"
>   #include "qapi/visitor.h"
>   #include "hw/qdev-properties.h"
> +#if !defined(CONFIG_USER_ONLY)
> +#include "sysemu/sysemu.h"
> +#endif
>
>   //#define PPC_DUMP_CPU
>   //#define PPC_DEBUG_SPR
> @@ -8933,6 +8936,11 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
>       }
>
>   #if !defined(CONFIG_USER_ONLY)
> +    if (cs->cpu_index >= max_cpus) {
> +        error_setg(errp, "Cannot have more than %d CPUs", max_cpus);
> +        return;
> +    }
> +
>       cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
>           + (cs->cpu_index % smp_threads);
>   #endif
>
Bharata B Rao Jan. 13, 2016, 3:55 a.m. UTC | #3
On Tue, Jan 12, 2016 at 04:58:44PM +1100, David Gibson wrote:
> On Fri, Jan 08, 2016 at 12:25:18PM +0530, Bharata B Rao wrote:
> > <snip>
> > +static int spapr_core_attach(Object *obj, void *opaque)
> > +{
> > +    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> > +    sPAPRMachineState *ms = SPAPR_MACHINE(qdev_get_machine());
> > +    sPAPRCoreState *core = opaque;
> > +    DeviceState *dev = DEVICE(obj);
> > +    CPUState *cs = CPU(dev);
> > +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> > +    int id = ppc_get_vcpu_dt_id(cpu);
> > +    sPAPRDRConnector *drc =
> > +        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
> > +    sPAPRDRConnectorClass *drck;
> > +    int smt = kvmppc_smt_threads();
> > +    Error *local_err = NULL;
> > +    void *fdt = NULL;
> > +    int fdt_offset = 0;
> > +
> > +    /*
> > +     * Only main SMT thread (thread 0) will continue and signal the
> > +     * hotplug event to the guest. Other threads of the core will
> > +     * return from here.
> > +     */
> > +    if ((id % smt) != 0) {
> > +        return 0;
> > +    }
> > +
> > +    if (!smc->dr_cpu_enabled) {
> > +        /*
> > +         * This is a cold plugged CPU but the machine doesn't support
> > +         * DR. So skip the hotplug path ensuring that the CPU is brought
> > +         * up online with out an associated DR connector.
> > +         */
> > +        return 0;
> > +    }
> > +
> > +    g_assert(drc);
> > +
> > +    /*
> > +     * Setup CPU DT entries only for hotplugged CPUs. For boot time or
> > +     * coldplugged CPUs DT entries are setup in spapr_finalize_fdt().
> > +     */
> > +    if (dev->hotplugged) {
> > +        fdt = spapr_populate_hotplug_cpu_dt(dev, cs, &fdt_offset, ms);
> > +    }
> > +
> > +    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> > +    drck->attach(drc, core->dev, fdt, fdt_offset, !dev->hotplugged, &local_err);
> > +    if (local_err) {
> > +        g_free(fdt);
> > +        error_propagate(core->errp, local_err);
> > +        return 1;
> > +    }
> > +
> > +    /*
> > +     * We send hotplug notification interrupt to the guest only in case
> > +     * of hotplugged CPUs.
> > +     */
> > +    if (dev->hotplugged) {
> > +        spapr_hotplug_req_add_by_index(drc);
> > +    } else {
> > +        /*
> > +         * HACK to support removal of hotplugged CPU after VM migration:
> > +         *
> > +         * Since we want to be able to hot-remove those coldplugged CPUs
> > +         * started at boot time using -device option at the target VM, we set
> > +         * the right allocation_state and isolation_state for them, which for
> > +         * the hotplugged CPUs would be set via RTAS calls done from the
> > +         * guest during hotplug.
> > +         *
> > +         * This allows the coldplugged CPUs started using -device option to
> > +         * have the right isolation and allocation states as expected by the
> > +         * CPU hot removal code.
> > +         *
> > +         * This hack will be removed once we have DRC states migrated as part
> > +         * of VM migration.
> > +         */
> > +        drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE);
> > +        drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED);
> 
> I'm not fully understanding why this is a hack.  Aren't those the
> right allocation and isolation states for a cpu that was present at
> boot?

Those comments are outdated; I will remove them. I remember Michael Roth
confirming that setting the initial DRC states like this for cold-plugged
CPUs should be all right.

> 
> > +    }
> > +    return 0;
> > +}
> > +
> > +static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> > +                            Error **errp)
> > +{
> > +    sPAPRCoreState core;
> > +
> > +    core.dev = dev;
> > +    core.errp = errp;
> > +    object_child_foreach(OBJECT(dev), spapr_core_attach, &core);
> > +}
> > +
> >  static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
> >                                        DeviceState *dev, Error **errp)
> >  {
> >      sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> > +    sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
> >  
> >      if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> >          int node;
> > @@ -2262,6 +2401,34 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
> >          }
> >  
> >          spapr_memory_plug(hotplug_dev, dev, node, errp);
> > +    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> > +        CPUState *cs = CPU(dev);
> > +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> > +        int i;
> > +
> > +        /* Set NUMA node for the added CPUs  */
> > +        for (i = 0; i < nb_numa_nodes; i++) {
> > +            if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
> > +                cs->numa_node = i;
> > +                break;
> > +            }
> > +        }
> > +
> > +        if (!smc->dr_cpu_enabled) {
> > +            if (dev->hotplugged) {
> > +                error_setg(errp, "CPU hotplug not supported for this machine");
> > +                cpu_remove_sync(cs);
> > +                return;
> > +            } else {
> > +                spapr_cpu_init(ms, cpu);
> 
> You could just continue onto the code below, yes?  the cpu_reset()
> would be unnecessary but harmless IIUC.

Will do that.

> 
> > +                return;
> > +            }
> > +        }
> > +
> > +        spapr_cpu_init(ms, cpu);
> > +        spapr_cpu_reset(cpu);
> > +    } else if (object_dynamic_cast(OBJECT(dev), TYPE_POWERPC_CPU_CORE)) {
> > +        spapr_core_plug(hotplug_dev, dev, errp);
> 
> So, I see that there are branches here for both individual vcpu
> objects and for cpu core objects.  I'm assuming it's only intended
> that the user add core objects, and the vcpu path is for the vcpus
> constructed by the core object.  Is that right?

That's correct.
 
> 
> Does anything enforce that the user can't directly device_add a vcpu
> object?

CPU objects (like host-powerpc64-cpu or POWER8-powerpc64-cpu etc.) will not
be exposed to the device_add command, since the
cannot_instantiate_with_device_add_yet member of their DeviceClass is not
set to false.

Regards,
Bharata.
Bharata B Rao Jan. 13, 2016, 4:01 a.m. UTC | #4
On Wed, Jan 13, 2016 at 10:58:06AM +1100, Alexey Kardashevskiy wrote:
> On 01/08/2016 05:55 PM, Bharata B Rao wrote:
> >Support CPU hotplug via device-add command like this:
> >
> >(qemu) device_add powerpc64-cpu-core,id=core2
> >
> >In response to device_add, CPU core device will be created. CPU core
> >device creates and realizes CPU thread devices. If the machine type
> >supports CPU hotplug, boot-time CPUs are created as CPU core devices
> >otherwise they continue to be created as individual CPU devices.
> >
> >Set up device tree entries for the hotplugged CPU core and use the
> >exising EPOW event infrastructure to send CPU hotplug notification to
> >the guest.
> >
> >Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> >---
> >  hw/ppc/spapr.c              | 183 ++++++++++++++++++++++++++++++++++++++++++--
> >  hw/ppc/spapr_events.c       |   3 +
> >  hw/ppc/spapr_rtas.c         |  24 ++++++
> >  include/hw/ppc/spapr.h      |   5 ++
> >  target-ppc/translate_init.c |   8 ++
> >  5 files changed, 216 insertions(+), 7 deletions(-)
> >
> >diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >index a3ce1db..c2af9ca 100644
> >--- a/hw/ppc/spapr.c
> >+++ b/hw/ppc/spapr.c
> >@@ -63,6 +63,7 @@
> >
> >  #include "hw/compat.h"
> >  #include "qemu-common.h"
> >+#include "hw/ppc/cpu-core.h"
> >
> >  #include <libfdt.h>
> >
> >@@ -600,6 +601,18 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
> >      size_t page_sizes_prop_size;
> >      uint32_t vcpus_per_socket = smp_threads * smp_cores;
> >      uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> >+    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> >+    sPAPRDRConnector *drc;
> >+    sPAPRDRConnectorClass *drck;
> >+    int drc_index;
> >+
> >+    if (smc->dr_cpu_enabled) {
> >+        drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index);
> >+        g_assert(drc);
> >+        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> >+        drc_index = drck->get_index(drc);
> >+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
> >+    }
> >
> >      /* Note: we keep CI large pages off for now because a 64K capable guest
> >       * provisioned with large pages might otherwise try to map a qemu
> >@@ -1743,6 +1756,8 @@ static void ppc_spapr_init(MachineState *machine)
> >      char *filename;
> >      int smt = kvmppc_smt_threads();
> >      int smp_max_cores = max_cpus/smp_threads;
> >+    int spapr_smp_cores = smp_cpus/smp_threads;
> >+    Object *core;
> >
> >      msi_supported = true;
> >
> >@@ -1822,13 +1837,22 @@ static void ppc_spapr_init(MachineState *machine)
> >      if (machine->cpu_model == NULL) {
> >          machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
> >      }
> >-    for (i = 0; i < smp_cpus; i++) {
> >-        cpu = cpu_ppc_init(machine->cpu_model);
> >-        if (cpu == NULL) {
> >-            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
> >-            exit(1);
> >+
> >+    if (smc->dr_cpu_enabled) {
> >+        for (i = 0; i < spapr_smp_cores; i++) {
> >+            core = object_new(TYPE_POWERPC_CPU_CORE);
> >+            object_property_set_bool(core, true, "realized", &error_abort);
> >+        }
> >+    } else {
> >+        for (i = 0; i < smp_cpus; i++) {
> >+            cpu = cpu_ppc_init(machine->cpu_model);
> >+            if (cpu == NULL) {
> >+                fprintf(stderr, "Unable to find PowerPC CPU definition\n");
> >+                exit(1);
> >+            }
> >+            object_property_set_bool(OBJECT(cpu), true, "realized",
> >+                                     &error_abort);
> >          }
> >-        spapr_cpu_init(spapr, cpu);
> >      }
> >
> >      if (kvm_enabled()) {
> >@@ -2222,10 +2246,125 @@ out:
> >      error_propagate(errp, local_err);
> >  }
> >
> >+static void *spapr_populate_hotplug_cpu_dt(DeviceState *dev, CPUState *cs,
> >+                                           int *fdt_offset,
> >+                                           sPAPRMachineState *spapr)
> >+{
> >+    PowerPCCPU *cpu = POWERPC_CPU(cs);
> >+    DeviceClass *dc = DEVICE_GET_CLASS(cs);
> >+    int id = ppc_get_vcpu_dt_id(cpu);
> >+    void *fdt;
> >+    int offset, fdt_size;
> >+    char *nodename;
> >+
> >+    fdt = create_device_tree(&fdt_size);
> >+    nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
> >+    offset = fdt_add_subnode(fdt, 0, nodename);
> >+
> >+    spapr_populate_cpu_dt(cs, fdt, offset, spapr);
> >+    g_free(nodename);
> >+
> >+    *fdt_offset = offset;
> >+    return fdt;
> >+}
> >+
> >+static int spapr_core_attach(Object *obj, void *opaque)
> >+{
> >+    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> >+    sPAPRMachineState *ms = SPAPR_MACHINE(qdev_get_machine());
> >+    sPAPRCoreState *core = opaque;
> >+    DeviceState *dev = DEVICE(obj);
> >+    CPUState *cs = CPU(dev);
> >+    PowerPCCPU *cpu = POWERPC_CPU(cs);
> >+    int id = ppc_get_vcpu_dt_id(cpu);
> >+    sPAPRDRConnector *drc =
> >+        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
> >+    sPAPRDRConnectorClass *drck;
> >+    int smt = kvmppc_smt_threads();
> >+    Error *local_err = NULL;
> >+    void *fdt = NULL;
> >+    int fdt_offset = 0;
> >+
> >+    /*
> >+     * Only main SMT thread (thread 0) will continue and signal the
> >+     * hotplug event to the guest. Other threads of the core will
> >+     * return from here.
> >+     */
> >+    if ((id % smt) != 0) {
> >+        return 0;
> >+    }
> >+
> >+    if (!smc->dr_cpu_enabled) {
> >+        /*
> >+         * This is a cold plugged CPU but the machine doesn't support
> >+         * DR. So skip the hotplug path ensuring that the CPU is brought
> >+         * up online with out an associated DR connector.
> >+         */
> >+        return 0;
> >+    }
> >+
> >+    g_assert(drc);
> >+
> >+    /*
> >+     * Setup CPU DT entries only for hotplugged CPUs. For boot time or
> >+     * coldplugged CPUs DT entries are setup in spapr_finalize_fdt().
> >+     */
> >+    if (dev->hotplugged) {
> >+        fdt = spapr_populate_hotplug_cpu_dt(dev, cs, &fdt_offset, ms);
> >+    }
> 
> 
> spapr_core_attach() is only called from spapr_core_plug(), which is only
> called from spapr_machine_device_plug(), which is a hotplug handler, so the
> check for dev->hotplugged seems redundant here and below, no? Or is this
> called at boot time for cold-plugged devices?

The ->plug() handler is called for cold-plugged CPUs too. It is called both
for CPUs specified using the -smp option and for those specified using the
-device option, because I am initializing these boot-time CPUs as CPU core
devices.

Regards,
Bharata.
diff mbox

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a3ce1db..c2af9ca 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -63,6 +63,7 @@ 
 
 #include "hw/compat.h"
 #include "qemu-common.h"
+#include "hw/ppc/cpu-core.h"
 
 #include <libfdt.h>
 
@@ -600,6 +601,18 @@  static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
     size_t page_sizes_prop_size;
     uint32_t vcpus_per_socket = smp_threads * smp_cores;
     uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
+    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
+    sPAPRDRConnector *drc;
+    sPAPRDRConnectorClass *drck;
+    int drc_index;
+
+    if (smc->dr_cpu_enabled) {
+        drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index);
+        g_assert(drc);
+        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+        drc_index = drck->get_index(drc);
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
+    }
 
     /* Note: we keep CI large pages off for now because a 64K capable guest
      * provisioned with large pages might otherwise try to map a qemu
@@ -1743,6 +1756,8 @@  static void ppc_spapr_init(MachineState *machine)
     char *filename;
     int smt = kvmppc_smt_threads();
     int smp_max_cores = max_cpus/smp_threads;
+    int spapr_smp_cores = smp_cpus/smp_threads;
+    Object *core;
 
     msi_supported = true;
 
@@ -1822,13 +1837,22 @@  static void ppc_spapr_init(MachineState *machine)
     if (machine->cpu_model == NULL) {
         machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
     }
-    for (i = 0; i < smp_cpus; i++) {
-        cpu = cpu_ppc_init(machine->cpu_model);
-        if (cpu == NULL) {
-            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
-            exit(1);
+
+    if (smc->dr_cpu_enabled) {
+        for (i = 0; i < spapr_smp_cores; i++) {
+            core = object_new(TYPE_POWERPC_CPU_CORE);
+            object_property_set_bool(core, true, "realized", &error_abort);
+        }
+    } else {
+        for (i = 0; i < smp_cpus; i++) {
+            cpu = cpu_ppc_init(machine->cpu_model);
+            if (cpu == NULL) {
+                fprintf(stderr, "Unable to find PowerPC CPU definition\n");
+                exit(1);
+            }
+            object_property_set_bool(OBJECT(cpu), true, "realized",
+                                     &error_abort);
         }
-        spapr_cpu_init(spapr, cpu);
     }
 
     if (kvm_enabled()) {
@@ -2222,10 +2246,125 @@  out:
     error_propagate(errp, local_err);
 }
 
+static void *spapr_populate_hotplug_cpu_dt(DeviceState *dev, CPUState *cs,
+                                           int *fdt_offset,
+                                           sPAPRMachineState *spapr)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    DeviceClass *dc = DEVICE_GET_CLASS(cs);
+    int id = ppc_get_vcpu_dt_id(cpu);
+    void *fdt;
+    int offset, fdt_size;
+    char *nodename;
+
+    fdt = create_device_tree(&fdt_size);
+    nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
+    offset = fdt_add_subnode(fdt, 0, nodename);
+
+    spapr_populate_cpu_dt(cs, fdt, offset, spapr);
+    g_free(nodename);
+
+    *fdt_offset = offset;
+    return fdt;
+}
+
+static int spapr_core_attach(Object *obj, void *opaque)
+{
+    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
+    sPAPRMachineState *ms = SPAPR_MACHINE(qdev_get_machine());
+    sPAPRCoreState *core = opaque;
+    DeviceState *dev = DEVICE(obj);
+    CPUState *cs = CPU(dev);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    int id = ppc_get_vcpu_dt_id(cpu);
+    sPAPRDRConnector *drc =
+        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id);
+    sPAPRDRConnectorClass *drck;
+    int smt = kvmppc_smt_threads();
+    Error *local_err = NULL;
+    void *fdt = NULL;
+    int fdt_offset = 0;
+
+    /*
+     * Only main SMT thread (thread 0) will continue and signal the
+     * hotplug event to the guest. Other threads of the core will
+     * return from here.
+     */
+    if ((id % smt) != 0) {
+        return 0;
+    }
+
+    if (!smc->dr_cpu_enabled) {
+        /*
+         * This is a cold plugged CPU but the machine doesn't support
+         * DR. So skip the hotplug path ensuring that the CPU is brought
+         * up online with out an associated DR connector.
+         */
+        return 0;
+    }
+
+    g_assert(drc);
+
+    /*
+     * Setup CPU DT entries only for hotplugged CPUs. For boot time or
+     * coldplugged CPUs DT entries are setup in spapr_finalize_fdt().
+     */
+    if (dev->hotplugged) {
+        fdt = spapr_populate_hotplug_cpu_dt(dev, cs, &fdt_offset, ms);
+    }
+
+    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+    drck->attach(drc, core->dev, fdt, fdt_offset, !dev->hotplugged, &local_err);
+    if (local_err) {
+        g_free(fdt);
+        error_propagate(core->errp, local_err);
+        return 1;
+    }
+
+    /*
+     * We send hotplug notification interrupt to the guest only in case
+     * of hotplugged CPUs.
+     */
+    if (dev->hotplugged) {
+        spapr_hotplug_req_add_by_index(drc);
+    } else {
+        /*
+         * HACK to support removal of hotplugged CPU after VM migration:
+         *
+         * Since we want to be able to hot-remove those coldplugged CPUs
+         * started at boot time using -device option at the target VM, we set
+         * the right allocation_state and isolation_state for them, which for
+         * the hotplugged CPUs would be set via RTAS calls done from the
+         * guest during hotplug.
+         *
+         * This allows the coldplugged CPUs started using -device option to
+         * have the right isolation and allocation states as expected by the
+         * CPU hot removal code.
+         *
+         * This hack will be removed once we have DRC states migrated as part
+         * of VM migration.
+         */
+        drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE);
+        drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED);
+    }
+    return 0;
+}
+
+static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+                            Error **errp)
+{
+    sPAPRCoreState core;
+
+    core.dev = dev;
+    core.errp = errp;
+    object_child_foreach(OBJECT(dev), spapr_core_attach, &core);
+}
+
 static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
                                       DeviceState *dev, Error **errp)
 {
     sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
+    sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
 
     if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
         int node;
@@ -2262,6 +2401,34 @@  static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
         }
 
         spapr_memory_plug(hotplug_dev, dev, node, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+        CPUState *cs = CPU(dev);
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        int i;
+
+        /* Set NUMA node for the added CPUs  */
+        for (i = 0; i < nb_numa_nodes; i++) {
+            if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
+                cs->numa_node = i;
+                break;
+            }
+        }
+
+        if (!smc->dr_cpu_enabled) {
+            if (dev->hotplugged) {
+                error_setg(errp, "CPU hotplug not supported for this machine");
+                cpu_remove_sync(cs);
+                return;
+            } else {
+                spapr_cpu_init(ms, cpu);
+                return;
+            }
+        }
+
+        spapr_cpu_init(ms, cpu);
+        spapr_cpu_reset(cpu);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_POWERPC_CPU_CORE)) {
+        spapr_core_plug(hotplug_dev, dev, errp);
     }
 }
 
@@ -2276,7 +2443,9 @@  static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
 static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine,
                                              DeviceState *dev)
 {
-    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_CPU) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_POWERPC_CPU_CORE)) {
         return HOTPLUG_HANDLER(machine);
     }
     return NULL;
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 744ea62..1063036 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -436,6 +436,9 @@  static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
     case SPAPR_DR_CONNECTOR_TYPE_LMB:
         hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_MEMORY;
         break;
+    case SPAPR_DR_CONNECTOR_TYPE_CPU:
+        hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU;
+        break;
     default:
         /* we shouldn't be signaling hotplug events for resources
          * that don't support them
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 34b12a3..7baa862 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -33,6 +33,7 @@ 
 
 #include "hw/ppc/spapr.h"
 #include "hw/ppc/spapr_vio.h"
+#include "hw/ppc/ppc.h"
 #include "qapi-event.h"
 #include "hw/boards.h"
 
@@ -159,6 +160,27 @@  static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_,
     rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 }
 
+/*
+ * Set the timebase offset of the CPU to that of first CPU.
+ * This helps hotplugged CPU to have the correct timebase offset.
+ */
+static void spapr_cpu_update_tb_offset(PowerPCCPU *cpu)
+{
+    PowerPCCPU *fcpu = POWERPC_CPU(first_cpu);
+
+    cpu->env.tb_env->tb_offset = fcpu->env.tb_env->tb_offset;
+}
+
+static void spapr_cpu_set_endianness(PowerPCCPU *cpu)
+{
+    PowerPCCPU *fcpu = POWERPC_CPU(first_cpu);
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(fcpu);
+
+    if (!pcc->interrupts_big_endian(fcpu)) {
+        cpu->env.spr[SPR_LPCR] |= LPCR_ILE;
+    }
+}
+
 static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr,
                            uint32_t token, uint32_t nargs,
                            target_ulong args,
@@ -195,6 +217,8 @@  static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr,
         env->nip = start;
         env->gpr[3] = r3;
         cs->halted = 0;
+        spapr_cpu_set_endianness(cpu);
+        spapr_cpu_update_tb_offset(cpu);
 
         qemu_cpu_kick(cs);
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 739f9ba..68d51d6 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -83,6 +83,11 @@  struct sPAPRMachineState {
     MemoryHotplugState hotplug_memory;
 };
 
+typedef struct sPAPRCoreState {
+    DeviceState *dev;
+    Error **errp;
+} sPAPRCoreState;
+
 #define H_SUCCESS         0
 #define H_BUSY            1        /* Hardware busy -- retry later */
 #define H_CLOSED          2        /* Resource closed */
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index d5ae53e..651dd41 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -30,6 +30,9 @@ 
 #include "qemu/error-report.h"
 #include "qapi/visitor.h"
 #include "hw/qdev-properties.h"
+#if !defined(CONFIG_USER_ONLY)
+#include "sysemu/sysemu.h"
+#endif
 
 //#define PPC_DUMP_CPU
 //#define PPC_DEBUG_SPR
@@ -8933,6 +8936,11 @@  static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
     }
 
 #if !defined(CONFIG_USER_ONLY)
+    if (cs->cpu_index >= max_cpus) {
+        error_setg(errp, "Cannot have more than %d CPUs", max_cpus);
+        return;
+    }
+
     cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
         + (cs->cpu_index % smp_threads);
 #endif