diff mbox

spapr: add "compat" machine option

Message ID 1380269200-8194-1-git-send-email-aik@ozlabs.ru
State New
Headers show

Commit Message

Alexey Kardashevskiy Sept. 27, 2013, 8:06 a.m. UTC
To be able to boot on newer hardware that the software support,
PowerISA defines a logical PVR, one per every PowerISA specification
version from 2.04.

This adds the "compat" option which takes values 205 or 206 and forces
QEMU to boot the guest with a logical PVR (CPU_POWERPC_LOGICAL_2_05 or
CPU_POWERPC_LOGICAL_2_06).

The guest reads the logical PVR value from "cpu-version" property of
a CPU device node.

Cc: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Cc: Andreas Färber <afaerber@suse.de>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 hw/ppc/spapr.c          | 40 ++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h  |  2 ++
 target-ppc/cpu-models.h | 10 ++++++++++
 target-ppc/cpu.h        |  3 +++
 target-ppc/kvm.c        |  2 ++
 vl.c                    |  4 ++++
 6 files changed, 61 insertions(+)

Comments

Alexander Graf Sept. 30, 2013, 11:25 a.m. UTC | #1
On 27.09.2013, at 10:06, Alexey Kardashevskiy wrote:

> To be able to boot on newer hardware that the software support,
> PowerISA defines a logical PVR, one per every PowerISA specification
> version from 2.04.
> 
> This adds the "compat" option which takes values 205 or 206 and forces
> QEMU to boot the guest with a logical PVR (CPU_POWERPC_LOGICAL_2_05 or
> CPU_POWERPC_LOGICAL_2_06).
> 
> The guest reads the logical PVR value from "cpu-version" property of
> a CPU device node.
> 
> Cc: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> Cc: Andreas Färber <afaerber@suse.de>
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
> hw/ppc/spapr.c          | 40 ++++++++++++++++++++++++++++++++++++++++
> include/hw/ppc/spapr.h  |  2 ++
> target-ppc/cpu-models.h | 10 ++++++++++
> target-ppc/cpu.h        |  3 +++
> target-ppc/kvm.c        |  2 ++
> vl.c                    |  4 ++++
> 6 files changed, 61 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index a09a1d9..737452d 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -33,6 +33,7 @@
> #include "sysemu/kvm.h"
> #include "kvm_ppc.h"
> #include "mmu-hash64.h"
> +#include "cpu-models.h"
> 
> #include "hw/boards.h"
> #include "hw/ppc/ppc.h"
> @@ -196,6 +197,26 @@ static XICSState *xics_system_init(int nr_servers, int nr_irqs)
>     return icp;
> }
> 
> +static void spapr_compat_mode_init(sPAPREnvironment *spapr)
> +{
> +    QemuOpts *machine_opts = qemu_get_machine_opts();
> +    uint64_t compat = qemu_opt_get_number(machine_opts, "compat", 0);
> +
> +    switch (compat) {
> +    case 0:
> +        break;
> +    case 205:
> +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_05;
> +        break;
> +    case 206:
> +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_06;

Does it make sense to declare compat mode a number or would a string be easier for users? I can imagine that "-machine compat=power6" is easier to understand for a user than "-machine compat=205".

> +        break;
> +    default:
> +        perror("Unsupported mode, only are 205, 206 supported\n");
> +        break;
> +    }
> +}
> +
> static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
> {
>     int ret = 0, offset;
> @@ -206,6 +227,7 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
> 
>     CPU_FOREACH(cpu) {
>         DeviceClass *dc = DEVICE_GET_CLASS(cpu);
> +        CPUPPCState *env = &(POWERPC_CPU(cpu)->env);
>         uint32_t associativity[] = {cpu_to_be32(0x5),
>                                     cpu_to_be32(0x0),
>                                     cpu_to_be32(0x0),
> @@ -238,6 +260,14 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
>         if (ret < 0) {
>             return ret;
>         }
> +
> +        if (env->arch_compat) {
> +            ret = fdt_setprop(fdt, offset, "cpu-version",
> +                              &env->arch_compat, sizeof(env->arch_compat));
> +            if (ret < 0) {
> +                return ret;
> +            }
> +        }
>     }
>     return ret;
> }
> @@ -1145,6 +1175,8 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>     spapr = g_malloc0(sizeof(*spapr));
>     QLIST_INIT(&spapr->phbs);
> 
> +    spapr_compat_mode_init(spapr);
> +
>     cpu_ppc_hypercall = emulate_spapr_hypercall;
> 
>     /* Allocate RMA if necessary */
> @@ -1226,6 +1258,14 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
> 
>         xics_cpu_setup(spapr->icp, cpu);
> 
> +        /*
> +         * If compat mode is set in the command line, pass it to CPU so KVM
> +         * will be able to set it in the host kernel.
> +         */
> +        if (spapr->arch_compat) {
> +            env->arch_compat = spapr->arch_compat;

You should set the compat mode in KVM here, rather than doing it in the put_registers call which gets invoked on every register sync. Or can the guest change the mode?

Also, we need to handle failure. If the kernel can not set the CPU to 2.05 mode for example (IIRC POWER8 doesn't allow you to) we should bail out here.

And then there's the TCG question. We either have to disable CPU features similar to how we handle it in KVM (by setting and honoring the respective bits in PCR) or we need to bail out too and declare compat mode unsupported for TCG.

And then there's the fact that the kernel interface isn't upstream in a way that

> +        }
> +
>         qemu_register_reset(spapr_cpu_reset, cpu);
>     }
> 
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index ca175b0..201c578 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -34,6 +34,8 @@ typedef struct sPAPREnvironment {
>     uint32_t epow_irq;
>     Notifier epow_notifier;
> 
> +    uint32_t arch_compat;        /* Compatible PVR from the command line */
> +
>     /* Migration state */
>     int htab_save_index;
>     bool htab_first_pass;
> diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
> index 49ba4a4..d7c033c 100644
> --- a/target-ppc/cpu-models.h
> +++ b/target-ppc/cpu-models.h
> @@ -583,6 +583,16 @@ enum {
>     CPU_POWERPC_RS64II             = 0x00340000,
>     CPU_POWERPC_RS64III            = 0x00360000,
>     CPU_POWERPC_RS64IV             = 0x00370000,
> +
> +    /* Logical CPUs */
> +    CPU_POWERPC_LOGICAL_MASK       = 0xFFFFFFFF,
> +    CPU_POWERPC_LOGICAL_2_04       = 0x0F000001,
> +    CPU_POWERPC_LOGICAL_2_05       = 0x0F000002,
> +    CPU_POWERPC_LOGICAL_2_06       = 0x0F000003,
> +    CPU_POWERPC_LOGICAL_2_06_PLUS  = 0x0F100003,
> +    CPU_POWERPC_LOGICAL_2_07       = 0x0F000004,
> +    CPU_POWERPC_LOGICAL_2_08       = 0x0F000005,

These don't really belong here.

> +
> #endif /* defined(TARGET_PPC64) */
>     /* Original POWER */
>     /* XXX: should be POWER (RIOS), RSC3308, RSC4608,
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 422a6bb..fc837c1 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -999,6 +999,9 @@ struct CPUPPCState {
>     /* Device control registers */
>     ppc_dcr_t *dcr_env;
> 
> +    /* Architecture compatibility mode */
> +    uint32_t arch_compat;

Do we really need to carry this in the vcpu struct? Or can we just fire-and-forget about it? If we want to preserve anything, it should be the PCR register.


Alex

> +
>     int dcache_line_size;
>     int icache_line_size;
> 
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 4bc4496..7b853a3 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -872,6 +872,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
>                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
>             }
>         }
> +        kvm_set_one_reg(cs, KVM_REG_PPC_ARCH_COMPAT, &env->arch_compat);
> #endif /* TARGET_PPC64 */
>     }
> 
> @@ -1083,6 +1084,7 @@ int kvm_arch_get_registers(CPUState *cs)
>                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
>             }
>         }
> +        kvm_get_one_reg(cs, KVM_REG_PPC_ARCH_COMPAT, &env->arch_compat);
> #endif
>     }
> 
> diff --git a/vl.c b/vl.c
> index 4e709d5..90dad7b 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -427,6 +427,10 @@ static QemuOptsList qemu_machine_opts = {
>             .name = "usb",
>             .type = QEMU_OPT_BOOL,
>             .help = "Set on/off to enable/disable usb",
> +        }, {
> +            .name = "compat",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "Selects compatibility mode on CPU",
>         },
>         { /* End of list */ }
>     },
> -- 
> 1.8.4.rc4
>
Paolo Bonzini Sept. 30, 2013, 11:52 a.m. UTC | #2
Il 30/09/2013 13:25, Alexander Graf ha scritto:
> On 27.09.2013, at 10:06, Alexey Kardashevskiy wrote:
> 
>> > To be able to boot on newer hardware that the software support,
>> > PowerISA defines a logical PVR, one per every PowerISA specification
>> > version from 2.04.
>> > 
>> > This adds the "compat" option which takes values 205 or 206 and forces
>> > QEMU to boot the guest with a logical PVR (CPU_POWERPC_LOGICAL_2_05 or
>> > CPU_POWERPC_LOGICAL_2_06).
>> > 
>> > The guest reads the logical PVR value from "cpu-version" property of
>> > a CPU device node.
>> > 

Why is the option under -machine instead of -cpu?

Paolo
Alexey Kardashevskiy Sept. 30, 2013, 12:57 p.m. UTC | #3
On 30.09.2013 21:52, Paolo Bonzini wrote:
> Il 30/09/2013 13:25, Alexander Graf ha scritto:
>> On 27.09.2013, at 10:06, Alexey Kardashevskiy wrote:
>>
>>>> To be able to boot on newer hardware that the software support,
>>>> PowerISA defines a logical PVR, one per every PowerISA specification
>>>> version from 2.04.
>>>>
>>>> This adds the "compat" option which takes values 205 or 206 and forces
>>>> QEMU to boot the guest with a logical PVR (CPU_POWERPC_LOGICAL_2_05 or
>>>> CPU_POWERPC_LOGICAL_2_06).
>>>>
>>>> The guest reads the logical PVR value from "cpu-version" property of
>>>> a CPU device node.
>>>>
> 
> Why is the option under -machine instead of -cpu?

Because it is still the same CPU and the guest will still read the real
PVR from the hardware (which it may not support but this is why we need
compatibility mode).

It is possible to run QEMU with -cpu POWER8 with some old distro which
does not know about power8, it will switch to power6-compat mode, the
the user does "yum update" (or analog) and then the new kernel (with
power8 support) will do H_CAS again and this time "raw" power8 should be
selected.
Alexey Kardashevskiy Sept. 30, 2013, 1:22 p.m. UTC | #4
On 30.09.2013 21:25, Alexander Graf wrote:
> 
> On 27.09.2013, at 10:06, Alexey Kardashevskiy wrote:
> 
>> To be able to boot on newer hardware that the software support,
>> PowerISA defines a logical PVR, one per every PowerISA specification
>> version from 2.04.
>>
>> This adds the "compat" option which takes values 205 or 206 and forces
>> QEMU to boot the guest with a logical PVR (CPU_POWERPC_LOGICAL_2_05 or
>> CPU_POWERPC_LOGICAL_2_06).
>>
>> The guest reads the logical PVR value from "cpu-version" property of
>> a CPU device node.
>>
>> Cc: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> Cc: Andreas Färber <afaerber@suse.de>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>> hw/ppc/spapr.c          | 40 ++++++++++++++++++++++++++++++++++++++++
>> include/hw/ppc/spapr.h  |  2 ++
>> target-ppc/cpu-models.h | 10 ++++++++++
>> target-ppc/cpu.h        |  3 +++
>> target-ppc/kvm.c        |  2 ++
>> vl.c                    |  4 ++++
>> 6 files changed, 61 insertions(+)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index a09a1d9..737452d 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -33,6 +33,7 @@
>> #include "sysemu/kvm.h"
>> #include "kvm_ppc.h"
>> #include "mmu-hash64.h"
>> +#include "cpu-models.h"
>>
>> #include "hw/boards.h"
>> #include "hw/ppc/ppc.h"
>> @@ -196,6 +197,26 @@ static XICSState *xics_system_init(int nr_servers, int nr_irqs)
>>     return icp;
>> }
>>
>> +static void spapr_compat_mode_init(sPAPREnvironment *spapr)
>> +{
>> +    QemuOpts *machine_opts = qemu_get_machine_opts();
>> +    uint64_t compat = qemu_opt_get_number(machine_opts, "compat", 0);
>> +
>> +    switch (compat) {
>> +    case 0:
>> +        break;
>> +    case 205:
>> +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_05;
>> +        break;
>> +    case 206:
>> +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_06;
> 

> Does it make sense to declare compat mode a number or would a string
> be easier for users? I can imagine that "-machine compat=power6" is
> easier to understand for a user than "-machine compat=205".

I just follow the PowerISA spec. It does not say anywhere (at least I do
not see it) that 2.05==power6. 2.05 was released when power6 was
released and power6 supports 2.05 but these are not synonims. And
"compat=power6" would not set "cpu-version" to any of power6 PVRs, it
still will be a logical PVR. It confuses me too to tell qemu "205"
instead of "power6" but it is the spec to blame :)



>> +        break;
>> +    default:
>> +        perror("Unsupported mode, only are 205, 206 supported\n");
>> +        break;
>> +    }
>> +}
>> +
>> static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
>> {
>>     int ret = 0, offset;
>> @@ -206,6 +227,7 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
>>
>>     CPU_FOREACH(cpu) {
>>         DeviceClass *dc = DEVICE_GET_CLASS(cpu);
>> +        CPUPPCState *env = &(POWERPC_CPU(cpu)->env);
>>         uint32_t associativity[] = {cpu_to_be32(0x5),
>>                                     cpu_to_be32(0x0),
>>                                     cpu_to_be32(0x0),
>> @@ -238,6 +260,14 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
>>         if (ret < 0) {
>>             return ret;
>>         }
>> +
>> +        if (env->arch_compat) {
>> +            ret = fdt_setprop(fdt, offset, "cpu-version",
>> +                              &env->arch_compat, sizeof(env->arch_compat));
>> +            if (ret < 0) {
>> +                return ret;
>> +            }
>> +        }
>>     }
>>     return ret;
>> }
>> @@ -1145,6 +1175,8 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>>     spapr = g_malloc0(sizeof(*spapr));
>>     QLIST_INIT(&spapr->phbs);
>>
>> +    spapr_compat_mode_init(spapr);
>> +
>>     cpu_ppc_hypercall = emulate_spapr_hypercall;
>>
>>     /* Allocate RMA if necessary */
>> @@ -1226,6 +1258,14 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>>
>>         xics_cpu_setup(spapr->icp, cpu);
>>
>> +        /*
>> +         * If compat mode is set in the command line, pass it to CPU so KVM
>> +         * will be able to set it in the host kernel.
>> +         */
>> +        if (spapr->arch_compat) {
>> +            env->arch_compat = spapr->arch_compat;
> 

> You should set the compat mode in KVM here, rather than doing it in
the put_registers call which gets invoked on every register sync. Or can
the guest change the mode?


I will change it here in the next patch (which requires kernel changes
which are not there yet). The guest cannot change it directly but it can
indirectly via "client-architecture-support".


> Also, we need to handle failure. If the kernel can not set the CPU
> to
2.05 mode for example (IIRC POWER8 doesn't allow you to) we should bail
out here.

Yep, I'll add this easy check :)

> And then there's the TCG question. We either have to disable CPU
features similar to how we handle it in KVM (by setting and honoring the
respective bits in PCR) or we need to bail out too and declare compat
mode unsupported for TCG.

At the moment we want to run old distro on new CPUs. This patch would
make more sense with the "ibm,client-architecture-support" and "power8
registers migration" patches which I did not post yet.

Disabling "unsupported" bits in TCG would be really nice indeed and we
should eventually do this but 1) it will not really hurt the guest if we
did not disable some feature (really old guest will not use it and
that's it)  2) once created, PowerPCCPUState (or whatever the exact name
is, I am writing this mail on windows machine :) ) is not very flexible
to reconfigure...


> And then there's the fact that the kernel interface isn't upstream in a way that

? unfinished sentence? What is missing in the kernel right now?


>> +        }
>> +
>>         qemu_register_reset(spapr_cpu_reset, cpu);
>>     }
>>
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index ca175b0..201c578 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -34,6 +34,8 @@ typedef struct sPAPREnvironment {
>>     uint32_t epow_irq;
>>     Notifier epow_notifier;
>>
>> +    uint32_t arch_compat;        /* Compatible PVR from the command line */
>> +
>>     /* Migration state */
>>     int htab_save_index;
>>     bool htab_first_pass;
>> diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
>> index 49ba4a4..d7c033c 100644
>> --- a/target-ppc/cpu-models.h
>> +++ b/target-ppc/cpu-models.h
>> @@ -583,6 +583,16 @@ enum {
>>     CPU_POWERPC_RS64II             = 0x00340000,
>>     CPU_POWERPC_RS64III            = 0x00360000,
>>     CPU_POWERPC_RS64IV             = 0x00370000,
>> +
>> +    /* Logical CPUs */
>> +    CPU_POWERPC_LOGICAL_MASK       = 0xFFFFFFFF,
>> +    CPU_POWERPC_LOGICAL_2_04       = 0x0F000001,
>> +    CPU_POWERPC_LOGICAL_2_05       = 0x0F000002,
>> +    CPU_POWERPC_LOGICAL_2_06       = 0x0F000003,
>> +    CPU_POWERPC_LOGICAL_2_06_PLUS  = 0x0F100003,
>> +    CPU_POWERPC_LOGICAL_2_07       = 0x0F000004,
>> +    CPU_POWERPC_LOGICAL_2_08       = 0x0F000005,
> 
> These don't really belong here.


Sorry, I do not understand. These are PVRs which are used in
"cpu-version" DT property. They are logical PVRs but still PVRs - i.e.
the guest has PVR masks for them too.


>> +
>> #endif /* defined(TARGET_PPC64) */
>>     /* Original POWER */
>>     /* XXX: should be POWER (RIOS), RSC3308, RSC4608,
>> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
>> index 422a6bb..fc837c1 100644
>> --- a/target-ppc/cpu.h
>> +++ b/target-ppc/cpu.h
>> @@ -999,6 +999,9 @@ struct CPUPPCState {
>>     /* Device control registers */
>>     ppc_dcr_t *dcr_env;
>>
>> +    /* Architecture compatibility mode */
>> +    uint32_t arch_compat;
> 


> Do we really need to carry this in the vcpu struct? Or can we just
> fire-and-forget about it? If we want to preserve anything, it should
> be the PCR register.

This is the current PVR value aka "cpu-version" property. It may change
during reboots as every reboot does the "client-architecture-support"
call so the logical PVR can change.


> 
> 
> Alex
> 
>> +
>>     int dcache_line_size;
>>     int icache_line_size;
>>
>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
>> index 4bc4496..7b853a3 100644
>> --- a/target-ppc/kvm.c
>> +++ b/target-ppc/kvm.c
>> @@ -872,6 +872,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
>>                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
>>             }
>>         }
>> +        kvm_set_one_reg(cs, KVM_REG_PPC_ARCH_COMPAT, &env->arch_compat);
>> #endif /* TARGET_PPC64 */
>>     }
>>
>> @@ -1083,6 +1084,7 @@ int kvm_arch_get_registers(CPUState *cs)
>>                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
>>             }
>>         }
>> +        kvm_get_one_reg(cs, KVM_REG_PPC_ARCH_COMPAT, &env->arch_compat);
>> #endif
>>     }
>>
>> diff --git a/vl.c b/vl.c
>> index 4e709d5..90dad7b 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -427,6 +427,10 @@ static QemuOptsList qemu_machine_opts = {
>>             .name = "usb",
>>             .type = QEMU_OPT_BOOL,
>>             .help = "Set on/off to enable/disable usb",
>> +        }, {
>> +            .name = "compat",
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "Selects compatibility mode on CPU",
>>         },
>>         { /* End of list */ }
>>     },
>> -- 
>> 1.8.4.rc4
>>
>
Alexander Graf Sept. 30, 2013, 2:49 p.m. UTC | #5
On 09/30/2013 03:22 PM, Alexey Kardashevskiy wrote:
> On 30.09.2013 21:25, Alexander Graf wrote:
>> On 27.09.2013, at 10:06, Alexey Kardashevskiy wrote:
>>
>>> To be able to boot on newer hardware that the software support,
>>> PowerISA defines a logical PVR, one per every PowerISA specification
>>> version from 2.04.
>>>
>>> This adds the "compat" option which takes values 205 or 206 and forces
>>> QEMU to boot the guest with a logical PVR (CPU_POWERPC_LOGICAL_2_05 or
>>> CPU_POWERPC_LOGICAL_2_06).
>>>
>>> The guest reads the logical PVR value from "cpu-version" property of
>>> a CPU device node.
>>>
>>> Cc: Nikunj A Dadhania<nikunj@linux.vnet.ibm.com>
>>> Cc: Andreas Färber<afaerber@suse.de>
>>> Signed-off-by: Alexey Kardashevskiy<aik@ozlabs.ru>
>>> ---
>>> hw/ppc/spapr.c          | 40 ++++++++++++++++++++++++++++++++++++++++
>>> include/hw/ppc/spapr.h  |  2 ++
>>> target-ppc/cpu-models.h | 10 ++++++++++
>>> target-ppc/cpu.h        |  3 +++
>>> target-ppc/kvm.c        |  2 ++
>>> vl.c                    |  4 ++++
>>> 6 files changed, 61 insertions(+)
>>>
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index a09a1d9..737452d 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -33,6 +33,7 @@
>>> #include "sysemu/kvm.h"
>>> #include "kvm_ppc.h"
>>> #include "mmu-hash64.h"
>>> +#include "cpu-models.h"
>>>
>>> #include "hw/boards.h"
>>> #include "hw/ppc/ppc.h"
>>> @@ -196,6 +197,26 @@ static XICSState *xics_system_init(int nr_servers, int nr_irqs)
>>>      return icp;
>>> }
>>>
>>> +static void spapr_compat_mode_init(sPAPREnvironment *spapr)
>>> +{
>>> +    QemuOpts *machine_opts = qemu_get_machine_opts();
>>> +    uint64_t compat = qemu_opt_get_number(machine_opts, "compat", 0);
>>> +
>>> +    switch (compat) {
>>> +    case 0:
>>> +        break;
>>> +    case 205:
>>> +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_05;
>>> +        break;
>>> +    case 206:
>>> +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_06;
>> Does it make sense to declare compat mode a number or would a string
>> be easier for users? I can imagine that "-machine compat=power6" is
>> easier to understand for a user than "-machine compat=205".
> I just follow the PowerISA spec. It does not say anywhere (at least I do
> not see it) that 2.05==power6. 2.05 was released when power6 was
> released and power6 supports 2.05 but these are not synonims. And
> "compat=power6" would not set "cpu-version" to any of power6 PVRs, it
> still will be a logical PVR. It confuses me too to tell qemu "205"
> instead of "power6" but it is the spec to blame :)

So what is "2_06 plus" then? :)

To me it really sounds like a 1:1 mapping to cores rather than specs - 
the ISA defines a lot more capabilities than a single core necessarily 
supports, especially with the inclusion of booke into the generic ppc spec.

>
>
>
>>> +        break;
>>> +    default:
>>> +        perror("Unsupported mode, only are 205, 206 supported\n");
>>> +        break;
>>> +    }
>>> +}
>>> +
>>> static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
>>> {
>>>      int ret = 0, offset;
>>> @@ -206,6 +227,7 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
>>>
>>>      CPU_FOREACH(cpu) {
>>>          DeviceClass *dc = DEVICE_GET_CLASS(cpu);
>>> +        CPUPPCState *env =&(POWERPC_CPU(cpu)->env);
>>>          uint32_t associativity[] = {cpu_to_be32(0x5),
>>>                                      cpu_to_be32(0x0),
>>>                                      cpu_to_be32(0x0),
>>> @@ -238,6 +260,14 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
>>>          if (ret<  0) {
>>>              return ret;
>>>          }
>>> +
>>> +        if (env->arch_compat) {
>>> +            ret = fdt_setprop(fdt, offset, "cpu-version",
>>> +&env->arch_compat, sizeof(env->arch_compat));
>>> +            if (ret<  0) {
>>> +                return ret;
>>> +            }
>>> +        }
>>>      }
>>>      return ret;
>>> }
>>> @@ -1145,6 +1175,8 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>>>      spapr = g_malloc0(sizeof(*spapr));
>>>      QLIST_INIT(&spapr->phbs);
>>>
>>> +    spapr_compat_mode_init(spapr);
>>> +
>>>      cpu_ppc_hypercall = emulate_spapr_hypercall;
>>>
>>>      /* Allocate RMA if necessary */
>>> @@ -1226,6 +1258,14 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>>>
>>>          xics_cpu_setup(spapr->icp, cpu);
>>>
>>> +        /*
>>> +         * If compat mode is set in the command line, pass it to CPU so KVM
>>> +         * will be able to set it in the host kernel.
>>> +         */
>>> +        if (spapr->arch_compat) {
>>> +            env->arch_compat = spapr->arch_compat;
>> You should set the compat mode in KVM here, rather than doing it in
> the put_registers call which gets invoked on every register sync. Or can
> the guest change the mode?
>
>
> I will change it here in the next patch (which requires kernel changes
> which are not there yet). The guest cannot change it directly but it can
> indirectly via "client-architecture-support".

They probably want a generic callback then. What happens on reset?

>
>
>> Also, we need to handle failure. If the kernel can not set the CPU
>> to
> 2.05 mode for example (IIRC POWER8 doesn't allow you to) we should bail
> out here.
>
> Yep, I'll add this easy check :)
>
>> And then there's the TCG question. We either have to disable CPU
> features similar to how we handle it in KVM (by setting and honoring the
> respective bits in PCR) or we need to bail out too and declare compat
> mode unsupported for TCG.
>
> At the moment we want to run old distro on new CPUs. This patch would
> make more sense with the "ibm,client-architecture-support" and "power8
> registers migration" patches which I did not post yet.
>
> Disabling "unsupported" bits in TCG would be really nice indeed and we
> should eventually do this but 1) it will not really hurt the guest if we
> did not disable some feature (really old guest will not use it and
> that's it)  2) once created, PowerPCCPUState (or whatever the exact name
> is, I am writing this mail on windows machine :) ) is not very flexible
> to reconfigure...

Can't you just set the bits in PCR and add an XXX comment indicating 
that we're currently not honoring them? Then fron the machine code point 
of view, everything is implemented.

>
>
>> And then there's the fact that the kernel interface isn't upstream in a way that
> ? unfinished sentence? What is missing in the kernel right now?

Eh. I think I wanted to say that this depends on in-kernel patches that 
are not upstream yet :).

>
>
>>> +        }
>>> +
>>>          qemu_register_reset(spapr_cpu_reset, cpu);
>>>      }
>>>
>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>> index ca175b0..201c578 100644
>>> --- a/include/hw/ppc/spapr.h
>>> +++ b/include/hw/ppc/spapr.h
>>> @@ -34,6 +34,8 @@ typedef struct sPAPREnvironment {
>>>      uint32_t epow_irq;
>>>      Notifier epow_notifier;
>>>
>>> +    uint32_t arch_compat;        /* Compatible PVR from the command line */
>>> +
>>>      /* Migration state */
>>>      int htab_save_index;
>>>      bool htab_first_pass;
>>> diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
>>> index 49ba4a4..d7c033c 100644
>>> --- a/target-ppc/cpu-models.h
>>> +++ b/target-ppc/cpu-models.h
>>> @@ -583,6 +583,16 @@ enum {
>>>      CPU_POWERPC_RS64II             = 0x00340000,
>>>      CPU_POWERPC_RS64III            = 0x00360000,
>>>      CPU_POWERPC_RS64IV             = 0x00370000,
>>> +
>>> +    /* Logical CPUs */
>>> +    CPU_POWERPC_LOGICAL_MASK       = 0xFFFFFFFF,
>>> +    CPU_POWERPC_LOGICAL_2_04       = 0x0F000001,
>>> +    CPU_POWERPC_LOGICAL_2_05       = 0x0F000002,
>>> +    CPU_POWERPC_LOGICAL_2_06       = 0x0F000003,
>>> +    CPU_POWERPC_LOGICAL_2_06_PLUS  = 0x0F100003,
>>> +    CPU_POWERPC_LOGICAL_2_07       = 0x0F000004,
>>> +    CPU_POWERPC_LOGICAL_2_08       = 0x0F000005,
>> These don't really belong here.
>
> Sorry, I do not understand. These are PVRs which are used in
> "cpu-version" DT property. They are logical PVRs but still PVRs - i.e.
> the guest has PVR masks for them too.

But they are specific to PAPR, not PowerPC in general, no? And you would 
never find one in the PVR register of a guest.

>
>
>>> +
>>> #endif /* defined(TARGET_PPC64) */
>>>      /* Original POWER */
>>>      /* XXX: should be POWER (RIOS), RSC3308, RSC4608,
>>> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
>>> index 422a6bb..fc837c1 100644
>>> --- a/target-ppc/cpu.h
>>> +++ b/target-ppc/cpu.h
>>> @@ -999,6 +999,9 @@ struct CPUPPCState {
>>>      /* Device control registers */
>>>      ppc_dcr_t *dcr_env;
>>>
>>> +    /* Architecture compatibility mode */
>>> +    uint32_t arch_compat;
>
>> Do we really need to carry this in the vcpu struct? Or can we just
>> fire-and-forget about it? If we want to preserve anything, it should
>> be the PCR register.
> This is the current PVR value aka "cpu-version" property. It may change
> during reboots as every reboot does the "client-architecture-support"
> call so the logical PVR can change.

Ok, so again: What happens on reset / reboot? Do we want to preserve the 
compatibility setting or does that get reset as well?


Alex
Alexey Kardashevskiy Nov. 5, 2013, 2:19 a.m. UTC | #6
On 10/01/2013 12:49 AM, Alexander Graf wrote:
> On 09/30/2013 03:22 PM, Alexey Kardashevskiy wrote:
>> On 30.09.2013 21:25, Alexander Graf wrote:
>>> On 27.09.2013, at 10:06, Alexey Kardashevskiy wrote:


I realized it has been a while since I got your response and did not answer
:) Sorry for the delay.


>>>
>>>> To be able to boot on newer hardware that the software support,
>>>> PowerISA defines a logical PVR, one per every PowerISA specification
>>>> version from 2.04.
>>>>
>>>> This adds the "compat" option which takes values 205 or 206 and forces
>>>> QEMU to boot the guest with a logical PVR (CPU_POWERPC_LOGICAL_2_05 or
>>>> CPU_POWERPC_LOGICAL_2_06).
>>>>
>>>> The guest reads the logical PVR value from "cpu-version" property of
>>>> a CPU device node.
>>>>
>>>> Cc: Nikunj A Dadhania<nikunj@linux.vnet.ibm.com>
>>>> Cc: Andreas Färber<afaerber@suse.de>
>>>> Signed-off-by: Alexey Kardashevskiy<aik@ozlabs.ru>
>>>> ---
>>>> hw/ppc/spapr.c          | 40 ++++++++++++++++++++++++++++++++++++++++
>>>> include/hw/ppc/spapr.h  |  2 ++
>>>> target-ppc/cpu-models.h | 10 ++++++++++
>>>> target-ppc/cpu.h        |  3 +++
>>>> target-ppc/kvm.c        |  2 ++
>>>> vl.c                    |  4 ++++
>>>> 6 files changed, 61 insertions(+)
>>>>
>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>> index a09a1d9..737452d 100644
>>>> --- a/hw/ppc/spapr.c
>>>> +++ b/hw/ppc/spapr.c
>>>> @@ -33,6 +33,7 @@
>>>> #include "sysemu/kvm.h"
>>>> #include "kvm_ppc.h"
>>>> #include "mmu-hash64.h"
>>>> +#include "cpu-models.h"
>>>>
>>>> #include "hw/boards.h"
>>>> #include "hw/ppc/ppc.h"
>>>> @@ -196,6 +197,26 @@ static XICSState *xics_system_init(int nr_servers,
>>>> int nr_irqs)
>>>>      return icp;
>>>> }
>>>>
>>>> +static void spapr_compat_mode_init(sPAPREnvironment *spapr)
>>>> +{
>>>> +    QemuOpts *machine_opts = qemu_get_machine_opts();
>>>> +    uint64_t compat = qemu_opt_get_number(machine_opts, "compat", 0);
>>>> +
>>>> +    switch (compat) {
>>>> +    case 0:
>>>> +        break;
>>>> +    case 205:
>>>> +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_05;
>>>> +        break;
>>>> +    case 206:
>>>> +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_06;
>>> Does it make sense to declare compat mode a number or would a string
>>> be easier for users? I can imagine that "-machine compat=power6" is
>>> easier to understand for a user than "-machine compat=205".
>> I just follow the PowerISA spec. It does not say anywhere (at least I do
>> not see it) that 2.05==power6. 2.05 was released when power6 was
>> released and power6 supports 2.05 but these are not synonims. And
>> "compat=power6" would not set "cpu-version" to any of power6 PVRs, it
>> still will be a logical PVR. It confuses me too to tell qemu "205"
>> instead of "power6" but it is the spec to blame :)
> 
> So what is "2_06 plus" then? :)

No idea. Why does it matter here?


> To me it really sounds like a 1:1 mapping to cores rather than specs - the
> ISA defines a lot more capabilities than a single core necessarily
> supports, especially with the inclusion of booke into the generic ppc spec.


Sounds - may be. But still not the same. The guest kernel has different
descriptors for power6(raw) and power6(architected) with different flags
and (slightly?) different behavior.


>>>> +        break;
>>>> +    default:
>>>> +        perror("Unsupported mode, only are 205, 206 supported\n");
>>>> +        break;
>>>> +    }
>>>> +}
>>>> +
>>>> static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
>>>> {
>>>>      int ret = 0, offset;
>>>> @@ -206,6 +227,7 @@ static int spapr_fixup_cpu_dt(void *fdt,
>>>> sPAPREnvironment *spapr)
>>>>
>>>>      CPU_FOREACH(cpu) {
>>>>          DeviceClass *dc = DEVICE_GET_CLASS(cpu);
>>>> +        CPUPPCState *env =&(POWERPC_CPU(cpu)->env);
>>>>          uint32_t associativity[] = {cpu_to_be32(0x5),
>>>>                                      cpu_to_be32(0x0),
>>>>                                      cpu_to_be32(0x0),
>>>> @@ -238,6 +260,14 @@ static int spapr_fixup_cpu_dt(void *fdt,
>>>> sPAPREnvironment *spapr)
>>>>          if (ret<  0) {
>>>>              return ret;
>>>>          }
>>>> +
>>>> +        if (env->arch_compat) {
>>>> +            ret = fdt_setprop(fdt, offset, "cpu-version",
>>>> +&env->arch_compat, sizeof(env->arch_compat));
>>>> +            if (ret<  0) {
>>>> +                return ret;
>>>> +            }
>>>> +        }
>>>>      }
>>>>      return ret;
>>>> }
>>>> @@ -1145,6 +1175,8 @@ static void ppc_spapr_init(QEMUMachineInitArgs
>>>> *args)
>>>>      spapr = g_malloc0(sizeof(*spapr));
>>>>      QLIST_INIT(&spapr->phbs);
>>>>
>>>> +    spapr_compat_mode_init(spapr);
>>>> +
>>>>      cpu_ppc_hypercall = emulate_spapr_hypercall;
>>>>
>>>>      /* Allocate RMA if necessary */
>>>> @@ -1226,6 +1258,14 @@ static void ppc_spapr_init(QEMUMachineInitArgs
>>>> *args)
>>>>
>>>>          xics_cpu_setup(spapr->icp, cpu);
>>>>
>>>> +        /*
>>>> +         * If compat mode is set in the command line, pass it to CPU
>>>> so KVM
>>>> +         * will be able to set it in the host kernel.
>>>> +         */
>>>> +        if (spapr->arch_compat) {
>>>> +            env->arch_compat = spapr->arch_compat;
>>> You should set the compat mode in KVM here, rather than doing it in
>> the put_registers call which gets invoked on every register sync. Or can
>> the guest change the mode?
>>
>>
>> I will change it here in the next patch (which requires kernel changes
>> which are not there yet). The guest cannot change it directly but it can
>> indirectly via "client-architecture-support".
> 
> They probably want a generic callback then.


"They"? What callback on what? :) PCR change is privileged instruction so
the guest is not supposed to change it directly and AFAIK (sorry for my
ignorance) "ibm,client-architecture-support" RTAS call is the only way to
set it and it is spapr-only. How do other PPC machines do that change?


> What happens on reset?


PCR has to be reset I believe.


>>> Also, we need to handle failure. If the kernel can not set the CPU
>>> to
>> 2.05 mode for example (IIRC POWER8 doesn't allow you to) we should bail
>> out here.
>>
>> Yep, I'll add this easy check :)
>>
>>> And then there's the TCG question. We either have to disable CPU
>> features similar to how we handle it in KVM (by setting and honoring the
>> respective bits in PCR) or we need to bail out too and declare compat
>> mode unsupported for TCG.
>>
>> At the moment we want to run old distro on new CPUs. This patch would
>> make more sense with the "ibm,client-architecture-support" and "power8
>> registers migration" patches which I did not post yet.
>>
>> Disabling "unsupported" bits in TCG would be really nice indeed and we
>> should eventually do this but 1) it will not really hurt the guest if we
>> did not disable some feature (really old guest will not use it and
>> that's it)  2) once created, PowerPCCPUState (or whatever the exact name
>> is, I am writing this mail on windows machine :) ) is not very flexible
>> to reconfigure...
> 
> Can't you just set the bits in PCR and add an XXX comment indicating that
> we're currently not honoring them? Then fron the machine code point of
> view, everything is implemented.


Is not it what the patch does at the moment to TCG (except missing comment)?



>>> And then there's the fact that the kernel interface isn't upstream in a
>>> way that
>> ? unfinished sentence? What is missing in the kernel right now?
> 
> Eh. I think I wanted to say that this depends on in-kernel patches that are
> not upstream yet :).
> 
>>
>>
>>>> +        }
>>>> +
>>>>          qemu_register_reset(spapr_cpu_reset, cpu);
>>>>      }
>>>>
>>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>>> index ca175b0..201c578 100644
>>>> --- a/include/hw/ppc/spapr.h
>>>> +++ b/include/hw/ppc/spapr.h
>>>> @@ -34,6 +34,8 @@ typedef struct sPAPREnvironment {
>>>>      uint32_t epow_irq;
>>>>      Notifier epow_notifier;
>>>>
>>>> +    uint32_t arch_compat;        /* Compatible PVR from the command
>>>> line */
>>>> +
>>>>      /* Migration state */
>>>>      int htab_save_index;
>>>>      bool htab_first_pass;
>>>> diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
>>>> index 49ba4a4..d7c033c 100644
>>>> --- a/target-ppc/cpu-models.h
>>>> +++ b/target-ppc/cpu-models.h
>>>> @@ -583,6 +583,16 @@ enum {
>>>>      CPU_POWERPC_RS64II             = 0x00340000,
>>>>      CPU_POWERPC_RS64III            = 0x00360000,
>>>>      CPU_POWERPC_RS64IV             = 0x00370000,
>>>> +
>>>> +    /* Logical CPUs */
>>>> +    CPU_POWERPC_LOGICAL_MASK       = 0xFFFFFFFF,
>>>> +    CPU_POWERPC_LOGICAL_2_04       = 0x0F000001,
>>>> +    CPU_POWERPC_LOGICAL_2_05       = 0x0F000002,
>>>> +    CPU_POWERPC_LOGICAL_2_06       = 0x0F000003,
>>>> +    CPU_POWERPC_LOGICAL_2_06_PLUS  = 0x0F100003,
>>>> +    CPU_POWERPC_LOGICAL_2_07       = 0x0F000004,
>>>> +    CPU_POWERPC_LOGICAL_2_08       = 0x0F000005,
>>> These don't really belong here.
>>
>> Sorry, I do not understand. These are PVRs which are used in
>> "cpu-version" DT property. They are logical PVRs but still PVRs - i.e.
>> the guest has PVR masks for them too.
> 
> But they are specific to PAPR, not PowerPC in general, no? And you would
> never find one in the PVR register of a guest.


Yes, never. But they all are in the same array in
arch/powerpc/kernel/cputable.c. How is that different?


>>>> +
>>>> #endif /* defined(TARGET_PPC64) */
>>>>      /* Original POWER */
>>>>      /* XXX: should be POWER (RIOS), RSC3308, RSC4608,
>>>> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
>>>> index 422a6bb..fc837c1 100644
>>>> --- a/target-ppc/cpu.h
>>>> +++ b/target-ppc/cpu.h
>>>> @@ -999,6 +999,9 @@ struct CPUPPCState {
>>>>      /* Device control registers */
>>>>      ppc_dcr_t *dcr_env;
>>>>
>>>> +    /* Architecture compatibility mode */
>>>> +    uint32_t arch_compat;
>>
>>> Do we really need to carry this in the vcpu struct? Or can we just
>>> fire-and-forget about it? If we want to preserve anything, it should
>>> be the PCR register.
>> This is the current PVR value aka "cpu-version" property. It may change
>> during reboots as every reboot does the "client-architecture-support"
>> call so the logical PVR can change.
> 
> Ok, so again: What happens on reset / reboot? Do we want to preserve the
> compatibility setting or does that get reset as well?

The setting must be preserved on reboot ("client-architecture-support"
definitely expects it to be preserved). I am not sure about reset, I guess
it can/should be reset.
Paolo Bonzini Nov. 5, 2013, 9:06 a.m. UTC | #7
Il 30/09/2013 14:57, Alexey Kardashevskiy ha scritto:
>> > Why is the option under -machine instead of -cpu?
> Because it is still the same CPU and the guest will still read the real
> PVR from the hardware (which it may not support but this is why we need
> compatibility mode).

How do you support migration from a newer to an older CPU then?  I think
the guest should never see anything about the hardware CPU model.

Paolo

> It is possible to run QEMU with -cpu POWER8 with some old distro which
> does not know about power8, it will switch to power6-compat mode, the
> the user does "yum update" (or analog) and then the new kernel (with
> power8 support) will do H_CAS again and this time "raw" power8 should be
> selected.
Alexander Graf Nov. 5, 2013, 9:16 a.m. UTC | #8
On 05.11.2013, at 10:06, Paolo Bonzini <pbonzini@redhat.com> wrote:

> Il 30/09/2013 14:57, Alexey Kardashevskiy ha scritto:
>>>> Why is the option under -machine instead of -cpu?
>> Because it is still the same CPU and the guest will still read the real
>> PVR from the hardware (which it may not support but this is why we need
>> compatibility mode).
> 
> How do you support migration from a newer to an older CPU then?  I think
> the guest should never see anything about the hardware CPU model.

POWER can't model that. It always leaks the host CPU information into the guest. It's the guest kernel's responsibility to not expose that change to user space.

Yes, it's broken :). I'm not even sure there is any sensible way to do live migration between different CPU types.


Alex
Alexander Graf Nov. 5, 2013, 9:23 a.m. UTC | #9
On 05.11.2013, at 03:19, Alexey Kardashevskiy <aik@ozlabs.ru> wrote:

> On 10/01/2013 12:49 AM, Alexander Graf wrote:
>> On 09/30/2013 03:22 PM, Alexey Kardashevskiy wrote:
>>> On 30.09.2013 21:25, Alexander Graf wrote:
>>>> On 27.09.2013, at 10:06, Alexey Kardashevskiy wrote:
> 
> 
> I realized it has been a while since I got your response and did not answer
> :) Sorry for the delay.
> 
> 
>>>> 
>>>>> To be able to boot on newer hardware that the software support,
>>>>> PowerISA defines a logical PVR, one per every PowerISA specification
>>>>> version from 2.04.
>>>>> 
>>>>> This adds the "compat" option which takes values 205 or 206 and forces
>>>>> QEMU to boot the guest with a logical PVR (CPU_POWERPC_LOGICAL_2_05 or
>>>>> CPU_POWERPC_LOGICAL_2_06).
>>>>> 
>>>>> The guest reads the logical PVR value from "cpu-version" property of
>>>>> a CPU device node.
>>>>> 
>>>>> Cc: Nikunj A Dadhania<nikunj@linux.vnet.ibm.com>
>>>>> Cc: Andreas Färber<afaerber@suse.de>
>>>>> Signed-off-by: Alexey Kardashevskiy<aik@ozlabs.ru>
>>>>> ---
>>>>> hw/ppc/spapr.c          | 40 ++++++++++++++++++++++++++++++++++++++++
>>>>> include/hw/ppc/spapr.h  |  2 ++
>>>>> target-ppc/cpu-models.h | 10 ++++++++++
>>>>> target-ppc/cpu.h        |  3 +++
>>>>> target-ppc/kvm.c        |  2 ++
>>>>> vl.c                    |  4 ++++
>>>>> 6 files changed, 61 insertions(+)
>>>>> 
>>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>>> index a09a1d9..737452d 100644
>>>>> --- a/hw/ppc/spapr.c
>>>>> +++ b/hw/ppc/spapr.c
>>>>> @@ -33,6 +33,7 @@
>>>>> #include "sysemu/kvm.h"
>>>>> #include "kvm_ppc.h"
>>>>> #include "mmu-hash64.h"
>>>>> +#include "cpu-models.h"
>>>>> 
>>>>> #include "hw/boards.h"
>>>>> #include "hw/ppc/ppc.h"
>>>>> @@ -196,6 +197,26 @@ static XICSState *xics_system_init(int nr_servers,
>>>>> int nr_irqs)
>>>>>     return icp;
>>>>> }
>>>>> 
>>>>> +static void spapr_compat_mode_init(sPAPREnvironment *spapr)
>>>>> +{
>>>>> +    QemuOpts *machine_opts = qemu_get_machine_opts();
>>>>> +    uint64_t compat = qemu_opt_get_number(machine_opts, "compat", 0);
>>>>> +
>>>>> +    switch (compat) {
>>>>> +    case 0:
>>>>> +        break;
>>>>> +    case 205:
>>>>> +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_05;
>>>>> +        break;
>>>>> +    case 206:
>>>>> +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_06;
>>>> Does it make sense to declare compat mode a number or would a string
>>>> be easier for users? I can imagine that "-machine compat=power6" is
>>>> easier to understand for a user than "-machine compat=205".
>>> I just follow the PowerISA spec. It does not say anywhere (at least I do
>>> not see it) that 2.05==power6. 2.05 was released when power6 was
>>> released and power6 supports 2.05 but these are not synonims. And
>>> "compat=power6" would not set "cpu-version" to any of power6 PVRs, it
>>> still will be a logical PVR. It confuses me too to tell qemu "205"
>>> instead of "power6" but it is the spec to blame :)
>> 
>> So what is "2_06 plus" then? :)
> 
> No idea. Why does it matter here?
> 
> 
>> To me it really sounds like a 1:1 mapping to cores rather than specs - the
>> ISA defines a lot more capabilities than a single core necessarily
>> supports, especially with the inclusion of booke into the generic ppc spec.
> 
> 
> Sounds - may be. But still not the same. The guest kernel has different
> descriptors for power6(raw) and power6(architected) with different flags
> and (slightly?) different behavior.

So even the guest kernel calls it "power6" then? Why shouldn't we?

> 
> 
>>>>> +        break;
>>>>> +    default:
>>>>> +        perror("Unsupported mode, only are 205, 206 supported\n");
>>>>> +        break;
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
>>>>> {
>>>>>     int ret = 0, offset;
>>>>> @@ -206,6 +227,7 @@ static int spapr_fixup_cpu_dt(void *fdt,
>>>>> sPAPREnvironment *spapr)
>>>>> 
>>>>>     CPU_FOREACH(cpu) {
>>>>>         DeviceClass *dc = DEVICE_GET_CLASS(cpu);
>>>>> +        CPUPPCState *env =&(POWERPC_CPU(cpu)->env);
>>>>>         uint32_t associativity[] = {cpu_to_be32(0x5),
>>>>>                                     cpu_to_be32(0x0),
>>>>>                                     cpu_to_be32(0x0),
>>>>> @@ -238,6 +260,14 @@ static int spapr_fixup_cpu_dt(void *fdt,
>>>>> sPAPREnvironment *spapr)
>>>>>         if (ret<  0) {
>>>>>             return ret;
>>>>>         }
>>>>> +
>>>>> +        if (env->arch_compat) {
>>>>> +            ret = fdt_setprop(fdt, offset, "cpu-version",
>>>>> +&env->arch_compat, sizeof(env->arch_compat));
>>>>> +            if (ret<  0) {
>>>>> +                return ret;
>>>>> +            }
>>>>> +        }
>>>>>     }
>>>>>     return ret;
>>>>> }
>>>>> @@ -1145,6 +1175,8 @@ static void ppc_spapr_init(QEMUMachineInitArgs
>>>>> *args)
>>>>>     spapr = g_malloc0(sizeof(*spapr));
>>>>>     QLIST_INIT(&spapr->phbs);
>>>>> 
>>>>> +    spapr_compat_mode_init(spapr);
>>>>> +
>>>>>     cpu_ppc_hypercall = emulate_spapr_hypercall;
>>>>> 
>>>>>     /* Allocate RMA if necessary */
>>>>> @@ -1226,6 +1258,14 @@ static void ppc_spapr_init(QEMUMachineInitArgs
>>>>> *args)
>>>>> 
>>>>>         xics_cpu_setup(spapr->icp, cpu);
>>>>> 
>>>>> +        /*
>>>>> +         * If compat mode is set in the command line, pass it to CPU
>>>>> so KVM
>>>>> +         * will be able to set it in the host kernel.
>>>>> +         */
>>>>> +        if (spapr->arch_compat) {
>>>>> +            env->arch_compat = spapr->arch_compat;
>>>> You should set the compat mode in KVM here, rather than doing it in
>>> the put_registers call which gets invoked on every register sync. Or can
>>> the guest change the mode?
>>> 
>>> 
>>> I will change it here in the next patch (which requires kernel changes
>>> which are not there yet). The guest cannot change it directly but it can
>>> indirectly via "client-architecture-support".
>> 
>> They probably want a generic callback then.
> 
> 
> "They"? What callback on what? :) PCR change is privileged instruction so
> the guest is not supposed to change it directly and AFAIK (sorry for my
> ignorance) "ibm,client-architecture-support" RTAS call is the only way to
> set it and it is spapr-only. How do other PPC machines do that change?

No idea what I was referring to here. Just call a generic helper that changes the value of PCR in the QEMU SPR array as well as KVM.

> 
>> What happens on reset?
> 
> 
> PCR has to be reset I believe.

That needs to be modeled manually somewhere. Please make sure to do this.

> 
> 
>>>> Also, we need to handle failure. If the kernel can not set the CPU
>>>> to
>>> 2.05 mode for example (IIRC POWER8 doesn't allow you to) we should bail
>>> out here.
>>> 
>>> Yep, I'll add this easy check :)
>>> 
>>>> And then there's the TCG question. We either have to disable CPU
>>> features similar to how we handle it in KVM (by setting and honoring the
>>> respective bits in PCR) or we need to bail out too and declare compat
>>> mode unsupported for TCG.
>>> 
>>> At the moment we want to run old distro on new CPUs. This patch would
>>> make more sense with the "ibm,client-architecture-support" and "power8
>>> registers migration" patches which I did not post yet.
>>> 
>>> Disabling "unsupported" bits in TCG would be really nice indeed and we
>>> should eventually do this but 1) it will not really hurt the guest if we
>>> did not disable some feature (really old guest will not use it and
>>> that's it)  2) once created, PowerPCCPUState (or whatever the exact name
>>> is, I am writing this mail on windows machine :) ) is not very flexible
>>> to reconfigure...
>> 
>> Can't you just set the bits in PCR and add an XXX comment indicating that
>> we're currently not honoring them? Then fron the machine code point of
>> view, everything is implemented.
> 
> 
> Is not it what the patch does at the moment to TCG (except missing comment)?

You're modifying env->arch_compat. You should be modifying env->sprs[SPRN_PCR].

> 
> 
> 
>>>> And then there's the fact that the kernel interface isn't upstream in a
>>>> way that
>>> ? unfinished sentence? What is missing in the kernel right now?
>> 
>> Eh. I think I wanted to say that this depends on in-kernel patches that are
>> not upstream yet :).
>> 
>>> 
>>> 
>>>>> +        }
>>>>> +
>>>>>         qemu_register_reset(spapr_cpu_reset, cpu);
>>>>>     }
>>>>> 
>>>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>>>> index ca175b0..201c578 100644
>>>>> --- a/include/hw/ppc/spapr.h
>>>>> +++ b/include/hw/ppc/spapr.h
>>>>> @@ -34,6 +34,8 @@ typedef struct sPAPREnvironment {
>>>>>     uint32_t epow_irq;
>>>>>     Notifier epow_notifier;
>>>>> 
>>>>> +    uint32_t arch_compat;        /* Compatible PVR from the command
>>>>> line */
>>>>> +
>>>>>     /* Migration state */
>>>>>     int htab_save_index;
>>>>>     bool htab_first_pass;
>>>>> diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
>>>>> index 49ba4a4..d7c033c 100644
>>>>> --- a/target-ppc/cpu-models.h
>>>>> +++ b/target-ppc/cpu-models.h
>>>>> @@ -583,6 +583,16 @@ enum {
>>>>>     CPU_POWERPC_RS64II             = 0x00340000,
>>>>>     CPU_POWERPC_RS64III            = 0x00360000,
>>>>>     CPU_POWERPC_RS64IV             = 0x00370000,
>>>>> +
>>>>> +    /* Logical CPUs */
>>>>> +    CPU_POWERPC_LOGICAL_MASK       = 0xFFFFFFFF,
>>>>> +    CPU_POWERPC_LOGICAL_2_04       = 0x0F000001,
>>>>> +    CPU_POWERPC_LOGICAL_2_05       = 0x0F000002,
>>>>> +    CPU_POWERPC_LOGICAL_2_06       = 0x0F000003,
>>>>> +    CPU_POWERPC_LOGICAL_2_06_PLUS  = 0x0F100003,
>>>>> +    CPU_POWERPC_LOGICAL_2_07       = 0x0F000004,
>>>>> +    CPU_POWERPC_LOGICAL_2_08       = 0x0F000005,
>>>> These don't really belong here.
>>> 
>>> Sorry, I do not understand. These are PVRs which are used in
>>> "cpu-version" DT property. They are logical PVRs but still PVRs - i.e.
>>> the guest has PVR masks for them too.
>> 
>> But they are specific to PAPR, not PowerPC in general, no? And you would
>> never find one in the PVR register of a guest.
> 
> 
> Yes, never. But they all are in the same array in
> arch/powerpc/kernel/cputable.c. How is that different?

They are not CPUs. That's what's different. These numbers don't exist on any real silicon and will never get returned my mfpvr.

> 
> 
>>>>> +
>>>>> #endif /* defined(TARGET_PPC64) */
>>>>>     /* Original POWER */
>>>>>     /* XXX: should be POWER (RIOS), RSC3308, RSC4608,
>>>>> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
>>>>> index 422a6bb..fc837c1 100644
>>>>> --- a/target-ppc/cpu.h
>>>>> +++ b/target-ppc/cpu.h
>>>>> @@ -999,6 +999,9 @@ struct CPUPPCState {
>>>>>     /* Device control registers */
>>>>>     ppc_dcr_t *dcr_env;
>>>>> 
>>>>> +    /* Architecture compatibility mode */
>>>>> +    uint32_t arch_compat;
>>> 
>>>> Do we really need to carry this in the vcpu struct? Or can we just
>>>> fire-and-forget about it? If we want to preserve anything, it should
>>>> be the PCR register.
>>> This is the current PVR value aka "cpu-version" property. It may change
>>> during reboots as every reboot does the "client-architecture-support"
>>> call so the logical PVR can change.
>> 
>> Ok, so again: What happens on reset / reboot? Do we want to preserve the
>> compatibility setting or does that get reset as well?
> 
> The setting must be preserved on reboot ("client-architecture-support"
> definitely expects it to be preserved). I am not sure about reset, I guess
> it can/should be reset.

Please find out for sure. It's critical we get this right and model it accordingly.


Alex
Paolo Bonzini Nov. 5, 2013, 9:52 a.m. UTC | #10
Il 05/11/2013 10:16, Alexander Graf ha scritto:
> 
> On 05.11.2013, at 10:06, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
>> Il 30/09/2013 14:57, Alexey Kardashevskiy ha scritto:
>>>>> Why is the option under -machine instead of -cpu?
>>> Because it is still the same CPU and the guest will still read the real
>>> PVR from the hardware (which it may not support but this is why we need
>>> compatibility mode).
>>
>> How do you support migration from a newer to an older CPU then?  I think
>> the guest should never see anything about the hardware CPU model.
> 
> POWER can't model that. It always leaks the host CPU information into the guest. It's the guest kernel's responsibility to not expose that change to user space.
> 
> Yes, it's broken :). I'm not even sure there is any sensible way to do live migration between different CPU types.

Still in my opinion it should be "-cpu", not "-machine".  Even if it's
just a "virtual" CPU model.

Paolo
Alexander Graf Nov. 5, 2013, 10:27 a.m. UTC | #11
On 05.11.2013, at 10:52, Paolo Bonzini <pbonzini@redhat.com> wrote:

> Il 05/11/2013 10:16, Alexander Graf ha scritto:
>> 
>> On 05.11.2013, at 10:06, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> 
>>> Il 30/09/2013 14:57, Alexey Kardashevskiy ha scritto:
>>>>>> Why is the option under -machine instead of -cpu?
>>>> Because it is still the same CPU and the guest will still read the real
>>>> PVR from the hardware (which it may not support but this is why we need
>>>> compatibility mode).
>>> 
>>> How do you support migration from a newer to an older CPU then?  I think
>>> the guest should never see anything about the hardware CPU model.
>> 
>> POWER can't model that. It always leaks the host CPU information into the guest. It's the guest kernel's responsibility to not expose that change to user space.
>> 
>> Yes, it's broken :). I'm not even sure there is any sensible way to do live migration between different CPU types.
> 
> Still in my opinion it should be "-cpu", not "-machine".  Even if it's
> just a "virtual" CPU model.

The only thing that this really changes is an SPR (MSR in x86 speech) on an existing cpu model. It's definitely not a new CPU type. If anything it'd be an option to an existing type.


Alex
Paolo Bonzini Nov. 5, 2013, 10:33 a.m. UTC | #12
Il 05/11/2013 11:27, Alexander Graf ha scritto:
> 
> On 05.11.2013, at 10:52, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
>> Il 05/11/2013 10:16, Alexander Graf ha scritto:
>>>
>>> On 05.11.2013, at 10:06, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>>
>>>> Il 30/09/2013 14:57, Alexey Kardashevskiy ha scritto:
>>>>>>> Why is the option under -machine instead of -cpu?
>>>>> Because it is still the same CPU and the guest will still read the real
>>>>> PVR from the hardware (which it may not support but this is why we need
>>>>> compatibility mode).
>>>>
>>>> How do you support migration from a newer to an older CPU then?  I think
>>>> the guest should never see anything about the hardware CPU model.
>>>
>>> POWER can't model that. It always leaks the host CPU information into the guest. It's the guest kernel's responsibility to not expose that change to user space.
>>>
>>> Yes, it's broken :). I'm not even sure there is any sensible way to do live migration between different CPU types.
>>
>> Still in my opinion it should be "-cpu", not "-machine".  Even if it's
>> just a "virtual" CPU model.
> 
> The only thing that this really changes is an SPR (MSR in x86 speech)
> on an existing cpu model. It's definitely not a new CPU type. If
> anything it'd be an option to an existing type.

Agreed.

Paolo
Alexey Kardashevskiy Nov. 5, 2013, 10:45 a.m. UTC | #13
On 11/05/2013 08:52 PM, Paolo Bonzini wrote:
> Il 05/11/2013 10:16, Alexander Graf ha scritto:
>>
>> On 05.11.2013, at 10:06, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>>> Il 30/09/2013 14:57, Alexey Kardashevskiy ha scritto:
>>>>>> Why is the option under -machine instead of -cpu?
>>>> Because it is still the same CPU and the guest will still read the real
>>>> PVR from the hardware (which it may not support but this is why we need
>>>> compatibility mode).
>>>
>>> How do you support migration from a newer to an older CPU then?  I think
>>> the guest should never see anything about the hardware CPU model.
>>
>> POWER can't model that. It always leaks the host CPU information into the guest. It's the guest kernel's responsibility to not expose that change to user space.
>>
>> Yes, it's broken :). I'm not even sure there is any sensible way to do live migration between different CPU types.
> 
> Still in my opinion it should be "-cpu", not "-machine".  Even if it's
> just a "virtual" CPU model.

The compat option itself does not make much sense (yes we could just add
yet another CPU class and that's it) but with the
ibm,client-architecture-support we will have to implement this
compatibility mode anyway. Since the guest can ask for a compatibility mode
change, we either have to support compat option or hot-unplug all (all) CPU
objects in QEMU and hotplug CPUs of the requested model. Or always reset
the guest if it asked for a compatibility mode and recreate CPUs in QEMU
during reset. As for me, the compat option seems simpler.
Paolo Bonzini Nov. 5, 2013, 10:46 a.m. UTC | #14
Il 05/11/2013 11:45, Alexey Kardashevskiy ha scritto:
>> > Still in my opinion it should be "-cpu", not "-machine".  Even if it's
>> > just a "virtual" CPU model.
> The compat option itself does not make much sense (yes we could just add
> yet another CPU class and that's it) but with the
> ibm,client-architecture-support we will have to implement this
> compatibility mode anyway. Since the guest can ask for a compatibility mode
> change, we either have to support compat option or hot-unplug all (all) CPU
> objects in QEMU and hotplug CPUs of the requested model. Or always reset
> the guest if it asked for a compatibility mode and recreate CPUs in QEMU
> during reset. As for me, the compat option seems simpler.

Sure, just make it a suboption of "-cpu" rather than "-machine".

Paolo
Andreas Färber Nov. 5, 2013, 1:53 p.m. UTC | #15
Am 05.11.2013 10:52, schrieb Paolo Bonzini:
> Il 05/11/2013 10:16, Alexander Graf ha scritto:
>>
>> On 05.11.2013, at 10:06, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>>> Il 30/09/2013 14:57, Alexey Kardashevskiy ha scritto:
>>>>>> Why is the option under -machine instead of -cpu?
>>>> Because it is still the same CPU and the guest will still read the real
>>>> PVR from the hardware (which it may not support but this is why we need
>>>> compatibility mode).
>>>
>>> How do you support migration from a newer to an older CPU then?  I think
>>> the guest should never see anything about the hardware CPU model.
>>
>> POWER can't model that. It always leaks the host CPU information into the guest. It's the guest kernel's responsibility to not expose that change to user space.
>>
>> Yes, it's broken :). I'm not even sure there is any sensible way to do live migration between different CPU types.
> 
> Still in my opinion it should be "-cpu", not "-machine".  Even if it's
> just a "virtual" CPU model.

PowerPC currently does not have -cpu option parsing. If you need to
implement it, I would ask for a generic hook in CPUClass set by
TYPE_POWERPC_CPU, so that the logic does not get hardcoded in cpu_init,
and for the p=v parsing logic to be so generic as to just set property p
to value v on the CPU instance. I.e. please make the compatibility
settings a static property or dynamic property of the CPU.

Maybe the parsing code could even live in generic qom/cpu.c, overridden
by x86/sparc and reused for arm? Somewhere down my to-do list but
patches appreciated...

Andreas
Paul Mackerras Nov. 6, 2013, 3:27 a.m. UTC | #16
On Tue, Nov 05, 2013 at 10:06:04AM +0100, Paolo Bonzini wrote:
> Il 30/09/2013 14:57, Alexey Kardashevskiy ha scritto:
> >> > Why is the option under -machine instead of -cpu?
> > Because it is still the same CPU and the guest will still read the real
> > PVR from the hardware (which it may not support but this is why we need
> > compatibility mode).
> 
> How do you support migration from a newer to an older CPU then? 

PowerVM has supported this for years, so there are well-established
interfaces that existing guest kernels use for this.

Basically, the hypervisor can put the CPU into a mode where the
processor behaves when in user mode according to an earlier version of
the architecture.  POWER8 conforms to architecture 2.07, and it has a
register with bits that allow it to be put into an architecture 2.05
(POWER6) mode or an architecture 2.06 (POWER7) mode.  In 2.05 mode,
none of the instructions and registers that are new in POWER7 and
POWER8 are available in user mode, and in 2.06 mode, none of the
facilities that are new in POWER8 are available in user mode.

The other thing the hypervisor does is tell the guest which
architecture mode the CPU is running in via a property in the device
tree.  Linux kernels use that to determine the list of hardware
features that they can use.

Under PowerVM, if you are running a guest on a POWER8 (say) and you
want to be able to be able to migrate to a POWER6 later on, you have
to run the guest in architecture 2.05 mode, even if the guest knows
about later architecture versions.

> I think
> the guest should never see anything about the hardware CPU model.

The architecture doesn't let us do that, but our guest kernels are
accustomed to largely ignoring the actual PVR and operating on the
basis of the architecture level that we tell them via the device
tree.

Regards,
Paul.
Paul Mackerras Nov. 6, 2013, 5:48 a.m. UTC | #17
On Mon, Sep 30, 2013 at 01:25:32PM +0200, Alexander Graf wrote:
> 
> On 27.09.2013, at 10:06, Alexey Kardashevskiy wrote:
> 
> > To be able to boot on newer hardware that the software support,
> > PowerISA defines a logical PVR, one per every PowerISA specification
> > version from 2.04.
[snip]
> > +    case 205:
> > +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_05;
> > +        break;
> > +    case 206:
> > +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_06;
> 
> Does it make sense to declare compat mode a number or would a string be easier for users? I can imagine that "-machine compat=power6" is easier to understand for a user than "-machine compat=205".

That's probably true.  I don't mind either way.

> Also, we need to handle failure. If the kernel can not set the CPU to 2.05 mode for example (IIRC POWER8 doesn't allow you to) we should bail out here.

POWER8 does have 2.05 (POWER6) and 2.06 (POWER7) compatibility modes.

> > +    /* Architecture compatibility mode */
> > +    uint32_t arch_compat;
> 
> Do we really need to carry this in the vcpu struct? Or can we just fire-and-forget about it? If we want to preserve anything, it should be the PCR register.

There are two relevant values here; the compatibility mode (if any)
that the user has requested via the command line, and the mode that
has been negotiated with the ibm,client-architecture-support (CAS)
call.  CAS should select the latest mode supported by the guest that
is not later than the mode requested on the command line, and is
supported by QEMU, and is not later than the architecture of the
host.  Both values should be sent across to the destination VM on
migration AFAICS.

On reset/reboot, the compatibility mode should not change.  The device
tree that is supplied to the new SLOF instance should reflect the
current compatibility mode.

Regards,
Paul.
Igor Mammedov Nov. 6, 2013, 11:36 a.m. UTC | #18
On Tue, 05 Nov 2013 14:53:14 +0100
Andreas Färber <afaerber@suse.de> wrote:

> Am 05.11.2013 10:52, schrieb Paolo Bonzini:
> > Il 05/11/2013 10:16, Alexander Graf ha scritto:
> >>
> >> On 05.11.2013, at 10:06, Paolo Bonzini <pbonzini@redhat.com> wrote:
> >>
> >>> Il 30/09/2013 14:57, Alexey Kardashevskiy ha scritto:
> >>>>>> Why is the option under -machine instead of -cpu?
> >>>> Because it is still the same CPU and the guest will still read the real
> >>>> PVR from the hardware (which it may not support but this is why we need
> >>>> compatibility mode).
> >>>
> >>> How do you support migration from a newer to an older CPU then?  I think
> >>> the guest should never see anything about the hardware CPU model.
> >>
> >> POWER can't model that. It always leaks the host CPU information into the guest. It's the guest kernel's responsibility to not expose that change to user space.
> >>
> >> Yes, it's broken :). I'm not even sure there is any sensible way to do live migration between different CPU types.
> > 
> > Still in my opinion it should be "-cpu", not "-machine".  Even if it's
> > just a "virtual" CPU model.
> 
> PowerPC currently does not have -cpu option parsing. If you need to
> implement it, I would ask for a generic hook in CPUClass set by
> TYPE_POWERPC_CPU, so that the logic does not get hardcoded in cpu_init,
> and for the p=v parsing logic to be so generic as to just set property p
> to value v on the CPU instance. I.e. please make the compatibility
> settings a static property or dynamic property of the CPU.
Seconded, that's what is on my to-do list for x86 once x86 properties series
is applied.

moreover, all hook has to do is to translate -cpu to a set of global options,
adding [CPUtype.foo, val] pairs to global option list. Then options will be
automatically applied to any new instance of CPUtype and there won't be any
need for accessing/storing cpu_model string during cpu creation and hotplug.

> 
> Maybe the parsing code could even live in generic qom/cpu.c, overridden
> by x86/sparc and reused for arm? Somewhere down my to-do list but
> patches appreciated...
> 
> Andreas
>
Alexander Graf Nov. 6, 2013, 12:07 p.m. UTC | #19
On 06.11.2013, at 06:48, Paul Mackerras <paulus@samba.org> wrote:

> On Mon, Sep 30, 2013 at 01:25:32PM +0200, Alexander Graf wrote:
>> 
>> On 27.09.2013, at 10:06, Alexey Kardashevskiy wrote:
>> 
>>> To be able to boot on newer hardware that the software support,
>>> PowerISA defines a logical PVR, one per every PowerISA specification
>>> version from 2.04.
> [snip]
>>> +    case 205:
>>> +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_05;
>>> +        break;
>>> +    case 206:
>>> +        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_06;
>> 
>> Does it make sense to declare compat mode a number or would a string be easier for users? I can imagine that "-machine compat=power6" is easier to understand for a user than "-machine compat=205".
> 
> That's probably true.  I don't mind either way.
> 
>> Also, we need to handle failure. If the kernel can not set the CPU to 2.05 mode for example (IIRC POWER8 doesn't allow you to) we should bail out here.
> 
> POWER8 does have 2.05 (POWER6) and 2.06 (POWER7) compatibility modes.
> 
>>> +    /* Architecture compatibility mode */
>>> +    uint32_t arch_compat;
>> 
>> Do we really need to carry this in the vcpu struct? Or can we just fire-and-forget about it? If we want to preserve anything, it should be the PCR register.
> 
> There are two relevant values here; the compatibility mode (if any)
> that the user has requested via the command line, and the mode that
> has been negotiated with the ibm,client-architecture-support (CAS)
> call.  CAS should select the latest mode supported by the guest that
> is not later than the mode requested on the command line, and is
> supported by QEMU, and is not later than the architecture of the
> host.  Both values should be sent across to the destination VM on
> migration AFAICS.

So how does this work on pHyp? I thought the guest always comes up in full native compat mode and downgrades itself then?

> On reset/reboot, the compatibility mode should not change.  The device
> tree that is supplied to the new SLOF instance should reflect the
> current compatibility mode.

Ok, this should work if we just don't touch PCR across reset.


Alex
diff mbox

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a09a1d9..737452d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -33,6 +33,7 @@ 
 #include "sysemu/kvm.h"
 #include "kvm_ppc.h"
 #include "mmu-hash64.h"
+#include "cpu-models.h"
 
 #include "hw/boards.h"
 #include "hw/ppc/ppc.h"
@@ -196,6 +197,26 @@  static XICSState *xics_system_init(int nr_servers, int nr_irqs)
     return icp;
 }
 
+static void spapr_compat_mode_init(sPAPREnvironment *spapr)
+{
+    QemuOpts *machine_opts = qemu_get_machine_opts();
+    uint64_t compat = qemu_opt_get_number(machine_opts, "compat", 0);
+
+    switch (compat) {
+    case 0:
+        break;
+    case 205:
+        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_05;
+        break;
+    case 206:
+        spapr->arch_compat = CPU_POWERPC_LOGICAL_2_06;
+        break;
+    default:
+        perror("Unsupported mode, only are 205, 206 supported\n");
+        break;
+    }
+}
+
 static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
 {
     int ret = 0, offset;
@@ -206,6 +227,7 @@  static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
 
     CPU_FOREACH(cpu) {
         DeviceClass *dc = DEVICE_GET_CLASS(cpu);
+        CPUPPCState *env = &(POWERPC_CPU(cpu)->env);
         uint32_t associativity[] = {cpu_to_be32(0x5),
                                     cpu_to_be32(0x0),
                                     cpu_to_be32(0x0),
@@ -238,6 +260,14 @@  static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
         if (ret < 0) {
             return ret;
         }
+
+        if (env->arch_compat) {
+            ret = fdt_setprop(fdt, offset, "cpu-version",
+                              &env->arch_compat, sizeof(env->arch_compat));
+            if (ret < 0) {
+                return ret;
+            }
+        }
     }
     return ret;
 }
@@ -1145,6 +1175,8 @@  static void ppc_spapr_init(QEMUMachineInitArgs *args)
     spapr = g_malloc0(sizeof(*spapr));
     QLIST_INIT(&spapr->phbs);
 
+    spapr_compat_mode_init(spapr);
+
     cpu_ppc_hypercall = emulate_spapr_hypercall;
 
     /* Allocate RMA if necessary */
@@ -1226,6 +1258,14 @@  static void ppc_spapr_init(QEMUMachineInitArgs *args)
 
         xics_cpu_setup(spapr->icp, cpu);
 
+        /*
+         * If compat mode is set in the command line, pass it to CPU so KVM
+         * will be able to set it in the host kernel.
+         */
+        if (spapr->arch_compat) {
+            env->arch_compat = spapr->arch_compat;
+        }
+
         qemu_register_reset(spapr_cpu_reset, cpu);
     }
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index ca175b0..201c578 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -34,6 +34,8 @@  typedef struct sPAPREnvironment {
     uint32_t epow_irq;
     Notifier epow_notifier;
 
+    uint32_t arch_compat;        /* Compatible PVR from the command line */
+
     /* Migration state */
     int htab_save_index;
     bool htab_first_pass;
diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
index 49ba4a4..d7c033c 100644
--- a/target-ppc/cpu-models.h
+++ b/target-ppc/cpu-models.h
@@ -583,6 +583,16 @@  enum {
     CPU_POWERPC_RS64II             = 0x00340000,
     CPU_POWERPC_RS64III            = 0x00360000,
     CPU_POWERPC_RS64IV             = 0x00370000,
+
+    /* Logical CPUs */
+    CPU_POWERPC_LOGICAL_MASK       = 0xFFFFFFFF,
+    CPU_POWERPC_LOGICAL_2_04       = 0x0F000001,
+    CPU_POWERPC_LOGICAL_2_05       = 0x0F000002,
+    CPU_POWERPC_LOGICAL_2_06       = 0x0F000003,
+    CPU_POWERPC_LOGICAL_2_06_PLUS  = 0x0F100003,
+    CPU_POWERPC_LOGICAL_2_07       = 0x0F000004,
+    CPU_POWERPC_LOGICAL_2_08       = 0x0F000005,
+
 #endif /* defined(TARGET_PPC64) */
     /* Original POWER */
     /* XXX: should be POWER (RIOS), RSC3308, RSC4608,
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 422a6bb..fc837c1 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -999,6 +999,9 @@  struct CPUPPCState {
     /* Device control registers */
     ppc_dcr_t *dcr_env;
 
+    /* Architecture compatibility mode */
+    uint32_t arch_compat;
+
     int dcache_line_size;
     int icache_line_size;
 
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 4bc4496..7b853a3 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -872,6 +872,7 @@  int kvm_arch_put_registers(CPUState *cs, int level)
                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
             }
         }
+        kvm_set_one_reg(cs, KVM_REG_PPC_ARCH_COMPAT, &env->arch_compat);
 #endif /* TARGET_PPC64 */
     }
 
@@ -1083,6 +1084,7 @@  int kvm_arch_get_registers(CPUState *cs)
                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
             }
         }
+        kvm_get_one_reg(cs, KVM_REG_PPC_ARCH_COMPAT, &env->arch_compat);
 #endif
     }
 
diff --git a/vl.c b/vl.c
index 4e709d5..90dad7b 100644
--- a/vl.c
+++ b/vl.c
@@ -427,6 +427,10 @@  static QemuOptsList qemu_machine_opts = {
             .name = "usb",
             .type = QEMU_OPT_BOOL,
             .help = "Set on/off to enable/disable usb",
+        }, {
+            .name = "compat",
+            .type = QEMU_OPT_NUMBER,
+            .help = "Selects compatibility mode on CPU",
         },
         { /* End of list */ }
     },