diff mbox series

[for-6.0,4/9] spapr: Set compat mode in spapr_reset_vcpu()

Message ID 20201120234208.683521-5-groug@kaod.org
State New
Headers show
Series spapr: Perform hotplug sanity checks at pre-plug | expand

Commit Message

Greg Kurz Nov. 20, 2020, 11:42 p.m. UTC
When it comes to resetting the compat mode of the vCPUS, there are
two situations to consider:
(1) machine reset should set the compat mode back to the machine default,
    ie. spapr->max_compat_pvr
(2) hot plugged vCPUs should set their compat mode to mach the boot vCPU,
    ie. POWERPC_CPU(first_cpu)->compat_pvr

This is currently handled in two separate places: globally for all vCPUs
from the machine reset code for (1) and for each thread of a core from
the hotplug path for (2).

Since the machine reset code already resets all vCPUs, starting with boot
vCPU, consolidate the logic in spapr_reset_vcpu(). Special case the boot
vCPU so that it resets its compat mode back to the machine default. Any
other vCPU just need to match the compat mode of the boot vCPU.

Failing to set the compat mode during machine reset is a fatal error,
but not for hot plugged vCPUs. This is arguable because if we've been
able to set the boot vCPU compat mode at CAS or during machine reset,
it should definitely not fail for other vCPUs. Since spapr_reset_vcpu()
already has a fatal error path for kvm_check_mmu() failures, do the
same for ppc_set_compat().

This gets rid of an error path in spapr_core_plug(). It will allow
further simplifications.

Signed-off-by: Greg Kurz <groug@kaod.org>
---
 hw/ppc/spapr.c          | 16 ----------------
 hw/ppc/spapr_cpu_core.c | 13 +++++++++++++
 2 files changed, 13 insertions(+), 16 deletions(-)

Comments

David Gibson Nov. 23, 2020, 5:11 a.m. UTC | #1
On Sat, Nov 21, 2020 at 12:42:03AM +0100, Greg Kurz wrote:
> When it comes to resetting the compat mode of the vCPUS, there are
> two situations to consider:
> (1) machine reset should set the compat mode back to the machine default,
>     ie. spapr->max_compat_pvr
> (2) hot plugged vCPUs should set their compat mode to mach the boot vCPU,
>     ie. POWERPC_CPU(first_cpu)->compat_pvr
> 
> This is currently handled in two separate places: globally for all vCPUs
> from the machine reset code for (1) and for each thread of a core from
> the hotplug path for (2).
> 
> Since the machine reset code already resets all vCPUs, starting with boot
> vCPU, consolidate the logic in spapr_reset_vcpu(). Special case the boot
> vCPU so that it resets its compat mode back to the machine default. Any
> other vCPU just need to match the compat mode of the boot vCPU.
> 
> Failing to set the compat mode during machine reset is a fatal error,
> but not for hot plugged vCPUs. This is arguable because if we've been
> able to set the boot vCPU compat mode at CAS or during machine reset,
> it should definitely not fail for other vCPUs. Since spapr_reset_vcpu()
> already has a fatal error path for kvm_check_mmu() failures, do the
> same for ppc_set_compat().
> 
> This gets rid of an error path in spapr_core_plug(). It will allow
> further simplifications.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>  hw/ppc/spapr.c          | 16 ----------------
>  hw/ppc/spapr_cpu_core.c | 13 +++++++++++++
>  2 files changed, 13 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index f58f77389e8e..da7586f548df 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1606,8 +1606,6 @@ static void spapr_machine_reset(MachineState *machine)
>      spapr_ovec_cleanup(spapr->ov5_cas);
>      spapr->ov5_cas = spapr_ovec_new();
>  
> -    ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
> -
>      /*
>       * This is fixing some of the default configuration of the XIVE
>       * devices. To be called after the reset of the machine devices.
> @@ -3785,20 +3783,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
>  
>      core_slot->cpu = OBJECT(dev);
>  
> -    /*
> -     * Set compatibility mode to match the boot CPU, which was either set
> -     * by the machine reset code or by CAS.
> -     */
> -    if (hotplugged) {
> -        for (i = 0; i < cc->nr_threads; i++) {
> -            if (ppc_set_compat(core->threads[i],
> -                               POWERPC_CPU(first_cpu)->compat_pvr,
> -                               errp) < 0) {
> -                return;
> -            }
> -        }
> -    }
> -
>      if (smc->pre_2_10_has_unused_icps) {
>          for (i = 0; i < cc->nr_threads; i++) {
>              cs = CPU(core->threads[i]);
> diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> index 2f7dc3c23ded..17741a3fb77f 100644
> --- a/hw/ppc/spapr_cpu_core.c
> +++ b/hw/ppc/spapr_cpu_core.c
> @@ -27,6 +27,7 @@
>  
>  static void spapr_reset_vcpu(PowerPCCPU *cpu)
>  {
> +    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
>      CPUState *cs = CPU(cpu);
>      CPUPPCState *env = &cpu->env;
>      PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> @@ -69,6 +70,18 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
>      kvm_check_mmu(cpu, &error_fatal);
>  
>      spapr_irq_cpu_intc_reset(spapr, cpu);
> +
> +    /*
> +     * The boot CPU is only reset during machine reset : reset its
> +     * compatibility mode to the machine default. For other CPUs,
> +     * either cold plugged or hot plugged, set the compatibility mode
> +     * to match the boot CPU, which was either set by the machine reset
> +     * code or by CAS.
> +     */
> +    ppc_set_compat(cpu,
> +                   cpu == first_ppc_cpu ?
> +                   spapr->max_compat_pvr : first_ppc_cpu->compat_pvr,
> +                   &error_fatal);

This assumes that when it is called for a non-boot CPU, it has already
been called for the boot CPU..  Are we certain that's guaranteed by
the sequence of reset calls during a full machine reset?

>  }
>  
>  void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,
Greg Kurz Nov. 23, 2020, 11:51 a.m. UTC | #2
On Mon, 23 Nov 2020 16:11:30 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Sat, Nov 21, 2020 at 12:42:03AM +0100, Greg Kurz wrote:
> > When it comes to resetting the compat mode of the vCPUS, there are
> > two situations to consider:
> > (1) machine reset should set the compat mode back to the machine default,
> >     ie. spapr->max_compat_pvr
> > (2) hot plugged vCPUs should set their compat mode to mach the boot vCPU,
> >     ie. POWERPC_CPU(first_cpu)->compat_pvr
> > 
> > This is currently handled in two separate places: globally for all vCPUs
> > from the machine reset code for (1) and for each thread of a core from
> > the hotplug path for (2).
> > 
> > Since the machine reset code already resets all vCPUs, starting with boot
> > vCPU, consolidate the logic in spapr_reset_vcpu(). Special case the boot
> > vCPU so that it resets its compat mode back to the machine default. Any
> > other vCPU just need to match the compat mode of the boot vCPU.
> > 
> > Failing to set the compat mode during machine reset is a fatal error,
> > but not for hot plugged vCPUs. This is arguable because if we've been
> > able to set the boot vCPU compat mode at CAS or during machine reset,
> > it should definitely not fail for other vCPUs. Since spapr_reset_vcpu()
> > already has a fatal error path for kvm_check_mmu() failures, do the
> > same for ppc_set_compat().
> > 
> > This gets rid of an error path in spapr_core_plug(). It will allow
> > further simplifications.
> > 
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > ---
> >  hw/ppc/spapr.c          | 16 ----------------
> >  hw/ppc/spapr_cpu_core.c | 13 +++++++++++++
> >  2 files changed, 13 insertions(+), 16 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index f58f77389e8e..da7586f548df 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -1606,8 +1606,6 @@ static void spapr_machine_reset(MachineState *machine)
> >      spapr_ovec_cleanup(spapr->ov5_cas);
> >      spapr->ov5_cas = spapr_ovec_new();
> >  
> > -    ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
> > -
> >      /*
> >       * This is fixing some of the default configuration of the XIVE
> >       * devices. To be called after the reset of the machine devices.
> > @@ -3785,20 +3783,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> >  
> >      core_slot->cpu = OBJECT(dev);
> >  
> > -    /*
> > -     * Set compatibility mode to match the boot CPU, which was either set
> > -     * by the machine reset code or by CAS.
> > -     */
> > -    if (hotplugged) {
> > -        for (i = 0; i < cc->nr_threads; i++) {
> > -            if (ppc_set_compat(core->threads[i],
> > -                               POWERPC_CPU(first_cpu)->compat_pvr,
> > -                               errp) < 0) {
> > -                return;
> > -            }
> > -        }
> > -    }
> > -
> >      if (smc->pre_2_10_has_unused_icps) {
> >          for (i = 0; i < cc->nr_threads; i++) {
> >              cs = CPU(core->threads[i]);
> > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > index 2f7dc3c23ded..17741a3fb77f 100644
> > --- a/hw/ppc/spapr_cpu_core.c
> > +++ b/hw/ppc/spapr_cpu_core.c
> > @@ -27,6 +27,7 @@
> >  
> >  static void spapr_reset_vcpu(PowerPCCPU *cpu)
> >  {
> > +    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
> >      CPUState *cs = CPU(cpu);
> >      CPUPPCState *env = &cpu->env;
> >      PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > @@ -69,6 +70,18 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
> >      kvm_check_mmu(cpu, &error_fatal);
> >  
> >      spapr_irq_cpu_intc_reset(spapr, cpu);
> > +
> > +    /*
> > +     * The boot CPU is only reset during machine reset : reset its
> > +     * compatibility mode to the machine default. For other CPUs,
> > +     * either cold plugged or hot plugged, set the compatibility mode
> > +     * to match the boot CPU, which was either set by the machine reset
> > +     * code or by CAS.
> > +     */
> > +    ppc_set_compat(cpu,
> > +                   cpu == first_ppc_cpu ?
> > +                   spapr->max_compat_pvr : first_ppc_cpu->compat_pvr,
> > +                   &error_fatal);
> 
> This assumes that when it is called for a non-boot CPU, it has already
> been called for the boot CPU..  Are we certain that's guaranteed by
> the sequence of reset calls during a full machine reset?
> 

This happens to be the case. Basically because the boot CPU core
is created (including registering its reset handler) first and
qemu_devices_reset() calls handlers in the same order they were
registered.

> >  }
> >  
> >  void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,
>
David Gibson Nov. 25, 2020, 2:39 a.m. UTC | #3
On Mon, Nov 23, 2020 at 12:51:08PM +0100, Greg Kurz wrote:
> On Mon, 23 Nov 2020 16:11:30 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Sat, Nov 21, 2020 at 12:42:03AM +0100, Greg Kurz wrote:
> > > When it comes to resetting the compat mode of the vCPUS, there are
> > > two situations to consider:
> > > (1) machine reset should set the compat mode back to the machine default,
> > >     ie. spapr->max_compat_pvr
> > > (2) hot plugged vCPUs should set their compat mode to mach the boot vCPU,
> > >     ie. POWERPC_CPU(first_cpu)->compat_pvr
> > > 
> > > This is currently handled in two separate places: globally for all vCPUs
> > > from the machine reset code for (1) and for each thread of a core from
> > > the hotplug path for (2).
> > > 
> > > Since the machine reset code already resets all vCPUs, starting with boot
> > > vCPU, consolidate the logic in spapr_reset_vcpu(). Special case the boot
> > > vCPU so that it resets its compat mode back to the machine default. Any
> > > other vCPU just need to match the compat mode of the boot vCPU.
> > > 
> > > Failing to set the compat mode during machine reset is a fatal error,
> > > but not for hot plugged vCPUs. This is arguable because if we've been
> > > able to set the boot vCPU compat mode at CAS or during machine reset,
> > > it should definitely not fail for other vCPUs. Since spapr_reset_vcpu()
> > > already has a fatal error path for kvm_check_mmu() failures, do the
> > > same for ppc_set_compat().
> > > 
> > > This gets rid of an error path in spapr_core_plug(). It will allow
> > > further simplifications.
> > > 
> > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > > ---
> > >  hw/ppc/spapr.c          | 16 ----------------
> > >  hw/ppc/spapr_cpu_core.c | 13 +++++++++++++
> > >  2 files changed, 13 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index f58f77389e8e..da7586f548df 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -1606,8 +1606,6 @@ static void spapr_machine_reset(MachineState *machine)
> > >      spapr_ovec_cleanup(spapr->ov5_cas);
> > >      spapr->ov5_cas = spapr_ovec_new();
> > >  
> > > -    ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
> > > -
> > >      /*
> > >       * This is fixing some of the default configuration of the XIVE
> > >       * devices. To be called after the reset of the machine devices.
> > > @@ -3785,20 +3783,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> > >  
> > >      core_slot->cpu = OBJECT(dev);
> > >  
> > > -    /*
> > > -     * Set compatibility mode to match the boot CPU, which was either set
> > > -     * by the machine reset code or by CAS.
> > > -     */
> > > -    if (hotplugged) {
> > > -        for (i = 0; i < cc->nr_threads; i++) {
> > > -            if (ppc_set_compat(core->threads[i],
> > > -                               POWERPC_CPU(first_cpu)->compat_pvr,
> > > -                               errp) < 0) {
> > > -                return;
> > > -            }
> > > -        }
> > > -    }
> > > -
> > >      if (smc->pre_2_10_has_unused_icps) {
> > >          for (i = 0; i < cc->nr_threads; i++) {
> > >              cs = CPU(core->threads[i]);
> > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > > index 2f7dc3c23ded..17741a3fb77f 100644
> > > --- a/hw/ppc/spapr_cpu_core.c
> > > +++ b/hw/ppc/spapr_cpu_core.c
> > > @@ -27,6 +27,7 @@
> > >  
> > >  static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > >  {
> > > +    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
> > >      CPUState *cs = CPU(cpu);
> > >      CPUPPCState *env = &cpu->env;
> > >      PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > > @@ -69,6 +70,18 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > >      kvm_check_mmu(cpu, &error_fatal);
> > >  
> > >      spapr_irq_cpu_intc_reset(spapr, cpu);
> > > +
> > > +    /*
> > > +     * The boot CPU is only reset during machine reset : reset its
> > > +     * compatibility mode to the machine default. For other CPUs,
> > > +     * either cold plugged or hot plugged, set the compatibility mode
> > > +     * to match the boot CPU, which was either set by the machine reset
> > > +     * code or by CAS.
> > > +     */
> > > +    ppc_set_compat(cpu,
> > > +                   cpu == first_ppc_cpu ?
> > > +                   spapr->max_compat_pvr : first_ppc_cpu->compat_pvr,
> > > +                   &error_fatal);
> > 
> > This assumes that when it is called for a non-boot CPU, it has already
> > been called for the boot CPU..  Are we certain that's guaranteed by
> > the sequence of reset calls during a full machine reset?
> > 
> 
> This happens to be the case. Basically because the boot CPU core
> is created (including registering its reset handler) first and
> qemu_devices_reset() calls handlers in the same order they were
> registered.

Right, I assumed it works for now, but it seems rather fragile, since
I'm not sure we're relying on guaranteed properties of the interface.
Is there any way we could at least assert() if things are called out
of order?

> 
> > >  }
> > >  
> > >  void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,
> > 
>
Greg Kurz Nov. 25, 2020, 9:51 a.m. UTC | #4
On Wed, 25 Nov 2020 13:39:47 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Mon, Nov 23, 2020 at 12:51:08PM +0100, Greg Kurz wrote:
> > On Mon, 23 Nov 2020 16:11:30 +1100
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> > 
> > > On Sat, Nov 21, 2020 at 12:42:03AM +0100, Greg Kurz wrote:
> > > > When it comes to resetting the compat mode of the vCPUS, there are
> > > > two situations to consider:
> > > > (1) machine reset should set the compat mode back to the machine default,
> > > >     ie. spapr->max_compat_pvr
> > > > (2) hot plugged vCPUs should set their compat mode to mach the boot vCPU,
> > > >     ie. POWERPC_CPU(first_cpu)->compat_pvr
> > > > 
> > > > This is currently handled in two separate places: globally for all vCPUs
> > > > from the machine reset code for (1) and for each thread of a core from
> > > > the hotplug path for (2).
> > > > 
> > > > Since the machine reset code already resets all vCPUs, starting with boot
> > > > vCPU, consolidate the logic in spapr_reset_vcpu(). Special case the boot
> > > > vCPU so that it resets its compat mode back to the machine default. Any
> > > > other vCPU just need to match the compat mode of the boot vCPU.
> > > > 
> > > > Failing to set the compat mode during machine reset is a fatal error,
> > > > but not for hot plugged vCPUs. This is arguable because if we've been
> > > > able to set the boot vCPU compat mode at CAS or during machine reset,
> > > > it should definitely not fail for other vCPUs. Since spapr_reset_vcpu()
> > > > already has a fatal error path for kvm_check_mmu() failures, do the
> > > > same for ppc_set_compat().
> > > > 
> > > > This gets rid of an error path in spapr_core_plug(). It will allow
> > > > further simplifications.
> > > > 
> > > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > > > ---
> > > >  hw/ppc/spapr.c          | 16 ----------------
> > > >  hw/ppc/spapr_cpu_core.c | 13 +++++++++++++
> > > >  2 files changed, 13 insertions(+), 16 deletions(-)
> > > > 
> > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > index f58f77389e8e..da7586f548df 100644
> > > > --- a/hw/ppc/spapr.c
> > > > +++ b/hw/ppc/spapr.c
> > > > @@ -1606,8 +1606,6 @@ static void spapr_machine_reset(MachineState *machine)
> > > >      spapr_ovec_cleanup(spapr->ov5_cas);
> > > >      spapr->ov5_cas = spapr_ovec_new();
> > > >  
> > > > -    ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
> > > > -
> > > >      /*
> > > >       * This is fixing some of the default configuration of the XIVE
> > > >       * devices. To be called after the reset of the machine devices.
> > > > @@ -3785,20 +3783,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> > > >  
> > > >      core_slot->cpu = OBJECT(dev);
> > > >  
> > > > -    /*
> > > > -     * Set compatibility mode to match the boot CPU, which was either set
> > > > -     * by the machine reset code or by CAS.
> > > > -     */
> > > > -    if (hotplugged) {
> > > > -        for (i = 0; i < cc->nr_threads; i++) {
> > > > -            if (ppc_set_compat(core->threads[i],
> > > > -                               POWERPC_CPU(first_cpu)->compat_pvr,
> > > > -                               errp) < 0) {
> > > > -                return;
> > > > -            }
> > > > -        }
> > > > -    }
> > > > -
> > > >      if (smc->pre_2_10_has_unused_icps) {
> > > >          for (i = 0; i < cc->nr_threads; i++) {
> > > >              cs = CPU(core->threads[i]);
> > > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > > > index 2f7dc3c23ded..17741a3fb77f 100644
> > > > --- a/hw/ppc/spapr_cpu_core.c
> > > > +++ b/hw/ppc/spapr_cpu_core.c
> > > > @@ -27,6 +27,7 @@
> > > >  
> > > >  static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > > >  {
> > > > +    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
> > > >      CPUState *cs = CPU(cpu);
> > > >      CPUPPCState *env = &cpu->env;
> > > >      PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > > > @@ -69,6 +70,18 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > > >      kvm_check_mmu(cpu, &error_fatal);
> > > >  
> > > >      spapr_irq_cpu_intc_reset(spapr, cpu);
> > > > +
> > > > +    /*
> > > > +     * The boot CPU is only reset during machine reset : reset its
> > > > +     * compatibility mode to the machine default. For other CPUs,
> > > > +     * either cold plugged or hot plugged, set the compatibility mode
> > > > +     * to match the boot CPU, which was either set by the machine reset
> > > > +     * code or by CAS.
> > > > +     */
> > > > +    ppc_set_compat(cpu,
> > > > +                   cpu == first_ppc_cpu ?
> > > > +                   spapr->max_compat_pvr : first_ppc_cpu->compat_pvr,
> > > > +                   &error_fatal);
> > > 
> > > This assumes that when it is called for a non-boot CPU, it has already
> > > been called for the boot CPU..  Are we certain that's guaranteed by
> > > the sequence of reset calls during a full machine reset?
> > > 
> > 
> > This happens to be the case. Basically because the boot CPU core
> > is created (including registering its reset handler) first and
> > qemu_devices_reset() calls handlers in the same order they were
> > registered.
> 
> Right, I assumed it works for now, but it seems rather fragile, since
> I'm not sure we're relying on guaranteed properties of the interface.

The reset handler interface is absolutely undocumented, so I guess we
have no formal guarantees at the present time. But since the current
implementation has the property, would it be acceptable to carve it
in stone with added documentation ? In the event of unlikely changes
to the reset handler logic, people would _just_ need to make sure
handlers are called in the same order they were registered.

> Is there any way we could at least assert() if things are called out
> of order?
> 

Maybe. I'll look into it.

> > 
> > > >  }
> > > >  
> > > >  void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,
> > > 
> > 
> 
> 
>
David Gibson Nov. 26, 2020, 4:57 a.m. UTC | #5
On Wed, Nov 25, 2020 at 10:51:05AM +0100, Greg Kurz wrote:
> On Wed, 25 Nov 2020 13:39:47 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Mon, Nov 23, 2020 at 12:51:08PM +0100, Greg Kurz wrote:
> > > On Mon, 23 Nov 2020 16:11:30 +1100
> > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > 
> > > > On Sat, Nov 21, 2020 at 12:42:03AM +0100, Greg Kurz wrote:
> > > > > When it comes to resetting the compat mode of the vCPUS, there are
> > > > > two situations to consider:
> > > > > (1) machine reset should set the compat mode back to the machine default,
> > > > >     ie. spapr->max_compat_pvr
> > > > > (2) hot plugged vCPUs should set their compat mode to mach the boot vCPU,
> > > > >     ie. POWERPC_CPU(first_cpu)->compat_pvr
> > > > > 
> > > > > This is currently handled in two separate places: globally for all vCPUs
> > > > > from the machine reset code for (1) and for each thread of a core from
> > > > > the hotplug path for (2).
> > > > > 
> > > > > Since the machine reset code already resets all vCPUs, starting with boot
> > > > > vCPU, consolidate the logic in spapr_reset_vcpu(). Special case the boot
> > > > > vCPU so that it resets its compat mode back to the machine default. Any
> > > > > other vCPU just need to match the compat mode of the boot vCPU.
> > > > > 
> > > > > Failing to set the compat mode during machine reset is a fatal error,
> > > > > but not for hot plugged vCPUs. This is arguable because if we've been
> > > > > able to set the boot vCPU compat mode at CAS or during machine reset,
> > > > > it should definitely not fail for other vCPUs. Since spapr_reset_vcpu()
> > > > > already has a fatal error path for kvm_check_mmu() failures, do the
> > > > > same for ppc_set_compat().
> > > > > 
> > > > > This gets rid of an error path in spapr_core_plug(). It will allow
> > > > > further simplifications.
> > > > > 
> > > > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > > > > ---
> > > > >  hw/ppc/spapr.c          | 16 ----------------
> > > > >  hw/ppc/spapr_cpu_core.c | 13 +++++++++++++
> > > > >  2 files changed, 13 insertions(+), 16 deletions(-)
> > > > > 
> > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > > index f58f77389e8e..da7586f548df 100644
> > > > > --- a/hw/ppc/spapr.c
> > > > > +++ b/hw/ppc/spapr.c
> > > > > @@ -1606,8 +1606,6 @@ static void spapr_machine_reset(MachineState *machine)
> > > > >      spapr_ovec_cleanup(spapr->ov5_cas);
> > > > >      spapr->ov5_cas = spapr_ovec_new();
> > > > >  
> > > > > -    ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
> > > > > -
> > > > >      /*
> > > > >       * This is fixing some of the default configuration of the XIVE
> > > > >       * devices. To be called after the reset of the machine devices.
> > > > > @@ -3785,20 +3783,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> > > > >  
> > > > >      core_slot->cpu = OBJECT(dev);
> > > > >  
> > > > > -    /*
> > > > > -     * Set compatibility mode to match the boot CPU, which was either set
> > > > > -     * by the machine reset code or by CAS.
> > > > > -     */
> > > > > -    if (hotplugged) {
> > > > > -        for (i = 0; i < cc->nr_threads; i++) {
> > > > > -            if (ppc_set_compat(core->threads[i],
> > > > > -                               POWERPC_CPU(first_cpu)->compat_pvr,
> > > > > -                               errp) < 0) {
> > > > > -                return;
> > > > > -            }
> > > > > -        }
> > > > > -    }
> > > > > -
> > > > >      if (smc->pre_2_10_has_unused_icps) {
> > > > >          for (i = 0; i < cc->nr_threads; i++) {
> > > > >              cs = CPU(core->threads[i]);
> > > > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > > > > index 2f7dc3c23ded..17741a3fb77f 100644
> > > > > --- a/hw/ppc/spapr_cpu_core.c
> > > > > +++ b/hw/ppc/spapr_cpu_core.c
> > > > > @@ -27,6 +27,7 @@
> > > > >  
> > > > >  static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > > > >  {
> > > > > +    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
> > > > >      CPUState *cs = CPU(cpu);
> > > > >      CPUPPCState *env = &cpu->env;
> > > > >      PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > > > > @@ -69,6 +70,18 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > > > >      kvm_check_mmu(cpu, &error_fatal);
> > > > >  
> > > > >      spapr_irq_cpu_intc_reset(spapr, cpu);
> > > > > +
> > > > > +    /*
> > > > > +     * The boot CPU is only reset during machine reset : reset its
> > > > > +     * compatibility mode to the machine default. For other CPUs,
> > > > > +     * either cold plugged or hot plugged, set the compatibility mode
> > > > > +     * to match the boot CPU, which was either set by the machine reset
> > > > > +     * code or by CAS.
> > > > > +     */
> > > > > +    ppc_set_compat(cpu,
> > > > > +                   cpu == first_ppc_cpu ?
> > > > > +                   spapr->max_compat_pvr : first_ppc_cpu->compat_pvr,
> > > > > +                   &error_fatal);
> > > > 
> > > > This assumes that when it is called for a non-boot CPU, it has already
> > > > been called for the boot CPU..  Are we certain that's guaranteed by
> > > > the sequence of reset calls during a full machine reset?
> > > > 
> > > 
> > > This happens to be the case. Basically because the boot CPU core
> > > is created (including registering its reset handler) first and
> > > qemu_devices_reset() calls handlers in the same order they were
> > > registered.
> > 
> > Right, I assumed it works for now, but it seems rather fragile, since
> > I'm not sure we're relying on guaranteed properties of the interface.
> 
> The reset handler interface is absolutely undocumented, so I guess we
> have no formal guarantees at the present time. But since the current
> implementation has the property, would it be acceptable to carve it
> in stone with added documentation ? In the event of unlikely changes
> to the reset handler logic, people would _just_ need to make sure
> handlers are called in the same order they were registered.

Yeah, maybe.

One other thing occurs to me: will we still do things in the right
order if the (initial) boot cpu is hot unplugged, then replugged
before a reset?

> > Is there any way we could at least assert() if things are called out
> > of order?
> > 
> 
> Maybe. I'll look into it.
> 
> > > 
> > > > >  }
> > > > >  
> > > > >  void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,
> > > > 
> > > 
> > 
> > 
> > 
>
Greg Kurz Nov. 26, 2020, 9:10 a.m. UTC | #6
On Thu, 26 Nov 2020 15:57:37 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Wed, Nov 25, 2020 at 10:51:05AM +0100, Greg Kurz wrote:
> > On Wed, 25 Nov 2020 13:39:47 +1100
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> > 
> > > On Mon, Nov 23, 2020 at 12:51:08PM +0100, Greg Kurz wrote:
> > > > On Mon, 23 Nov 2020 16:11:30 +1100
> > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > 
> > > > > On Sat, Nov 21, 2020 at 12:42:03AM +0100, Greg Kurz wrote:
> > > > > > When it comes to resetting the compat mode of the vCPUS, there are
> > > > > > two situations to consider:
> > > > > > (1) machine reset should set the compat mode back to the machine default,
> > > > > >     ie. spapr->max_compat_pvr
> > > > > > (2) hot plugged vCPUs should set their compat mode to mach the boot vCPU,
> > > > > >     ie. POWERPC_CPU(first_cpu)->compat_pvr
> > > > > > 
> > > > > > This is currently handled in two separate places: globally for all vCPUs
> > > > > > from the machine reset code for (1) and for each thread of a core from
> > > > > > the hotplug path for (2).
> > > > > > 
> > > > > > Since the machine reset code already resets all vCPUs, starting with boot
> > > > > > vCPU, consolidate the logic in spapr_reset_vcpu(). Special case the boot
> > > > > > vCPU so that it resets its compat mode back to the machine default. Any
> > > > > > other vCPU just need to match the compat mode of the boot vCPU.
> > > > > > 
> > > > > > Failing to set the compat mode during machine reset is a fatal error,
> > > > > > but not for hot plugged vCPUs. This is arguable because if we've been
> > > > > > able to set the boot vCPU compat mode at CAS or during machine reset,
> > > > > > it should definitely not fail for other vCPUs. Since spapr_reset_vcpu()
> > > > > > already has a fatal error path for kvm_check_mmu() failures, do the
> > > > > > same for ppc_set_compat().
> > > > > > 
> > > > > > This gets rid of an error path in spapr_core_plug(). It will allow
> > > > > > further simplifications.
> > > > > > 
> > > > > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > > > > > ---
> > > > > >  hw/ppc/spapr.c          | 16 ----------------
> > > > > >  hw/ppc/spapr_cpu_core.c | 13 +++++++++++++
> > > > > >  2 files changed, 13 insertions(+), 16 deletions(-)
> > > > > > 
> > > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > > > index f58f77389e8e..da7586f548df 100644
> > > > > > --- a/hw/ppc/spapr.c
> > > > > > +++ b/hw/ppc/spapr.c
> > > > > > @@ -1606,8 +1606,6 @@ static void spapr_machine_reset(MachineState *machine)
> > > > > >      spapr_ovec_cleanup(spapr->ov5_cas);
> > > > > >      spapr->ov5_cas = spapr_ovec_new();
> > > > > >  
> > > > > > -    ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
> > > > > > -
> > > > > >      /*
> > > > > >       * This is fixing some of the default configuration of the XIVE
> > > > > >       * devices. To be called after the reset of the machine devices.
> > > > > > @@ -3785,20 +3783,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> > > > > >  
> > > > > >      core_slot->cpu = OBJECT(dev);
> > > > > >  
> > > > > > -    /*
> > > > > > -     * Set compatibility mode to match the boot CPU, which was either set
> > > > > > -     * by the machine reset code or by CAS.
> > > > > > -     */
> > > > > > -    if (hotplugged) {
> > > > > > -        for (i = 0; i < cc->nr_threads; i++) {
> > > > > > -            if (ppc_set_compat(core->threads[i],
> > > > > > -                               POWERPC_CPU(first_cpu)->compat_pvr,
> > > > > > -                               errp) < 0) {
> > > > > > -                return;
> > > > > > -            }
> > > > > > -        }
> > > > > > -    }
> > > > > > -
> > > > > >      if (smc->pre_2_10_has_unused_icps) {
> > > > > >          for (i = 0; i < cc->nr_threads; i++) {
> > > > > >              cs = CPU(core->threads[i]);
> > > > > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > > > > > index 2f7dc3c23ded..17741a3fb77f 100644
> > > > > > --- a/hw/ppc/spapr_cpu_core.c
> > > > > > +++ b/hw/ppc/spapr_cpu_core.c
> > > > > > @@ -27,6 +27,7 @@
> > > > > >  
> > > > > >  static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > > > > >  {
> > > > > > +    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
> > > > > >      CPUState *cs = CPU(cpu);
> > > > > >      CPUPPCState *env = &cpu->env;
> > > > > >      PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > > > > > @@ -69,6 +70,18 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > > > > >      kvm_check_mmu(cpu, &error_fatal);
> > > > > >  
> > > > > >      spapr_irq_cpu_intc_reset(spapr, cpu);
> > > > > > +
> > > > > > +    /*
> > > > > > +     * The boot CPU is only reset during machine reset : reset its
> > > > > > +     * compatibility mode to the machine default. For other CPUs,
> > > > > > +     * either cold plugged or hot plugged, set the compatibility mode
> > > > > > +     * to match the boot CPU, which was either set by the machine reset
> > > > > > +     * code or by CAS.
> > > > > > +     */
> > > > > > +    ppc_set_compat(cpu,
> > > > > > +                   cpu == first_ppc_cpu ?
> > > > > > +                   spapr->max_compat_pvr : first_ppc_cpu->compat_pvr,
> > > > > > +                   &error_fatal);
> > > > > 
> > > > > This assumes that when it is called for a non-boot CPU, it has already
> > > > > been called for the boot CPU..  Are we certain that's guaranteed by
> > > > > the sequence of reset calls during a full machine reset?
> > > > > 
> > > > 
> > > > This happens to be the case. Basically because the boot CPU core
> > > > is created (including registering its reset handler) first and
> > > > qemu_devices_reset() calls handlers in the same order they were
> > > > registered.
> > > 
> > > Right, I assumed it works for now, but it seems rather fragile, since
> > > I'm not sure we're relying on guaranteed properties of the interface.
> > 
> > The reset handler interface is absolutely undocumented, so I guess we
> > have no formal guarantees at the present time. But since the current
> > implementation has the property, would it be acceptable to carve it
> > in stone with added documentation ? In the event of unlikely changes
> > to the reset handler logic, people would _just_ need to make sure
> > handlers are called in the same order they were registered.
> 
> Yeah, maybe.
> 
> One other thing occurs to me: will we still do things in the right
> order if the (initial) boot cpu is hot unplugged, then replugged
> before a reset?
> 

This can't happen AFAICT.

(qemu) qom-get /machine/unattached/device[1] core-id
0
(qemu) device_del /machine/unattached/device[1]
Error: Boot CPU core may not be unplugged

commit 62be8b044adf47327ebefdefb25f28a40316ebd0
Author: Bharata B Rao <bharata@linux.vnet.ibm.com>
Date:   Wed Jul 27 10:44:42 2016 +0530

    spapr: Prevent boot CPU core removal


So yes, this adds yet another road block on the way to support hot
unplug of the boot CPU. Is this a concern ?

If we go forward with this patch, maybe I should mention in the
changelog/documentation the various assumptions which this patch
is made under:
- reset handlers are called in the same order they were registered
- boot CPU registers its reset handler before other CPUs
- boot CPU cannot be hot unplugged

These guarantee that the boot core is always reset before other
cores during reset.

> > > Is there any way we could at least assert() if things are called out
> > > of order?
> > > 
> > 
> > Maybe. I'll look into it.
> > 
> > > > 
> > > > > >  }
> > > > > >  
> > > > > >  void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,
> > > > > 
> > > > 
> > > 
> > > 
> > > 
> > 
> 
> 
>
Igor Mammedov Nov. 26, 2020, 4:23 p.m. UTC | #7
On Thu, 26 Nov 2020 10:10:27 +0100
Greg Kurz <groug@kaod.org> wrote:

> On Thu, 26 Nov 2020 15:57:37 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Wed, Nov 25, 2020 at 10:51:05AM +0100, Greg Kurz wrote:  
> > > On Wed, 25 Nov 2020 13:39:47 +1100
> > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > >   
> > > > On Mon, Nov 23, 2020 at 12:51:08PM +0100, Greg Kurz wrote:  
> > > > > On Mon, 23 Nov 2020 16:11:30 +1100
> > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > >   
> > > > > > On Sat, Nov 21, 2020 at 12:42:03AM +0100, Greg Kurz wrote:  
> > > > > > > When it comes to resetting the compat mode of the vCPUS, there are
> > > > > > > two situations to consider:
> > > > > > > (1) machine reset should set the compat mode back to the machine default,
> > > > > > >     ie. spapr->max_compat_pvr
> > > > > > > (2) hot plugged vCPUs should set their compat mode to mach the boot vCPU,
> > > > > > >     ie. POWERPC_CPU(first_cpu)->compat_pvr
> > > > > > > 
> > > > > > > This is currently handled in two separate places: globally for all vCPUs
> > > > > > > from the machine reset code for (1) and for each thread of a core from
> > > > > > > the hotplug path for (2).
> > > > > > > 
> > > > > > > Since the machine reset code already resets all vCPUs, starting with boot
> > > > > > > vCPU, consolidate the logic in spapr_reset_vcpu(). Special case the boot
> > > > > > > vCPU so that it resets its compat mode back to the machine default. Any
> > > > > > > other vCPU just need to match the compat mode of the boot vCPU.
> > > > > > > 
> > > > > > > Failing to set the compat mode during machine reset is a fatal error,
> > > > > > > but not for hot plugged vCPUs. This is arguable because if we've been
> > > > > > > able to set the boot vCPU compat mode at CAS or during machine reset,
> > > > > > > it should definitely not fail for other vCPUs. Since spapr_reset_vcpu()
> > > > > > > already has a fatal error path for kvm_check_mmu() failures, do the
> > > > > > > same for ppc_set_compat().
> > > > > > > 
> > > > > > > This gets rid of an error path in spapr_core_plug(). It will allow
> > > > > > > further simplifications.
> > > > > > > 
> > > > > > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > > > > > > ---
> > > > > > >  hw/ppc/spapr.c          | 16 ----------------
> > > > > > >  hw/ppc/spapr_cpu_core.c | 13 +++++++++++++
> > > > > > >  2 files changed, 13 insertions(+), 16 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > > > > index f58f77389e8e..da7586f548df 100644
> > > > > > > --- a/hw/ppc/spapr.c
> > > > > > > +++ b/hw/ppc/spapr.c
> > > > > > > @@ -1606,8 +1606,6 @@ static void spapr_machine_reset(MachineState *machine)
> > > > > > >      spapr_ovec_cleanup(spapr->ov5_cas);
> > > > > > >      spapr->ov5_cas = spapr_ovec_new();
> > > > > > >  
> > > > > > > -    ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
> > > > > > > -
> > > > > > >      /*
> > > > > > >       * This is fixing some of the default configuration of the XIVE
> > > > > > >       * devices. To be called after the reset of the machine devices.
> > > > > > > @@ -3785,20 +3783,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> > > > > > >  
> > > > > > >      core_slot->cpu = OBJECT(dev);
> > > > > > >  
> > > > > > > -    /*
> > > > > > > -     * Set compatibility mode to match the boot CPU, which was either set
> > > > > > > -     * by the machine reset code or by CAS.
> > > > > > > -     */
> > > > > > > -    if (hotplugged) {
> > > > > > > -        for (i = 0; i < cc->nr_threads; i++) {
> > > > > > > -            if (ppc_set_compat(core->threads[i],
> > > > > > > -                               POWERPC_CPU(first_cpu)->compat_pvr,
> > > > > > > -                               errp) < 0) {
> > > > > > > -                return;
> > > > > > > -            }
> > > > > > > -        }
> > > > > > > -    }
> > > > > > > -
> > > > > > >      if (smc->pre_2_10_has_unused_icps) {
> > > > > > >          for (i = 0; i < cc->nr_threads; i++) {
> > > > > > >              cs = CPU(core->threads[i]);
> > > > > > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > > > > > > index 2f7dc3c23ded..17741a3fb77f 100644
> > > > > > > --- a/hw/ppc/spapr_cpu_core.c
> > > > > > > +++ b/hw/ppc/spapr_cpu_core.c
> > > > > > > @@ -27,6 +27,7 @@
> > > > > > >  
> > > > > > >  static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > > > > > >  {
> > > > > > > +    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
> > > > > > >      CPUState *cs = CPU(cpu);
> > > > > > >      CPUPPCState *env = &cpu->env;
> > > > > > >      PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > > > > > > @@ -69,6 +70,18 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > > > > > >      kvm_check_mmu(cpu, &error_fatal);
> > > > > > >  
> > > > > > >      spapr_irq_cpu_intc_reset(spapr, cpu);
> > > > > > > +
> > > > > > > +    /*
> > > > > > > +     * The boot CPU is only reset during machine reset : reset its
> > > > > > > +     * compatibility mode to the machine default. For other CPUs,
> > > > > > > +     * either cold plugged or hot plugged, set the compatibility mode
> > > > > > > +     * to match the boot CPU, which was either set by the machine reset
> > > > > > > +     * code or by CAS.
> > > > > > > +     */
> > > > > > > +    ppc_set_compat(cpu,
> > > > > > > +                   cpu == first_ppc_cpu ?
> > > > > > > +                   spapr->max_compat_pvr : first_ppc_cpu->compat_pvr,
> > > > > > > +                   &error_fatal);  
> > > > > > 
> > > > > > This assumes that when it is called for a non-boot CPU, it has already
> > > > > > been called for the boot CPU..  Are we certain that's guaranteed by
> > > > > > the sequence of reset calls during a full machine reset?
> > > > > >   
> > > > > 
> > > > > This happens to be the case. Basically because the boot CPU core
> > > > > is created (including registering its reset handler) first and
> > > > > qemu_devices_reset() calls handlers in the same order they were
> > > > > registered.  
> > > > 
> > > > Right, I assumed it works for now, but it seems rather fragile, since
> > > > I'm not sure we're relying on guaranteed properties of the interface.  
> > > 
> > > The reset handler interface is absolutely undocumented, so I guess we
> > > have no formal guarantees at the present time. But since the current
> > > implementation has the property, would it be acceptable to carve it
> > > in stone with added documentation ? In the event of unlikely changes
> > > to the reset handler logic, people would _just_ need to make sure
> > > handlers are called in the same order they were registered.  
> > 
> > Yeah, maybe.
> > 
> > One other thing occurs to me: will we still do things in the right
> > order if the (initial) boot cpu is hot unplugged, then replugged
> > before a reset?
> >   
> 
> This can't happen AFAICT.
> 
> (qemu) qom-get /machine/unattached/device[1] core-id
> 0
> (qemu) device_del /machine/unattached/device[1]
> Error: Boot CPU core may not be unplugged
> 
> commit 62be8b044adf47327ebefdefb25f28a40316ebd0
> Author: Bharata B Rao <bharata@linux.vnet.ibm.com>
> Date:   Wed Jul 27 10:44:42 2016 +0530
> 
>     spapr: Prevent boot CPU core removal
> 
> 
> So yes, this adds yet another road block on the way to support hot
> unplug of the boot CPU. Is this a concern ?
> 
> If we go forward with this patch, maybe I should mention in the
> changelog/documentation the various assumptions which this patch
> is made under:
> - reset handlers are called in the same order they were registered
> - boot CPU registers its reset handler before other CPUs
> - boot CPU cannot be hot unplugged
> 
> These guarantee that the boot core is always reset before other
> cores during reset.
it might work for now but it seems fragile to me.

What if we  make compat mode a property and move setting it to machine code,
more precisely treat it like any other cpu feature property.
  
 if(need_compat_more)
    register_global_property(compat_mode)

that way when any cpu is created it will have this property set
and it won't depend on the order CPUs are created/reset

> 
> > > > Is there any way we could at least assert() if things are called out
> > > > of order?
> > > >   
> > > 
> > > Maybe. I'll look into it.
> > >   
> > > > >   
> > > > > > >  }
> > > > > > >  
> > > > > > >  void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,  
> > > > > >   
> > > > >   
> > > > 
> > > > 
> > > >   
> > >   
> > 
> > 
> >   
>
Igor Mammedov Nov. 26, 2020, 8:53 p.m. UTC | #8
On Thu, 26 Nov 2020 17:23:20 +0100
Igor Mammedov <imammedo@redhat.com> wrote:

> On Thu, 26 Nov 2020 10:10:27 +0100
> Greg Kurz <groug@kaod.org> wrote:
> 
> > On Thu, 26 Nov 2020 15:57:37 +1100
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> >   
> > > On Wed, Nov 25, 2020 at 10:51:05AM +0100, Greg Kurz wrote:    
> > > > On Wed, 25 Nov 2020 13:39:47 +1100
> > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > >     
> > > > > On Mon, Nov 23, 2020 at 12:51:08PM +0100, Greg Kurz wrote:    
> > > > > > On Mon, 23 Nov 2020 16:11:30 +1100
> > > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > >     
> > > > > > > On Sat, Nov 21, 2020 at 12:42:03AM +0100, Greg Kurz wrote:    
> > > > > > > > When it comes to resetting the compat mode of the vCPUS, there are
> > > > > > > > two situations to consider:
> > > > > > > > (1) machine reset should set the compat mode back to the machine default,
> > > > > > > >     ie. spapr->max_compat_pvr
> > > > > > > > (2) hot plugged vCPUs should set their compat mode to mach the boot vCPU,
> > > > > > > >     ie. POWERPC_CPU(first_cpu)->compat_pvr
> > > > > > > > 
> > > > > > > > This is currently handled in two separate places: globally for all vCPUs
> > > > > > > > from the machine reset code for (1) and for each thread of a core from
> > > > > > > > the hotplug path for (2).
> > > > > > > > 
> > > > > > > > Since the machine reset code already resets all vCPUs, starting with boot
> > > > > > > > vCPU, consolidate the logic in spapr_reset_vcpu(). Special case the boot
> > > > > > > > vCPU so that it resets its compat mode back to the machine default. Any
> > > > > > > > other vCPU just need to match the compat mode of the boot vCPU.
> > > > > > > > 
> > > > > > > > Failing to set the compat mode during machine reset is a fatal error,
> > > > > > > > but not for hot plugged vCPUs. This is arguable because if we've been
> > > > > > > > able to set the boot vCPU compat mode at CAS or during machine reset,
> > > > > > > > it should definitely not fail for other vCPUs. Since spapr_reset_vcpu()
> > > > > > > > already has a fatal error path for kvm_check_mmu() failures, do the
> > > > > > > > same for ppc_set_compat().
> > > > > > > > 
> > > > > > > > This gets rid of an error path in spapr_core_plug(). It will allow
> > > > > > > > further simplifications.
> > > > > > > > 
> > > > > > > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > > > > > > > ---
> > > > > > > >  hw/ppc/spapr.c          | 16 ----------------
> > > > > > > >  hw/ppc/spapr_cpu_core.c | 13 +++++++++++++
> > > > > > > >  2 files changed, 13 insertions(+), 16 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > > > > > index f58f77389e8e..da7586f548df 100644
> > > > > > > > --- a/hw/ppc/spapr.c
> > > > > > > > +++ b/hw/ppc/spapr.c
> > > > > > > > @@ -1606,8 +1606,6 @@ static void spapr_machine_reset(MachineState *machine)
> > > > > > > >      spapr_ovec_cleanup(spapr->ov5_cas);
> > > > > > > >      spapr->ov5_cas = spapr_ovec_new();
> > > > > > > >  
> > > > > > > > -    ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
> > > > > > > > -
> > > > > > > >      /*
> > > > > > > >       * This is fixing some of the default configuration of the XIVE
> > > > > > > >       * devices. To be called after the reset of the machine devices.
> > > > > > > > @@ -3785,20 +3783,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> > > > > > > >  
> > > > > > > >      core_slot->cpu = OBJECT(dev);
> > > > > > > >  
> > > > > > > > -    /*
> > > > > > > > -     * Set compatibility mode to match the boot CPU, which was either set
> > > > > > > > -     * by the machine reset code or by CAS.
> > > > > > > > -     */
> > > > > > > > -    if (hotplugged) {
> > > > > > > > -        for (i = 0; i < cc->nr_threads; i++) {
> > > > > > > > -            if (ppc_set_compat(core->threads[i],
> > > > > > > > -                               POWERPC_CPU(first_cpu)->compat_pvr,
> > > > > > > > -                               errp) < 0) {
> > > > > > > > -                return;
> > > > > > > > -            }
> > > > > > > > -        }
> > > > > > > > -    }
> > > > > > > > -
> > > > > > > >      if (smc->pre_2_10_has_unused_icps) {
> > > > > > > >          for (i = 0; i < cc->nr_threads; i++) {
> > > > > > > >              cs = CPU(core->threads[i]);
> > > > > > > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > > > > > > > index 2f7dc3c23ded..17741a3fb77f 100644
> > > > > > > > --- a/hw/ppc/spapr_cpu_core.c
> > > > > > > > +++ b/hw/ppc/spapr_cpu_core.c
> > > > > > > > @@ -27,6 +27,7 @@
> > > > > > > >  
> > > > > > > >  static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > > > > > > >  {
> > > > > > > > +    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
> > > > > > > >      CPUState *cs = CPU(cpu);
> > > > > > > >      CPUPPCState *env = &cpu->env;
> > > > > > > >      PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > > > > > > > @@ -69,6 +70,18 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > > > > > > >      kvm_check_mmu(cpu, &error_fatal);
> > > > > > > >  
> > > > > > > >      spapr_irq_cpu_intc_reset(spapr, cpu);
> > > > > > > > +
> > > > > > > > +    /*
> > > > > > > > +     * The boot CPU is only reset during machine reset : reset its
> > > > > > > > +     * compatibility mode to the machine default. For other CPUs,
> > > > > > > > +     * either cold plugged or hot plugged, set the compatibility mode
> > > > > > > > +     * to match the boot CPU, which was either set by the machine reset
> > > > > > > > +     * code or by CAS.
> > > > > > > > +     */
> > > > > > > > +    ppc_set_compat(cpu,
> > > > > > > > +                   cpu == first_ppc_cpu ?
> > > > > > > > +                   spapr->max_compat_pvr : first_ppc_cpu->compat_pvr,
> > > > > > > > +                   &error_fatal);    
> > > > > > > 
> > > > > > > This assumes that when it is called for a non-boot CPU, it has already
> > > > > > > been called for the boot CPU..  Are we certain that's guaranteed by
> > > > > > > the sequence of reset calls during a full machine reset?
> > > > > > >     
> > > > > > 
> > > > > > This happens to be the case. Basically because the boot CPU core
> > > > > > is created (including registering its reset handler) first and
> > > > > > qemu_devices_reset() calls handlers in the same order they were
> > > > > > registered.    
> > > > > 
> > > > > Right, I assumed it works for now, but it seems rather fragile, since
> > > > > I'm not sure we're relying on guaranteed properties of the interface.    
> > > > 
> > > > The reset handler interface is absolutely undocumented, so I guess we
> > > > have no formal guarantees at the present time. But since the current
> > > > implementation has the property, would it be acceptable to carve it
> > > > in stone with added documentation ? In the event of unlikely changes
> > > > to the reset handler logic, people would _just_ need to make sure
> > > > handlers are called in the same order they were registered.    
> > > 
> > > Yeah, maybe.
> > > 
> > > One other thing occurs to me: will we still do things in the right
> > > order if the (initial) boot cpu is hot unplugged, then replugged
> > > before a reset?
> > >     
> > 
> > This can't happen AFAICT.
> > 
> > (qemu) qom-get /machine/unattached/device[1] core-id
> > 0
> > (qemu) device_del /machine/unattached/device[1]
> > Error: Boot CPU core may not be unplugged
> > 
> > commit 62be8b044adf47327ebefdefb25f28a40316ebd0
> > Author: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > Date:   Wed Jul 27 10:44:42 2016 +0530
> > 
> >     spapr: Prevent boot CPU core removal
> > 
> > 
> > So yes, this adds yet another road block on the way to support hot
> > unplug of the boot CPU. Is this a concern ?
> > 
> > If we go forward with this patch, maybe I should mention in the
> > changelog/documentation the various assumptions which this patch
> > is made under:
> > - reset handlers are called in the same order they were registered
> > - boot CPU registers its reset handler before other CPUs
> > - boot CPU cannot be hot unplugged
> > 
> > These guarantee that the boot core is always reset before other
> > cores during reset.  
> it might work for now but it seems fragile to me.
> 
> What if we  make compat mode a property and move setting it to machine code,
> more precisely treat it like any other cpu feature property.
>   
>  if(need_compat_more)
>     register_global_property(compat_mode)
> 
> that way when any cpu is created it will have this property set
> and it won't depend on the order CPUs are created/reset

Ah it's more complicated, ignore this nonsense pls.

> 
> >   
> > > > > Is there any way we could at least assert() if things are called out
> > > > > of order?
> > > > >     
> > > > 
> > > > Maybe. I'll look into it.
> > > >     
> > > > > >     
> > > > > > > >  }
> > > > > > > >  
> > > > > > > >  void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,    
> > > > > > >     
> > > > > >     
> > > > > 
> > > > > 
> > > > >     
> > > >     
> > > 
> > > 
> > >     
> >   
> 
>
David Gibson Nov. 27, 2020, 4:59 a.m. UTC | #9
On Thu, Nov 26, 2020 at 10:10:27AM +0100, Greg Kurz wrote:
> On Thu, 26 Nov 2020 15:57:37 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Wed, Nov 25, 2020 at 10:51:05AM +0100, Greg Kurz wrote:
> > > On Wed, 25 Nov 2020 13:39:47 +1100
> > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > 
> > > > On Mon, Nov 23, 2020 at 12:51:08PM +0100, Greg Kurz wrote:
> > > > > On Mon, 23 Nov 2020 16:11:30 +1100
> > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > 
> > > > > > On Sat, Nov 21, 2020 at 12:42:03AM +0100, Greg Kurz wrote:
> > > > > > > When it comes to resetting the compat mode of the vCPUS, there are
> > > > > > > two situations to consider:
> > > > > > > (1) machine reset should set the compat mode back to the machine default,
> > > > > > >     ie. spapr->max_compat_pvr
> > > > > > > (2) hot plugged vCPUs should set their compat mode to mach the boot vCPU,
> > > > > > >     ie. POWERPC_CPU(first_cpu)->compat_pvr
> > > > > > > 
> > > > > > > This is currently handled in two separate places: globally for all vCPUs
> > > > > > > from the machine reset code for (1) and for each thread of a core from
> > > > > > > the hotplug path for (2).
> > > > > > > 
> > > > > > > Since the machine reset code already resets all vCPUs, starting with boot
> > > > > > > vCPU, consolidate the logic in spapr_reset_vcpu(). Special case the boot
> > > > > > > vCPU so that it resets its compat mode back to the machine default. Any
> > > > > > > other vCPU just need to match the compat mode of the boot vCPU.
> > > > > > > 
> > > > > > > Failing to set the compat mode during machine reset is a fatal error,
> > > > > > > but not for hot plugged vCPUs. This is arguable because if we've been
> > > > > > > able to set the boot vCPU compat mode at CAS or during machine reset,
> > > > > > > it should definitely not fail for other vCPUs. Since spapr_reset_vcpu()
> > > > > > > already has a fatal error path for kvm_check_mmu() failures, do the
> > > > > > > same for ppc_set_compat().
> > > > > > > 
> > > > > > > This gets rid of an error path in spapr_core_plug(). It will allow
> > > > > > > further simplifications.
> > > > > > > 
> > > > > > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > > > > > > ---
> > > > > > >  hw/ppc/spapr.c          | 16 ----------------
> > > > > > >  hw/ppc/spapr_cpu_core.c | 13 +++++++++++++
> > > > > > >  2 files changed, 13 insertions(+), 16 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > > > > index f58f77389e8e..da7586f548df 100644
> > > > > > > --- a/hw/ppc/spapr.c
> > > > > > > +++ b/hw/ppc/spapr.c
> > > > > > > @@ -1606,8 +1606,6 @@ static void spapr_machine_reset(MachineState *machine)
> > > > > > >      spapr_ovec_cleanup(spapr->ov5_cas);
> > > > > > >      spapr->ov5_cas = spapr_ovec_new();
> > > > > > >  
> > > > > > > -    ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
> > > > > > > -
> > > > > > >      /*
> > > > > > >       * This is fixing some of the default configuration of the XIVE
> > > > > > >       * devices. To be called after the reset of the machine devices.
> > > > > > > @@ -3785,20 +3783,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> > > > > > >  
> > > > > > >      core_slot->cpu = OBJECT(dev);
> > > > > > >  
> > > > > > > -    /*
> > > > > > > -     * Set compatibility mode to match the boot CPU, which was either set
> > > > > > > -     * by the machine reset code or by CAS.
> > > > > > > -     */
> > > > > > > -    if (hotplugged) {
> > > > > > > -        for (i = 0; i < cc->nr_threads; i++) {
> > > > > > > -            if (ppc_set_compat(core->threads[i],
> > > > > > > -                               POWERPC_CPU(first_cpu)->compat_pvr,
> > > > > > > -                               errp) < 0) {
> > > > > > > -                return;
> > > > > > > -            }
> > > > > > > -        }
> > > > > > > -    }
> > > > > > > -
> > > > > > >      if (smc->pre_2_10_has_unused_icps) {
> > > > > > >          for (i = 0; i < cc->nr_threads; i++) {
> > > > > > >              cs = CPU(core->threads[i]);
> > > > > > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > > > > > > index 2f7dc3c23ded..17741a3fb77f 100644
> > > > > > > --- a/hw/ppc/spapr_cpu_core.c
> > > > > > > +++ b/hw/ppc/spapr_cpu_core.c
> > > > > > > @@ -27,6 +27,7 @@
> > > > > > >  
> > > > > > >  static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > > > > > >  {
> > > > > > > +    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
> > > > > > >      CPUState *cs = CPU(cpu);
> > > > > > >      CPUPPCState *env = &cpu->env;
> > > > > > >      PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > > > > > > @@ -69,6 +70,18 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
> > > > > > >      kvm_check_mmu(cpu, &error_fatal);
> > > > > > >  
> > > > > > >      spapr_irq_cpu_intc_reset(spapr, cpu);
> > > > > > > +
> > > > > > > +    /*
> > > > > > > +     * The boot CPU is only reset during machine reset : reset its
> > > > > > > +     * compatibility mode to the machine default. For other CPUs,
> > > > > > > +     * either cold plugged or hot plugged, set the compatibility mode
> > > > > > > +     * to match the boot CPU, which was either set by the machine reset
> > > > > > > +     * code or by CAS.
> > > > > > > +     */
> > > > > > > +    ppc_set_compat(cpu,
> > > > > > > +                   cpu == first_ppc_cpu ?
> > > > > > > +                   spapr->max_compat_pvr : first_ppc_cpu->compat_pvr,
> > > > > > > +                   &error_fatal);
> > > > > > 
> > > > > > This assumes that when it is called for a non-boot CPU, it has already
> > > > > > been called for the boot CPU..  Are we certain that's guaranteed by
> > > > > > the sequence of reset calls during a full machine reset?
> > > > > > 
> > > > > 
> > > > > This happens to be the case. Basically because the boot CPU core
> > > > > is created (including registering its reset handler) first and
> > > > > qemu_devices_reset() calls handlers in the same order they were
> > > > > registered.
> > > > 
> > > > Right, I assumed it works for now, but it seems rather fragile, since
> > > > I'm not sure we're relying on guaranteed properties of the interface.
> > > 
> > > The reset handler interface is absolutely undocumented, so I guess we
> > > have no formal guarantees at the present time. But since the current
> > > implementation has the property, would it be acceptable to carve it
> > > in stone with added documentation ? In the event of unlikely changes
> > > to the reset handler logic, people would _just_ need to make sure
> > > handlers are called in the same order they were registered.
> > 
> > Yeah, maybe.
> > 
> > One other thing occurs to me: will we still do things in the right
> > order if the (initial) boot cpu is hot unplugged, then replugged
> > before a reset?
> > 
> 
> This can't happen AFAICT.
> 
> (qemu) qom-get /machine/unattached/device[1] core-id
> 0
> (qemu) device_del /machine/unattached/device[1]
> Error: Boot CPU core may not be unplugged
> 
> commit 62be8b044adf47327ebefdefb25f28a40316ebd0
> Author: Bharata B Rao <bharata@linux.vnet.ibm.com>
> Date:   Wed Jul 27 10:44:42 2016 +0530
> 
>     spapr: Prevent boot CPU core removal

Oh yeah, I'd forgotten we did this.

> 
> So yes, this adds yet another road block on the way to support hot
> unplug of the boot CPU. Is this a concern ?
> 
> If we go forward with this patch, maybe I should mention in the
> changelog/documentation the various assumptions which this patch
> is made under:
> - reset handlers are called in the same order they were registered
> - boot CPU registers its reset handler before other CPUs
> - boot CPU cannot be hot unplugged
> 
> These guarantee that the boot core is always reset before other
> cores during reset.
> 
> > > > Is there any way we could at least assert() if things are called out
> > > > of order?
> > > > 
> > > 
> > > Maybe. I'll look into it.
> > > 
> > > > > 
> > > > > > >  }
> > > > > > >  
> > > > > > >  void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,
> > > > > > 
> > > > > 
> > > > 
> > > > 
> > > > 
> > > 
> > 
> > 
> > 
>
Greg Kurz Nov. 27, 2020, 1:25 p.m. UTC | #10
On Fri, 27 Nov 2020 15:59:16 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

[...]
> > 
> > This can't happen AFAICT.
> > 
> > (qemu) qom-get /machine/unattached/device[1] core-id
> > 0
> > (qemu) device_del /machine/unattached/device[1]
> > Error: Boot CPU core may not be unplugged
> > 
> > commit 62be8b044adf47327ebefdefb25f28a40316ebd0
> > Author: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > Date:   Wed Jul 27 10:44:42 2016 +0530
> > 
> >     spapr: Prevent boot CPU core removal
> 
> Oh yeah, I'd forgotten we did this.
> 

Anyway, both you and Igor noted that this change is fragile, which
is true. So maybe we should just go on with the current behavior.

As mentioned in the changelog, one of the motivation to do this was
to get rid of the error path in spapr_core_plug() like other patches
in this series do for the other plug handlers. And I guess I should
do it like these other patches do : come up with a check in
spapr_core_pre_plug() that guarantees that ppc_set_compat() shouldn't
fail in spapr_core_plug() and can be passed &error_abort.

So I'll try to do just that. You can forget this patch.
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f58f77389e8e..da7586f548df 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1606,8 +1606,6 @@  static void spapr_machine_reset(MachineState *machine)
     spapr_ovec_cleanup(spapr->ov5_cas);
     spapr->ov5_cas = spapr_ovec_new();
 
-    ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
-
     /*
      * This is fixing some of the default configuration of the XIVE
      * devices. To be called after the reset of the machine devices.
@@ -3785,20 +3783,6 @@  static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
 
     core_slot->cpu = OBJECT(dev);
 
-    /*
-     * Set compatibility mode to match the boot CPU, which was either set
-     * by the machine reset code or by CAS.
-     */
-    if (hotplugged) {
-        for (i = 0; i < cc->nr_threads; i++) {
-            if (ppc_set_compat(core->threads[i],
-                               POWERPC_CPU(first_cpu)->compat_pvr,
-                               errp) < 0) {
-                return;
-            }
-        }
-    }
-
     if (smc->pre_2_10_has_unused_icps) {
         for (i = 0; i < cc->nr_threads; i++) {
             cs = CPU(core->threads[i]);
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 2f7dc3c23ded..17741a3fb77f 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -27,6 +27,7 @@ 
 
 static void spapr_reset_vcpu(PowerPCCPU *cpu)
 {
+    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
@@ -69,6 +70,18 @@  static void spapr_reset_vcpu(PowerPCCPU *cpu)
     kvm_check_mmu(cpu, &error_fatal);
 
     spapr_irq_cpu_intc_reset(spapr, cpu);
+
+    /*
+     * The boot CPU is only reset during machine reset : reset its
+     * compatibility mode to the machine default. For other CPUs,
+     * either cold plugged or hot plugged, set the compatibility mode
+     * to match the boot CPU, which was either set by the machine reset
+     * code or by CAS.
+     */
+    ppc_set_compat(cpu,
+                   cpu == first_ppc_cpu ?
+                   spapr->max_compat_pvr : first_ppc_cpu->compat_pvr,
+                   &error_fatal);
 }
 
 void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,