diff mbox

[RFC,v0] cpus: Convert cpu_index into a bitmap

Message ID 1426247796-1657-1-git-send-email-bharata@linux.vnet.ibm.com
State New
Headers show

Commit Message

Bharata B Rao March 13, 2015, 11:56 a.m. UTC
From: Bharata B Rao <bharata.rao@gmail.com>

Currently CPUState.cpu_index is monotonically increasing and a newly
created CPU always gets the next higher index. The next available
index is calculated by counting the existing number of CPUs. This is
fine as long as we only add CPUs, but there are architectures which
are starting to support CPU removal too. For an architecture like PowerPC
which derives its CPU identifier (device tree ID) from cpu_index, the
existing logic of generating cpu_index values causes problems.

With the currently proposed method of handling vCPU removal by parking
the vCPU fd in QEMU
(Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html),
generating cpu_index this way will not work for PowerPC.

This patch changes the way cpu_index is handed out by maintaining
a bit map of the CPUs that tracks both addition and removal of CPUs.

I am not sure if this is the right and an acceptable approach. The
alternative is to do something similar for PowerPC alone and not
depend on cpu_index.

I have tested this with out-of-the-tree patches for CPU hot plug and
removal on x86 and sPAPR PowerPC.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 exec.c                      | 39 +++++++++++++++++++++++++++++----------
 include/exec/exec-all.h     |  1 +
 target-alpha/cpu.c          |  6 ++++++
 target-arm/cpu.c            |  1 +
 target-cris/cpu.c           |  6 ++++++
 target-i386/cpu.c           |  6 ++++++
 target-lm32/cpu.c           |  6 ++++++
 target-m68k/cpu.c           |  6 ++++++
 target-microblaze/cpu.c     |  6 ++++++
 target-mips/cpu.c           |  6 ++++++
 target-moxie/cpu.c          |  6 ++++++
 target-openrisc/cpu.c       |  6 ++++++
 target-ppc/translate_init.c |  6 ++++++
 target-s390x/cpu.c          |  1 +
 target-sh4/cpu.c            |  6 ++++++
 target-sparc/cpu.c          |  1 +
 target-tricore/cpu.c        |  5 +++++
 target-unicore32/cpu.c      |  6 ++++++
 target-xtensa/cpu.c         |  6 ++++++
 19 files changed, 116 insertions(+), 10 deletions(-)

Comments

Alexander Graf March 17, 2015, 6:56 a.m. UTC | #1
On 13.03.15 12:56, Bharata B Rao wrote:
> From: Bharata B Rao <bharata.rao@gmail.com>
> 
> Currently CPUState.cpu_index is monotonically increasing and a newly
> created CPU always gets the next higher index. The next available
> index is calculated by counting the existing number of CPUs. This is
> fine as long as we only add CPUs, but there are architectures which
> are starting to support CPU removal too. For an architecture like PowerPC
> which derives its CPU identifier (device tree ID) from cpu_index, the
> existing logic of generating cpu_index values causes problems.
> 
> With the currently proposed method of handling vCPU removal by parking
> the vCPU fd in QEMU
> (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html),
> generating cpu_index this way will not work for PowerPC.
> 
> This patch changes the way cpu_index is handed out by maintaining
> a bit map of the CPUs that tracks both addition and removal of CPUs.
> 
> I am not sure if this is the right and an acceptable approach. The
> alternative is to do something similar for PowerPC alone and not
> depend on cpu_index.
> 
> I have tested this with out-of-the-tree patches for CPU hot plug and
> removal on x86 and sPAPR PowerPC.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  exec.c                      | 39 +++++++++++++++++++++++++++++----------
>  include/exec/exec-all.h     |  1 +
>  target-alpha/cpu.c          |  6 ++++++
>  target-arm/cpu.c            |  1 +
>  target-cris/cpu.c           |  6 ++++++
>  target-i386/cpu.c           |  6 ++++++
>  target-lm32/cpu.c           |  6 ++++++
>  target-m68k/cpu.c           |  6 ++++++
>  target-microblaze/cpu.c     |  6 ++++++
>  target-mips/cpu.c           |  6 ++++++
>  target-moxie/cpu.c          |  6 ++++++
>  target-openrisc/cpu.c       |  6 ++++++
>  target-ppc/translate_init.c |  6 ++++++
>  target-s390x/cpu.c          |  1 +
>  target-sh4/cpu.c            |  6 ++++++
>  target-sparc/cpu.c          |  1 +
>  target-tricore/cpu.c        |  5 +++++
>  target-unicore32/cpu.c      |  6 ++++++
>  target-xtensa/cpu.c         |  6 ++++++
>  19 files changed, 116 insertions(+), 10 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index e97071a..7760f2d 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -530,21 +530,40 @@ void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
>  }
>  #endif
>  
> +static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
> +
> +#ifdef CONFIG_USER_ONLY
> +int max_cpus = 1; /* TODO: Check if this is correct ? */
> +#endif
> +
> +static int cpu_get_free_index(void)
> +{
> +    int cpu = find_first_zero_bit(cpu_index_map, max_cpus);
> +
> +    if (cpu == max_cpus) {
> +        fprintf(stderr, "WARNING: qemu: Trying to use more "
> +                        "CPUs than allowed max of %d\n", max_cpus);
> +        return max_cpus;
> +    } else {
> +        bitmap_set(cpu_index_map, cpu, 1);
> +        return cpu;
> +    }
> +}
> +
> +void cpu_exec_exit(CPUState *cpu)
> +{
> +    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
> +}
> +
>  void cpu_exec_init(CPUArchState *env)
>  {
>      CPUState *cpu = ENV_GET_CPU(env);
>      CPUClass *cc = CPU_GET_CLASS(cpu);
> -    CPUState *some_cpu;
> -    int cpu_index;
>  
>  #if defined(CONFIG_USER_ONLY)
>      cpu_list_lock();
>  #endif
> -    cpu_index = 0;
> -    CPU_FOREACH(some_cpu) {
> -        cpu_index++;
> -    }
> -    cpu->cpu_index = cpu_index;
> +    cpu->cpu_index = cpu_get_free_index();
>      cpu->numa_node = 0;
>      QTAILQ_INIT(&cpu->breakpoints);
>      QTAILQ_INIT(&cpu->watchpoints);
> @@ -558,16 +577,16 @@ void cpu_exec_init(CPUArchState *env)
>      cpu_list_unlock();
>  #endif
>      if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
> -        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
> +        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
>      }
>  #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
> -    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
> +    register_savevm(NULL, "cpu", cpu->cpu_index, CPU_SAVE_VERSION,
>                      cpu_save, cpu_load, env);
>      assert(cc->vmsd == NULL);
>      assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
>  #endif
>      if (cc->vmsd != NULL) {
> -        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
> +        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
>      }
>  }
>  
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 8eb0db3..95fbba0 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -89,6 +89,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>                                target_ulong pc, target_ulong cs_base, int flags,
>                                int cflags);
>  void cpu_exec_init(CPUArchState *env);
> +void cpu_exec_exit(CPUState *cpu);
>  void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
>  int page_unprotect(target_ulong address, uintptr_t pc, void *puc);
>  void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
> diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
> index a98b7d8..7c57165 100644
> --- a/target-alpha/cpu.c
> +++ b/target-alpha/cpu.c
> @@ -250,6 +250,11 @@ static const TypeInfo ev68_cpu_type_info = {
>      .parent = TYPE("ev67"),
>  };
>  
> +static void alpha_cpu_finalize(Object *obj)
> +{
> +    cpu_exec_exit(CPU(obj));
> +}
> +
>  static void alpha_cpu_initfn(Object *obj)
>  {
>      CPUState *cs = CPU(obj);
> @@ -305,6 +310,7 @@ static const TypeInfo alpha_cpu_type_info = {
>      .parent = TYPE_CPU,
>      .instance_size = sizeof(AlphaCPU),
>      .instance_init = alpha_cpu_initfn,
> +    .instance_finalize = alpha_cpu_finalize,

Would it be possible to put this into TYPE_CPU->instance_finalize instead?


Alex
Bharata B Rao March 17, 2015, 8:39 a.m. UTC | #2
On Tue, Mar 17, 2015 at 07:56:41AM +0100, Alexander Graf wrote:
> 
> 
> On 13.03.15 12:56, Bharata B Rao wrote:
> > From: Bharata B Rao <bharata.rao@gmail.com>
> > 
> > Currently CPUState.cpu_index is monotonically increasing and a newly
> > created CPU always gets the next higher index. The next available
> > index is calculated by counting the existing number of CPUs. This is
> > fine as long as we only add CPUs, but there are architectures which
> > are starting to support CPU removal too. For an architecture like PowerPC
> > which derives its CPU identifier (device tree ID) from cpu_index, the
> > existing logic of generating cpu_index values causes problems.
> > 
> > With the currently proposed method of handling vCPU removal by parking
> > the vCPU fd in QEMU
> > (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html),
> > generating cpu_index this way will not work for PowerPC.
> > 
> > This patch changes the way cpu_index is handed out by maintaining
> > a bit map of the CPUs that tracks both addition and removal of CPUs.
> > 
> > I am not sure if this is the right and an acceptable approach. The
> > alternative is to do something similar for PowerPC alone and not
> > depend on cpu_index.
> > 
> > I have tested this with out-of-the-tree patches for CPU hot plug and
> > removal on x86 and sPAPR PowerPC.
> > 
> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > ---
> >  exec.c                      | 39 +++++++++++++++++++++++++++++----------
> >  include/exec/exec-all.h     |  1 +
> >  target-alpha/cpu.c          |  6 ++++++
> >  target-arm/cpu.c            |  1 +
> >  target-cris/cpu.c           |  6 ++++++
> >  target-i386/cpu.c           |  6 ++++++
> >  target-lm32/cpu.c           |  6 ++++++
> >  target-m68k/cpu.c           |  6 ++++++
> >  target-microblaze/cpu.c     |  6 ++++++
> >  target-mips/cpu.c           |  6 ++++++
> >  target-moxie/cpu.c          |  6 ++++++
> >  target-openrisc/cpu.c       |  6 ++++++
> >  target-ppc/translate_init.c |  6 ++++++
> >  target-s390x/cpu.c          |  1 +
> >  target-sh4/cpu.c            |  6 ++++++
> >  target-sparc/cpu.c          |  1 +
> >  target-tricore/cpu.c        |  5 +++++
> >  target-unicore32/cpu.c      |  6 ++++++
> >  target-xtensa/cpu.c         |  6 ++++++
> >  19 files changed, 116 insertions(+), 10 deletions(-)
> > 
> > diff --git a/exec.c b/exec.c
> > index e97071a..7760f2d 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -530,21 +530,40 @@ void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
> >  }
> >  #endif
> >  
> > +static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
> > +
> > +#ifdef CONFIG_USER_ONLY
> > +int max_cpus = 1; /* TODO: Check if this is correct ? */
> > +#endif
> > +
> > +static int cpu_get_free_index(void)
> > +{
> > +    int cpu = find_first_zero_bit(cpu_index_map, max_cpus);
> > +
> > +    if (cpu == max_cpus) {
> > +        fprintf(stderr, "WARNING: qemu: Trying to use more "
> > +                        "CPUs than allowed max of %d\n", max_cpus);
> > +        return max_cpus;
> > +    } else {
> > +        bitmap_set(cpu_index_map, cpu, 1);
> > +        return cpu;
> > +    }
> > +}
> > +
> > +void cpu_exec_exit(CPUState *cpu)
> > +{
> > +    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
> > +}
> > +
> >  void cpu_exec_init(CPUArchState *env)
> >  {
> >      CPUState *cpu = ENV_GET_CPU(env);
> >      CPUClass *cc = CPU_GET_CLASS(cpu);
> > -    CPUState *some_cpu;
> > -    int cpu_index;
> >  
> >  #if defined(CONFIG_USER_ONLY)
> >      cpu_list_lock();
> >  #endif
> > -    cpu_index = 0;
> > -    CPU_FOREACH(some_cpu) {
> > -        cpu_index++;
> > -    }
> > -    cpu->cpu_index = cpu_index;
> > +    cpu->cpu_index = cpu_get_free_index();
> >      cpu->numa_node = 0;
> >      QTAILQ_INIT(&cpu->breakpoints);
> >      QTAILQ_INIT(&cpu->watchpoints);
> > @@ -558,16 +577,16 @@ void cpu_exec_init(CPUArchState *env)
> >      cpu_list_unlock();
> >  #endif
> >      if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
> > -        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
> > +        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
> >      }
> >  #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
> > -    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
> > +    register_savevm(NULL, "cpu", cpu->cpu_index, CPU_SAVE_VERSION,
> >                      cpu_save, cpu_load, env);
> >      assert(cc->vmsd == NULL);
> >      assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
> >  #endif
> >      if (cc->vmsd != NULL) {
> > -        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
> > +        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
> >      }
> >  }
> >  
> > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> > index 8eb0db3..95fbba0 100644
> > --- a/include/exec/exec-all.h
> > +++ b/include/exec/exec-all.h
> > @@ -89,6 +89,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> >                                target_ulong pc, target_ulong cs_base, int flags,
> >                                int cflags);
> >  void cpu_exec_init(CPUArchState *env);
> > +void cpu_exec_exit(CPUState *cpu);
> >  void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
> >  int page_unprotect(target_ulong address, uintptr_t pc, void *puc);
> >  void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
> > diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
> > index a98b7d8..7c57165 100644
> > --- a/target-alpha/cpu.c
> > +++ b/target-alpha/cpu.c
> > @@ -250,6 +250,11 @@ static const TypeInfo ev68_cpu_type_info = {
> >      .parent = TYPE("ev67"),
> >  };
> >  
> > +static void alpha_cpu_finalize(Object *obj)
> > +{
> > +    cpu_exec_exit(CPU(obj));
> > +}
> > +
> >  static void alpha_cpu_initfn(Object *obj)
> >  {
> >      CPUState *cs = CPU(obj);
> > @@ -305,6 +310,7 @@ static const TypeInfo alpha_cpu_type_info = {
> >      .parent = TYPE_CPU,
> >      .instance_size = sizeof(AlphaCPU),
> >      .instance_init = alpha_cpu_initfn,
> > +    .instance_finalize = alpha_cpu_finalize,
> 
> Would it be possible to put this into TYPE_CPU->instance_finalize instead?

Yes possible and that would be much cleaner since I wouldn't have to touch
all archs. But it will be asymmetric in some sense as cpu_exec_init() is
called from all individual cpus' instance_init but cpu_exec_exit() will be
called from parent's (TYPE_CPU) instance_finalize. If that is fine, I shall
post v2 with this change.

Regards,
Bharata.
Andreas Färber March 17, 2015, 10:51 a.m. UTC | #3
Am 13.03.2015 um 12:56 schrieb Bharata B Rao:
> From: Bharata B Rao <bharata.rao@gmail.com>
> 
> Currently CPUState.cpu_index is monotonically increasing and a newly
> created CPU always gets the next higher index. The next available
> index is calculated by counting the existing number of CPUs. This is
> fine as long as we only add CPUs, but there are architectures which
> are starting to support CPU removal too. For an architecture like PowerPC
> which derives its CPU identifier (device tree ID) from cpu_index, the
> existing logic of generating cpu_index values causes problems.
> 
> With the currently proposed method of handling vCPU removal by parking
> the vCPU fd in QEMU
> (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html),
> generating cpu_index this way will not work for PowerPC.
> 
> This patch changes the way cpu_index is handed out by maintaining
> a bit map of the CPUs that tracks both addition and removal of CPUs.
> 
> I am not sure if this is the right and an acceptable approach. The
> alternative is to do something similar for PowerPC alone and not
> depend on cpu_index.
> 
> I have tested this with out-of-the-tree patches for CPU hot plug and
> removal on x86 and sPAPR PowerPC.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  exec.c                      | 39 +++++++++++++++++++++++++++++----------
>  include/exec/exec-all.h     |  1 +
>  target-alpha/cpu.c          |  6 ++++++
>  target-arm/cpu.c            |  1 +
>  target-cris/cpu.c           |  6 ++++++
>  target-i386/cpu.c           |  6 ++++++
>  target-lm32/cpu.c           |  6 ++++++
>  target-m68k/cpu.c           |  6 ++++++
>  target-microblaze/cpu.c     |  6 ++++++
>  target-mips/cpu.c           |  6 ++++++
>  target-moxie/cpu.c          |  6 ++++++
>  target-openrisc/cpu.c       |  6 ++++++
>  target-ppc/translate_init.c |  6 ++++++
>  target-s390x/cpu.c          |  1 +
>  target-sh4/cpu.c            |  6 ++++++
>  target-sparc/cpu.c          |  1 +
>  target-tricore/cpu.c        |  5 +++++
>  target-unicore32/cpu.c      |  6 ++++++
>  target-xtensa/cpu.c         |  6 ++++++
>  19 files changed, 116 insertions(+), 10 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index e97071a..7760f2d 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -530,21 +530,40 @@ void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
>  }
>  #endif
>  
> +static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);

I don't see this constant being defined in this patch. How large is it?
I wonder whether this might be stolen from an x86 ACPI/NUMA context and
forced onto all architectures now?

> +
> +#ifdef CONFIG_USER_ONLY
> +int max_cpus = 1; /* TODO: Check if this is correct ? */

This strikes me as wrong, as forking will create a copy of the initial
CPUState, see cpu_copy(). The cpu_index might get overwritten inside the
CPUState, not sure about that, but this here is global state.

Can't we just keep the current code for CONFIG_USER_ONLY and skip all
the bitmap functions?

> +#endif
> +
> +static int cpu_get_free_index(void)
> +{
> +    int cpu = find_first_zero_bit(cpu_index_map, max_cpus);
> +
> +    if (cpu == max_cpus) {
> +        fprintf(stderr, "WARNING: qemu: Trying to use more "
> +                        "CPUs than allowed max of %d\n", max_cpus);

This is a bad API. :) If we can't handle it, use an Error** errp
argument to pass that info outwards. Imagine this happening from QMP
device-add, then this warning will go unnoticed on stderr.

(Also, error_report() would use the real executable name.)

> +        return max_cpus;
> +    } else {
> +        bitmap_set(cpu_index_map, cpu, 1);
> +        return cpu;
> +    }
> +}
> +
> +void cpu_exec_exit(CPUState *cpu)
> +{
> +    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
> +}
> +
>  void cpu_exec_init(CPUArchState *env)
[snip]

Regards,
Andreas
Andreas Färber March 17, 2015, 10:56 a.m. UTC | #4
Am 17.03.2015 um 09:39 schrieb Bharata B Rao:
> On Tue, Mar 17, 2015 at 07:56:41AM +0100, Alexander Graf wrote:
>>
>>
>> On 13.03.15 12:56, Bharata B Rao wrote:
>>> From: Bharata B Rao <bharata.rao@gmail.com>
>>>
>>> Currently CPUState.cpu_index is monotonically increasing and a newly
>>> created CPU always gets the next higher index. The next available
>>> index is calculated by counting the existing number of CPUs. This is
>>> fine as long as we only add CPUs, but there are architectures which
>>> are starting to support CPU removal too. For an architecture like PowerPC
>>> which derives its CPU identifier (device tree ID) from cpu_index, the
>>> existing logic of generating cpu_index values causes problems.
>>>
>>> With the currently proposed method of handling vCPU removal by parking
>>> the vCPU fd in QEMU
>>> (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html),
>>> generating cpu_index this way will not work for PowerPC.
>>>
>>> This patch changes the way cpu_index is handed out by maintaining
>>> a bit map of the CPUs that tracks both addition and removal of CPUs.
>>>
>>> I am not sure if this is the right and an acceptable approach. The
>>> alternative is to do something similar for PowerPC alone and not
>>> depend on cpu_index.
>>>
>>> I have tested this with out-of-the-tree patches for CPU hot plug and
>>> removal on x86 and sPAPR PowerPC.
>>>
>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
>>> ---
>>>  exec.c                      | 39 +++++++++++++++++++++++++++++----------
>>>  include/exec/exec-all.h     |  1 +
>>>  target-alpha/cpu.c          |  6 ++++++
>>>  target-arm/cpu.c            |  1 +
>>>  target-cris/cpu.c           |  6 ++++++
>>>  target-i386/cpu.c           |  6 ++++++
>>>  target-lm32/cpu.c           |  6 ++++++
>>>  target-m68k/cpu.c           |  6 ++++++
>>>  target-microblaze/cpu.c     |  6 ++++++
>>>  target-mips/cpu.c           |  6 ++++++
>>>  target-moxie/cpu.c          |  6 ++++++
>>>  target-openrisc/cpu.c       |  6 ++++++
>>>  target-ppc/translate_init.c |  6 ++++++
>>>  target-s390x/cpu.c          |  1 +
>>>  target-sh4/cpu.c            |  6 ++++++
>>>  target-sparc/cpu.c          |  1 +
>>>  target-tricore/cpu.c        |  5 +++++
>>>  target-unicore32/cpu.c      |  6 ++++++
>>>  target-xtensa/cpu.c         |  6 ++++++
>>>  19 files changed, 116 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/exec.c b/exec.c
>>> index e97071a..7760f2d 100644
>>> --- a/exec.c
>>> +++ b/exec.c
>>> @@ -530,21 +530,40 @@ void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
>>>  }
>>>  #endif
>>>  
>>> +static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
>>> +
>>> +#ifdef CONFIG_USER_ONLY
>>> +int max_cpus = 1; /* TODO: Check if this is correct ? */
>>> +#endif
>>> +
>>> +static int cpu_get_free_index(void)
>>> +{
>>> +    int cpu = find_first_zero_bit(cpu_index_map, max_cpus);
>>> +
>>> +    if (cpu == max_cpus) {
>>> +        fprintf(stderr, "WARNING: qemu: Trying to use more "
>>> +                        "CPUs than allowed max of %d\n", max_cpus);
>>> +        return max_cpus;
>>> +    } else {
>>> +        bitmap_set(cpu_index_map, cpu, 1);
>>> +        return cpu;
>>> +    }
>>> +}
>>> +
>>> +void cpu_exec_exit(CPUState *cpu)
>>> +{
>>> +    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
>>> +}
>>> +
>>>  void cpu_exec_init(CPUArchState *env)
>>>  {
>>>      CPUState *cpu = ENV_GET_CPU(env);
>>>      CPUClass *cc = CPU_GET_CLASS(cpu);
>>> -    CPUState *some_cpu;
>>> -    int cpu_index;
>>>  
>>>  #if defined(CONFIG_USER_ONLY)
>>>      cpu_list_lock();
>>>  #endif
>>> -    cpu_index = 0;
>>> -    CPU_FOREACH(some_cpu) {
>>> -        cpu_index++;
>>> -    }
>>> -    cpu->cpu_index = cpu_index;
>>> +    cpu->cpu_index = cpu_get_free_index();
>>>      cpu->numa_node = 0;
>>>      QTAILQ_INIT(&cpu->breakpoints);
>>>      QTAILQ_INIT(&cpu->watchpoints);
>>> @@ -558,16 +577,16 @@ void cpu_exec_init(CPUArchState *env)
>>>      cpu_list_unlock();
>>>  #endif
>>>      if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
>>> -        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
>>> +        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
>>>      }
>>>  #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
>>> -    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
>>> +    register_savevm(NULL, "cpu", cpu->cpu_index, CPU_SAVE_VERSION,
>>>                      cpu_save, cpu_load, env);
>>>      assert(cc->vmsd == NULL);
>>>      assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
>>>  #endif
>>>      if (cc->vmsd != NULL) {
>>> -        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
>>> +        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
>>>      }
>>>  }
>>>  
>>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
>>> index 8eb0db3..95fbba0 100644
>>> --- a/include/exec/exec-all.h
>>> +++ b/include/exec/exec-all.h
>>> @@ -89,6 +89,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>>>                                target_ulong pc, target_ulong cs_base, int flags,
>>>                                int cflags);
>>>  void cpu_exec_init(CPUArchState *env);
>>> +void cpu_exec_exit(CPUState *cpu);
>>>  void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
>>>  int page_unprotect(target_ulong address, uintptr_t pc, void *puc);
>>>  void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>>> diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
>>> index a98b7d8..7c57165 100644
>>> --- a/target-alpha/cpu.c
>>> +++ b/target-alpha/cpu.c
>>> @@ -250,6 +250,11 @@ static const TypeInfo ev68_cpu_type_info = {
>>>      .parent = TYPE("ev67"),
>>>  };
>>>  
>>> +static void alpha_cpu_finalize(Object *obj)
>>> +{
>>> +    cpu_exec_exit(CPU(obj));
>>> +}
>>> +
>>>  static void alpha_cpu_initfn(Object *obj)
>>>  {
>>>      CPUState *cs = CPU(obj);
>>> @@ -305,6 +310,7 @@ static const TypeInfo alpha_cpu_type_info = {
>>>      .parent = TYPE_CPU,
>>>      .instance_size = sizeof(AlphaCPU),
>>>      .instance_init = alpha_cpu_initfn,
>>> +    .instance_finalize = alpha_cpu_finalize,
>>
>> Would it be possible to put this into TYPE_CPU->instance_finalize instead?
> 
> Yes possible and that would be much cleaner since I wouldn't have to touch
> all archs. But it will be asymmetric in some sense as cpu_exec_init() is
> called from all individual cpus' instance_init but cpu_exec_exit() will be
> called from parent's (TYPE_CPU) instance_finalize. If that is fine, I shall
> post v2 with this change.

Could you check: Wasn't there a patch from Fujitsu to move
cpu_exec_init() to generic code? If both were generic, that would be
fine. If that is problematic, I would accept the mismatch as long as it
is "safe". That is, instance_finalize needs to handle any state of the
object, and I think these two are better suited for realize/unrealize
than instance_init/instance_finalize.

The unanswered question around the Fujitsu patches is mostly whether
some of such code movements impact timing in the sense of breaking
migration compatibility by changing the relative order.

Probably something my CPU test case should be extended with.

Regards,
Andreas
David Gibson March 18, 2015, 12:49 a.m. UTC | #5
On Fri, Mar 13, 2015 at 05:26:36PM +0530, Bharata B Rao wrote:
> From: Bharata B Rao <bharata.rao@gmail.com>
> 
> Currently CPUState.cpu_index is monotonically increasing and a newly
> created CPU always gets the next higher index. The next available
> index is calculated by counting the existing number of CPUs. This is
> fine as long as we only add CPUs, but there are architectures which
> are starting to support CPU removal too. For an architecture like PowerPC
> which derives its CPU identifier (device tree ID) from cpu_index, the
> existing logic of generating cpu_index values causes problems.
> 
> With the currently proposed method of handling vCPU removal by parking
> the vCPU fd in QEMU
> (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html),
> generating cpu_index this way will not work for PowerPC.
> 
> This patch changes the way cpu_index is handed out by maintaining
> a bit map of the CPUs that tracks both addition and removal of CPUs.
> 
> I am not sure if this is the right and an acceptable approach. The
> alternative is to do something similar for PowerPC alone and not
> depend on cpu_index.
> 
> I have tested this with out-of-the-tree patches for CPU hot plug and
> removal on x86 and sPAPR PowerPC.

How does this interact with the tweaking of cpu indexes that spapr
does in order to configure the guest SMT mode on POWER7 and POWER8
systems?
Bharata B Rao March 18, 2015, 6:20 a.m. UTC | #6
On Tue, Mar 17, 2015 at 11:56:04AM +0100, Andreas Färber wrote:
> Am 17.03.2015 um 09:39 schrieb Bharata B Rao:
> > On Tue, Mar 17, 2015 at 07:56:41AM +0100, Alexander Graf wrote:
> >>
> >>
> >> On 13.03.15 12:56, Bharata B Rao wrote:
> >>> From: Bharata B Rao <bharata.rao@gmail.com>
> >>>
> >>> Currently CPUState.cpu_index is monotonically increasing and a newly
> >>> created CPU always gets the next higher index. The next available
> >>> index is calculated by counting the existing number of CPUs. This is
> >>> fine as long as we only add CPUs, but there are architectures which
> >>> are starting to support CPU removal too. For an architecture like PowerPC
> >>> which derives its CPU identifier (device tree ID) from cpu_index, the
> >>> existing logic of generating cpu_index values causes problems.
> >>>
> >>> With the currently proposed method of handling vCPU removal by parking
> >>> the vCPU fd in QEMU
> >>> (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html),
> >>> generating cpu_index this way will not work for PowerPC.
> >>>
> >>> This patch changes the way cpu_index is handed out by maintaining
> >>> a bit map of the CPUs that tracks both addition and removal of CPUs.
> >>>
> >>> I am not sure if this is the right and an acceptable approach. The
> >>> alternative is to do something similar for PowerPC alone and not
> >>> depend on cpu_index.
> >>>
> >>> I have tested this with out-of-the-tree patches for CPU hot plug and
> >>> removal on x86 and sPAPR PowerPC.
> >>>
> >>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> >>> ---
> >>>  exec.c                      | 39 +++++++++++++++++++++++++++++----------
> >>>  include/exec/exec-all.h     |  1 +
> >>>  target-alpha/cpu.c          |  6 ++++++
> >>>  target-arm/cpu.c            |  1 +
> >>>  target-cris/cpu.c           |  6 ++++++
> >>>  target-i386/cpu.c           |  6 ++++++
> >>>  target-lm32/cpu.c           |  6 ++++++
> >>>  target-m68k/cpu.c           |  6 ++++++
> >>>  target-microblaze/cpu.c     |  6 ++++++
> >>>  target-mips/cpu.c           |  6 ++++++
> >>>  target-moxie/cpu.c          |  6 ++++++
> >>>  target-openrisc/cpu.c       |  6 ++++++
> >>>  target-ppc/translate_init.c |  6 ++++++
> >>>  target-s390x/cpu.c          |  1 +
> >>>  target-sh4/cpu.c            |  6 ++++++
> >>>  target-sparc/cpu.c          |  1 +
> >>>  target-tricore/cpu.c        |  5 +++++
> >>>  target-unicore32/cpu.c      |  6 ++++++
> >>>  target-xtensa/cpu.c         |  6 ++++++
> >>>  19 files changed, 116 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/exec.c b/exec.c
> >>> index e97071a..7760f2d 100644
> >>> --- a/exec.c
> >>> +++ b/exec.c
> >>> @@ -530,21 +530,40 @@ void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
> >>>  }
> >>>  #endif
> >>>  
> >>> +static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
> >>> +
> >>> +#ifdef CONFIG_USER_ONLY
> >>> +int max_cpus = 1; /* TODO: Check if this is correct ? */
> >>> +#endif
> >>> +
> >>> +static int cpu_get_free_index(void)
> >>> +{
> >>> +    int cpu = find_first_zero_bit(cpu_index_map, max_cpus);
> >>> +
> >>> +    if (cpu == max_cpus) {
> >>> +        fprintf(stderr, "WARNING: qemu: Trying to use more "
> >>> +                        "CPUs than allowed max of %d\n", max_cpus);
> >>> +        return max_cpus;
> >>> +    } else {
> >>> +        bitmap_set(cpu_index_map, cpu, 1);
> >>> +        return cpu;
> >>> +    }
> >>> +}
> >>> +
> >>> +void cpu_exec_exit(CPUState *cpu)
> >>> +{
> >>> +    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
> >>> +}
> >>> +
> >>>  void cpu_exec_init(CPUArchState *env)
> >>>  {
> >>>      CPUState *cpu = ENV_GET_CPU(env);
> >>>      CPUClass *cc = CPU_GET_CLASS(cpu);
> >>> -    CPUState *some_cpu;
> >>> -    int cpu_index;
> >>>  
> >>>  #if defined(CONFIG_USER_ONLY)
> >>>      cpu_list_lock();
> >>>  #endif
> >>> -    cpu_index = 0;
> >>> -    CPU_FOREACH(some_cpu) {
> >>> -        cpu_index++;
> >>> -    }
> >>> -    cpu->cpu_index = cpu_index;
> >>> +    cpu->cpu_index = cpu_get_free_index();
> >>>      cpu->numa_node = 0;
> >>>      QTAILQ_INIT(&cpu->breakpoints);
> >>>      QTAILQ_INIT(&cpu->watchpoints);
> >>> @@ -558,16 +577,16 @@ void cpu_exec_init(CPUArchState *env)
> >>>      cpu_list_unlock();
> >>>  #endif
> >>>      if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
> >>> -        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
> >>> +        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
> >>>      }
> >>>  #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
> >>> -    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
> >>> +    register_savevm(NULL, "cpu", cpu->cpu_index, CPU_SAVE_VERSION,
> >>>                      cpu_save, cpu_load, env);
> >>>      assert(cc->vmsd == NULL);
> >>>      assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
> >>>  #endif
> >>>      if (cc->vmsd != NULL) {
> >>> -        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
> >>> +        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
> >>>      }
> >>>  }
> >>>  
> >>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> >>> index 8eb0db3..95fbba0 100644
> >>> --- a/include/exec/exec-all.h
> >>> +++ b/include/exec/exec-all.h
> >>> @@ -89,6 +89,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> >>>                                target_ulong pc, target_ulong cs_base, int flags,
> >>>                                int cflags);
> >>>  void cpu_exec_init(CPUArchState *env);
> >>> +void cpu_exec_exit(CPUState *cpu);
> >>>  void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
> >>>  int page_unprotect(target_ulong address, uintptr_t pc, void *puc);
> >>>  void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
> >>> diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
> >>> index a98b7d8..7c57165 100644
> >>> --- a/target-alpha/cpu.c
> >>> +++ b/target-alpha/cpu.c
> >>> @@ -250,6 +250,11 @@ static const TypeInfo ev68_cpu_type_info = {
> >>>      .parent = TYPE("ev67"),
> >>>  };
> >>>  
> >>> +static void alpha_cpu_finalize(Object *obj)
> >>> +{
> >>> +    cpu_exec_exit(CPU(obj));
> >>> +}
> >>> +
> >>>  static void alpha_cpu_initfn(Object *obj)
> >>>  {
> >>>      CPUState *cs = CPU(obj);
> >>> @@ -305,6 +310,7 @@ static const TypeInfo alpha_cpu_type_info = {
> >>>      .parent = TYPE_CPU,
> >>>      .instance_size = sizeof(AlphaCPU),
> >>>      .instance_init = alpha_cpu_initfn,
> >>> +    .instance_finalize = alpha_cpu_finalize,
> >>
> >> Would it be possible to put this into TYPE_CPU->instance_finalize instead?
> > 
> > Yes possible and that would be much cleaner since I wouldn't have to touch
> > all archs. But it will be asymmetric in some sense as cpu_exec_init() is
> > called from all individual cpus' instance_init but cpu_exec_exit() will be
> > called from parent's (TYPE_CPU) instance_finalize. If that is fine, I shall
> > post v2 with this change.
> 
> Could you check: Wasn't there a patch from Fujitsu to move
> cpu_exec_init() to generic code? If both were generic, that would be
> fine. If that is problematic, I would accept the mismatch as long as it
> is "safe". That is, instance_finalize needs to handle any state of the
> object,

There is a patch from Zhu to move only vmstate_register related bits
from cpu_exec_init to cpu_common_realizefn.

http://lists.gnu.org/archive/html/qemu-devel/2015-01/msg01550.html

> and I think these two are better suited for realize/unrealize
> than instance_init/instance_finalize.

And Eduardo has a patch to move cpu_exec_init call from instance_init
to realize for target-i386.

https://lists.gnu.org/archive/html/qemu-devel/2015-03/msg01056.html

So if the preferred way is to call cpu_exec_init from realize, then
Eduardo - Can you do this for all archs ? My limited testing shows
that it (moving cpu_exec_init from instance_init to realize) works for
sPAPR PowerPC too, but not sure about other targets.

After there is consensus on the above two patches, I can do the
cpu_index to bitmap changes.

Regards,
Bharata.
Bharata B Rao March 18, 2015, 6:28 a.m. UTC | #7
On Tue, Mar 17, 2015 at 11:51:36AM +0100, Andreas Färber wrote:
> Am 13.03.2015 um 12:56 schrieb Bharata B Rao:
> > From: Bharata B Rao <bharata.rao@gmail.com>
> > 
> > Currently CPUState.cpu_index is monotonically increasing and a newly
> > created CPU always gets the next higher index. The next available
> > index is calculated by counting the existing number of CPUs. This is
> > fine as long as we only add CPUs, but there are architectures which
> > are starting to support CPU removal too. For an architecture like PowerPC
> > which derives its CPU identifier (device tree ID) from cpu_index, the
> > existing logic of generating cpu_index values causes problems.
> > 
> > With the currently proposed method of handling vCPU removal by parking
> > the vCPU fd in QEMU
> > (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html),
> > generating cpu_index this way will not work for PowerPC.
> > 
> > This patch changes the way cpu_index is handed out by maintaining
> > a bit map of the CPUs that tracks both addition and removal of CPUs.
> > 
> > I am not sure if this is the right and an acceptable approach. The
> > alternative is to do something similar for PowerPC alone and not
> > depend on cpu_index.
> > 
> > I have tested this with out-of-the-tree patches for CPU hot plug and
> > removal on x86 and sPAPR PowerPC.
> > 
> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > ---
> >  exec.c                      | 39 +++++++++++++++++++++++++++++----------
> >  include/exec/exec-all.h     |  1 +
> >  target-alpha/cpu.c          |  6 ++++++
> >  target-arm/cpu.c            |  1 +
> >  target-cris/cpu.c           |  6 ++++++
> >  target-i386/cpu.c           |  6 ++++++
> >  target-lm32/cpu.c           |  6 ++++++
> >  target-m68k/cpu.c           |  6 ++++++
> >  target-microblaze/cpu.c     |  6 ++++++
> >  target-mips/cpu.c           |  6 ++++++
> >  target-moxie/cpu.c          |  6 ++++++
> >  target-openrisc/cpu.c       |  6 ++++++
> >  target-ppc/translate_init.c |  6 ++++++
> >  target-s390x/cpu.c          |  1 +
> >  target-sh4/cpu.c            |  6 ++++++
> >  target-sparc/cpu.c          |  1 +
> >  target-tricore/cpu.c        |  5 +++++
> >  target-unicore32/cpu.c      |  6 ++++++
> >  target-xtensa/cpu.c         |  6 ++++++
> >  19 files changed, 116 insertions(+), 10 deletions(-)
> > 
> > diff --git a/exec.c b/exec.c
> > index e97071a..7760f2d 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -530,21 +530,40 @@ void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
> >  }
> >  #endif
> >  
> > +static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
> 
> I don't see this constant being defined in this patch. How large is it?
> I wonder whether this might be stolen from an x86 ACPI/NUMA context and
> forced onto all architectures now?

I thought MAX_CPUMASK_BITS defines the max cpus possible.

From include/sysemu/sysemu.h

/* The following shall be true for all CPUs:
 *   cpu->cpu_index < max_cpus <= MAX_CPUMASK_BITS
 *
 * Note that cpu->get_arch_id() may be larger than MAX_CPUMASK_BITS.
 */
#define MAX_CPUMASK_BITS 255

> 
> > +
> > +#ifdef CONFIG_USER_ONLY
> > +int max_cpus = 1; /* TODO: Check if this is correct ? */
> 
> This strikes me as wrong, as forking will create a copy of the initial
> CPUState, see cpu_copy(). The cpu_index might get overwritten inside the
> CPUState, not sure about that, but this here is global state.
> 
> Can't we just keep the current code for CONFIG_USER_ONLY and skip all
> the bitmap functions?

Yes that would better than to touch CONFIG_USER_ONLY.

> 
> > +#endif
> > +
> > +static int cpu_get_free_index(void)
> > +{
> > +    int cpu = find_first_zero_bit(cpu_index_map, max_cpus);
> > +
> > +    if (cpu == max_cpus) {
> > +        fprintf(stderr, "WARNING: qemu: Trying to use more "
> > +                        "CPUs than allowed max of %d\n", max_cpus);
> 
> This is a bad API. :) If we can't handle it, use an Error** errp
> argument to pass that info outwards. Imagine this happening from QMP
> device-add, then this warning will go unnoticed on stderr.

I wanted to have an Error argument, but realized that instance_init
doesn't take an Error argument and as a consequence, it appears that
object_new() can't fail. So I depended on (cpu->cpu_index >= max_cpus)
or equivalent check in realize call to catch this error.

But as being discussed in other thread on the same subject, if
cpu_exec_init call moves to realize, this will get solved neatly.

Regards,
Bharata.
Bharata B Rao March 18, 2015, 6:34 a.m. UTC | #8
On Wed, Mar 18, 2015 at 11:49:59AM +1100, David Gibson wrote:
> On Fri, Mar 13, 2015 at 05:26:36PM +0530, Bharata B Rao wrote:
> > From: Bharata B Rao <bharata.rao@gmail.com>
> > 
> > Currently CPUState.cpu_index is monotonically increasing and a newly
> > created CPU always gets the next higher index. The next available
> > index is calculated by counting the existing number of CPUs. This is
> > fine as long as we only add CPUs, but there are architectures which
> > are starting to support CPU removal too. For an architecture like PowerPC
> > which derives its CPU identifier (device tree ID) from cpu_index, the
> > existing logic of generating cpu_index values causes problems.
> > 
> > With the currently proposed method of handling vCPU removal by parking
> > the vCPU fd in QEMU
> > (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html),
> > generating cpu_index this way will not work for PowerPC.
> > 
> > This patch changes the way cpu_index is handed out by maintaining
> > a bit map of the CPUs that tracks both addition and removal of CPUs.
> > 
> > I am not sure if this is the right and an acceptable approach. The
> > alternative is to do something similar for PowerPC alone and not
> > depend on cpu_index.
> > 
> > I have tested this with out-of-the-tree patches for CPU hot plug and
> > removal on x86 and sPAPR PowerPC.
> 
> How does this interact with the tweaking of cpu indexes that spapr
> does in order to configure the guest SMT mode on POWER7 and POWER8
> systems?

I am not changing the mapping of cpu_index to cpu_dt_id. So nothing
should be change. Can you please point me to the piece of tweaking code
that you are referring to above ?

Regards,
Bharata.
David Gibson March 19, 2015, 4:43 a.m. UTC | #9
On Wed, Mar 18, 2015 at 12:04:04PM +0530, Bharata B Rao wrote:
> On Wed, Mar 18, 2015 at 11:49:59AM +1100, David Gibson wrote:
> > On Fri, Mar 13, 2015 at 05:26:36PM +0530, Bharata B Rao wrote:
> > > From: Bharata B Rao <bharata.rao@gmail.com>
> > > 
> > > Currently CPUState.cpu_index is monotonically increasing and a newly
> > > created CPU always gets the next higher index. The next available
> > > index is calculated by counting the existing number of CPUs. This is
> > > fine as long as we only add CPUs, but there are architectures which
> > > are starting to support CPU removal too. For an architecture like PowerPC
> > > which derives its CPU identifier (device tree ID) from cpu_index, the
> > > existing logic of generating cpu_index values causes problems.
> > > 
> > > With the currently proposed method of handling vCPU removal by parking
> > > the vCPU fd in QEMU
> > > (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html),
> > > generating cpu_index this way will not work for PowerPC.
> > > 
> > > This patch changes the way cpu_index is handed out by maintaining
> > > a bit map of the CPUs that tracks both addition and removal of CPUs.
> > > 
> > > I am not sure if this is the right and an acceptable approach. The
> > > alternative is to do something similar for PowerPC alone and not
> > > depend on cpu_index.
> > > 
> > > I have tested this with out-of-the-tree patches for CPU hot plug and
> > > removal on x86 and sPAPR PowerPC.
> > 
> > How does this interact with the tweaking of cpu indexes that spapr
> > does in order to configure the guest SMT mode on POWER7 and POWER8
> > systems?
> 
> I am not changing the mapping of cpu_index to cpu_dt_id. So nothing
> should be change. Can you please point me to the piece of tweaking code
> that you are referring to above ?

Ah, I thought it actually adjusted the cpu_index values, rather than
mapping them to different cpu_dt_id values.  Maybe it used to, or
maybe I just misremembered.

Never mind, should be fine.
Eduardo Habkost March 19, 2015, 1:58 p.m. UTC | #10
On Wed, Mar 18, 2015 at 11:50:04AM +0530, Bharata B Rao wrote:
> On Tue, Mar 17, 2015 at 11:56:04AM +0100, Andreas Färber wrote:
> > Am 17.03.2015 um 09:39 schrieb Bharata B Rao:
> > > On Tue, Mar 17, 2015 at 07:56:41AM +0100, Alexander Graf wrote:
> > >>
> > >>
> > >> On 13.03.15 12:56, Bharata B Rao wrote:
> > >>> From: Bharata B Rao <bharata.rao@gmail.com>
[...]
> > >>>  
> > >>> +static void alpha_cpu_finalize(Object *obj)
> > >>> +{
> > >>> +    cpu_exec_exit(CPU(obj));
> > >>> +}
> > >>> +
> > >>>  static void alpha_cpu_initfn(Object *obj)
> > >>>  {
> > >>>      CPUState *cs = CPU(obj);
> > >>> @@ -305,6 +310,7 @@ static const TypeInfo alpha_cpu_type_info = {
> > >>>      .parent = TYPE_CPU,
> > >>>      .instance_size = sizeof(AlphaCPU),
> > >>>      .instance_init = alpha_cpu_initfn,
> > >>> +    .instance_finalize = alpha_cpu_finalize,
> > >>
> > >> Would it be possible to put this into TYPE_CPU->instance_finalize instead?
> > > 
> > > Yes possible and that would be much cleaner since I wouldn't have to touch
> > > all archs. But it will be asymmetric in some sense as cpu_exec_init() is
> > > called from all individual cpus' instance_init but cpu_exec_exit() will be
> > > called from parent's (TYPE_CPU) instance_finalize. If that is fine, I shall
> > > post v2 with this change.
> > 
> > Could you check: Wasn't there a patch from Fujitsu to move
> > cpu_exec_init() to generic code? If both were generic, that would be
> > fine. If that is problematic, I would accept the mismatch as long as it
> > is "safe". That is, instance_finalize needs to handle any state of the
> > object,
> 
> There is a patch from Zhu to move only vmstate_register related bits
> from cpu_exec_init to cpu_common_realizefn.
> 
> http://lists.gnu.org/archive/html/qemu-devel/2015-01/msg01550.html
> 
> > and I think these two are better suited for realize/unrealize
> > than instance_init/instance_finalize.
> 
> And Eduardo has a patch to move cpu_exec_init call from instance_init
> to realize for target-i386.
> 
> https://lists.gnu.org/archive/html/qemu-devel/2015-03/msg01056.html
> 
> So if the preferred way is to call cpu_exec_init from realize, then
> Eduardo - Can you do this for all archs ? My limited testing shows
> that it (moving cpu_exec_init from instance_init to realize) works for
> sPAPR PowerPC too, but not sure about other targets.

I can submit a RFC for all arches, but I don't have the ability to test
all of them. I think it is likely that some architectures have
additional code inside instance_init that depends on cpu_exec_init()
being called (i386 was one of them, until we removed cpu_index-based
default apic_id calculation).

We need to call cpu_exec_init() from realize because instance_init
shouldn't have any side-effects except on the state of the object
itself. See the discussion at:
  http://thread.gmane.org/gmane.comp.emulators.qemu/326307
  Subject: [PATCH] qdev: Make -device FOO, help help again when FOO is not pluggable
  Message-Id: <1426527232-15044-1-git-send-email-armbru@redhat.com>
Bharata B Rao May 7, 2015, 9:35 a.m. UTC | #11
On Wed, Mar 18, 2015 at 11:50:04AM +0530, Bharata B Rao wrote:
> On Tue, Mar 17, 2015 at 11:56:04AM +0100, Andreas Färber wrote:
> > Am 17.03.2015 um 09:39 schrieb Bharata B Rao:
> > > On Tue, Mar 17, 2015 at 07:56:41AM +0100, Alexander Graf wrote:
> > >>
> > >>
> > >> On 13.03.15 12:56, Bharata B Rao wrote:
> > >>> From: Bharata B Rao <bharata.rao@gmail.com>
> > >>>
> > >>> Currently CPUState.cpu_index is monotonically increasing and a newly
> > >>> created CPU always gets the next higher index. The next available
> > >>> index is calculated by counting the existing number of CPUs. This is
> > >>> fine as long as we only add CPUs, but there are architectures which
> > >>> are starting to support CPU removal too. For an architecture like PowerPC
> > >>> which derives its CPU identifier (device tree ID) from cpu_index, the
> > >>> existing logic of generating cpu_index values causes problems.
> > >>>
> > >>> With the currently proposed method of handling vCPU removal by parking
> > >>> the vCPU fd in QEMU
> > >>> (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html),
> > >>> generating cpu_index this way will not work for PowerPC.
> > >>>
> > >>> This patch changes the way cpu_index is handed out by maintaining
> > >>> a bit map of the CPUs that tracks both addition and removal of CPUs.
> > >>>
> > >>> I am not sure if this is the right and an acceptable approach. The
> > >>> alternative is to do something similar for PowerPC alone and not
> > >>> depend on cpu_index.
> > >>>
> > >>> I have tested this with out-of-the-tree patches for CPU hot plug and
> > >>> removal on x86 and sPAPR PowerPC.
> > >>>
> > >>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > >>> ---
> > >>>  exec.c                      | 39 +++++++++++++++++++++++++++++----------
> > >>>  include/exec/exec-all.h     |  1 +
> > >>>  target-alpha/cpu.c          |  6 ++++++
> > >>>  target-arm/cpu.c            |  1 +
> > >>>  target-cris/cpu.c           |  6 ++++++
> > >>>  target-i386/cpu.c           |  6 ++++++
> > >>>  target-lm32/cpu.c           |  6 ++++++
> > >>>  target-m68k/cpu.c           |  6 ++++++
> > >>>  target-microblaze/cpu.c     |  6 ++++++
> > >>>  target-mips/cpu.c           |  6 ++++++
> > >>>  target-moxie/cpu.c          |  6 ++++++
> > >>>  target-openrisc/cpu.c       |  6 ++++++
> > >>>  target-ppc/translate_init.c |  6 ++++++
> > >>>  target-s390x/cpu.c          |  1 +
> > >>>  target-sh4/cpu.c            |  6 ++++++
> > >>>  target-sparc/cpu.c          |  1 +
> > >>>  target-tricore/cpu.c        |  5 +++++
> > >>>  target-unicore32/cpu.c      |  6 ++++++
> > >>>  target-xtensa/cpu.c         |  6 ++++++
> > >>>  19 files changed, 116 insertions(+), 10 deletions(-)
> > >>>
> > >>> diff --git a/exec.c b/exec.c
> > >>> index e97071a..7760f2d 100644
> > >>> --- a/exec.c
> > >>> +++ b/exec.c
> > >>> @@ -530,21 +530,40 @@ void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
> > >>>  }
> > >>>  #endif
> > >>>  
> > >>> +static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
> > >>> +
> > >>> +#ifdef CONFIG_USER_ONLY
> > >>> +int max_cpus = 1; /* TODO: Check if this is correct ? */
> > >>> +#endif
> > >>> +
> > >>> +static int cpu_get_free_index(void)
> > >>> +{
> > >>> +    int cpu = find_first_zero_bit(cpu_index_map, max_cpus);
> > >>> +
> > >>> +    if (cpu == max_cpus) {
> > >>> +        fprintf(stderr, "WARNING: qemu: Trying to use more "
> > >>> +                        "CPUs than allowed max of %d\n", max_cpus);
> > >>> +        return max_cpus;
> > >>> +    } else {
> > >>> +        bitmap_set(cpu_index_map, cpu, 1);
> > >>> +        return cpu;
> > >>> +    }
> > >>> +}
> > >>> +
> > >>> +void cpu_exec_exit(CPUState *cpu)
> > >>> +{
> > >>> +    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
> > >>> +}
> > >>> +
> > >>>  void cpu_exec_init(CPUArchState *env)
> > >>>  {
> > >>>      CPUState *cpu = ENV_GET_CPU(env);
> > >>>      CPUClass *cc = CPU_GET_CLASS(cpu);
> > >>> -    CPUState *some_cpu;
> > >>> -    int cpu_index;
> > >>>  
> > >>>  #if defined(CONFIG_USER_ONLY)
> > >>>      cpu_list_lock();
> > >>>  #endif
> > >>> -    cpu_index = 0;
> > >>> -    CPU_FOREACH(some_cpu) {
> > >>> -        cpu_index++;
> > >>> -    }
> > >>> -    cpu->cpu_index = cpu_index;
> > >>> +    cpu->cpu_index = cpu_get_free_index();
> > >>>      cpu->numa_node = 0;
> > >>>      QTAILQ_INIT(&cpu->breakpoints);
> > >>>      QTAILQ_INIT(&cpu->watchpoints);
> > >>> @@ -558,16 +577,16 @@ void cpu_exec_init(CPUArchState *env)
> > >>>      cpu_list_unlock();
> > >>>  #endif
> > >>>      if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
> > >>> -        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
> > >>> +        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
> > >>>      }
> > >>>  #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
> > >>> -    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
> > >>> +    register_savevm(NULL, "cpu", cpu->cpu_index, CPU_SAVE_VERSION,
> > >>>                      cpu_save, cpu_load, env);
> > >>>      assert(cc->vmsd == NULL);
> > >>>      assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
> > >>>  #endif
> > >>>      if (cc->vmsd != NULL) {
> > >>> -        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
> > >>> +        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
> > >>>      }
> > >>>  }
> > >>>  
> > >>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> > >>> index 8eb0db3..95fbba0 100644
> > >>> --- a/include/exec/exec-all.h
> > >>> +++ b/include/exec/exec-all.h
> > >>> @@ -89,6 +89,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> > >>>                                target_ulong pc, target_ulong cs_base, int flags,
> > >>>                                int cflags);
> > >>>  void cpu_exec_init(CPUArchState *env);
> > >>> +void cpu_exec_exit(CPUState *cpu);
> > >>>  void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
> > >>>  int page_unprotect(target_ulong address, uintptr_t pc, void *puc);
> > >>>  void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
> > >>> diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
> > >>> index a98b7d8..7c57165 100644
> > >>> --- a/target-alpha/cpu.c
> > >>> +++ b/target-alpha/cpu.c
> > >>> @@ -250,6 +250,11 @@ static const TypeInfo ev68_cpu_type_info = {
> > >>>      .parent = TYPE("ev67"),
> > >>>  };
> > >>>  
> > >>> +static void alpha_cpu_finalize(Object *obj)
> > >>> +{
> > >>> +    cpu_exec_exit(CPU(obj));
> > >>> +}
> > >>> +
> > >>>  static void alpha_cpu_initfn(Object *obj)
> > >>>  {
> > >>>      CPUState *cs = CPU(obj);
> > >>> @@ -305,6 +310,7 @@ static const TypeInfo alpha_cpu_type_info = {
> > >>>      .parent = TYPE_CPU,
> > >>>      .instance_size = sizeof(AlphaCPU),
> > >>>      .instance_init = alpha_cpu_initfn,
> > >>> +    .instance_finalize = alpha_cpu_finalize,
> > >>
> > >> Would it be possible to put this into TYPE_CPU->instance_finalize instead?
> > > 
> > > Yes possible and that would be much cleaner since I wouldn't have to touch
> > > all archs. But it will be asymmetric in some sense as cpu_exec_init() is
> > > called from all individual cpus' instance_init but cpu_exec_exit() will be
> > > called from parent's (TYPE_CPU) instance_finalize. If that is fine, I shall
> > > post v2 with this change.
> > 
> > Could you check: Wasn't there a patch from Fujitsu to move
> > cpu_exec_init() to generic code? If both were generic, that would be
> > fine. If that is problematic, I would accept the mismatch as long as it
> > is "safe". That is, instance_finalize needs to handle any state of the
> > object,
> 
> There is a patch from Zhu to move only vmstate_register related bits
> from cpu_exec_init to cpu_common_realizefn.
> 
> http://lists.gnu.org/archive/html/qemu-devel/2015-01/msg01550.html
> 
> > and I think these two are better suited for realize/unrealize
> > than instance_init/instance_finalize.
> 
> And Eduardo has a patch to move cpu_exec_init call from instance_init
> to realize for target-i386.
> 
> https://lists.gnu.org/archive/html/qemu-devel/2015-03/msg01056.html
> 
> So if the preferred way is to call cpu_exec_init from realize, then
> Eduardo - Can you do this for all archs ? My limited testing shows
> that it (moving cpu_exec_init from instance_init to realize) works for
> sPAPR PowerPC too, but not sure about other targets.
> 
> After there is consensus on the above two patches, I can do the
> cpu_index to bitmap changes.

I was planning to re-post this patch now, but wanted to know the plan
regarding the related two patchsets that I have mentioned above.

1. Patch by Eduardo to move cpu_exec_init from instance_init to realizefn
2. Patch by Zhu to move vmstate_register related bits from cpu_exec_init
   to cpu_common_realizefn.

I don't see any more updates on this, are these obsolete or still under
consideration ?

Regards,
Bharata.
Eduardo Habkost May 7, 2015, 3:33 p.m. UTC | #12
On Thu, May 07, 2015 at 03:05:45PM +0530, Bharata B Rao wrote:
> On Wed, Mar 18, 2015 at 11:50:04AM +0530, Bharata B Rao wrote:
> > On Tue, Mar 17, 2015 at 11:56:04AM +0100, Andreas Färber wrote:
> > > Am 17.03.2015 um 09:39 schrieb Bharata B Rao:
> > > > On Tue, Mar 17, 2015 at 07:56:41AM +0100, Alexander Graf wrote:
> > > >>
> > > >>
> > > >> On 13.03.15 12:56, Bharata B Rao wrote:
> > > >>> From: Bharata B Rao <bharata.rao@gmail.com>
> > > >>>
> > > >>> Currently CPUState.cpu_index is monotonically increasing and a newly
> > > >>> created CPU always gets the next higher index. The next available
> > > >>> index is calculated by counting the existing number of CPUs. This is
> > > >>> fine as long as we only add CPUs, but there are architectures which
> > > >>> are starting to support CPU removal too. For an architecture like PowerPC
> > > >>> which derives its CPU identifier (device tree ID) from cpu_index, the
> > > >>> existing logic of generating cpu_index values causes problems.
> > > >>>
> > > >>> With the currently proposed method of handling vCPU removal by parking
> > > >>> the vCPU fd in QEMU
> > > >>> (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html),
> > > >>> generating cpu_index this way will not work for PowerPC.
> > > >>>
> > > >>> This patch changes the way cpu_index is handed out by maintaining
> > > >>> a bit map of the CPUs that tracks both addition and removal of CPUs.
> > > >>>
> > > >>> I am not sure if this is the right and an acceptable approach. The
> > > >>> alternative is to do something similar for PowerPC alone and not
> > > >>> depend on cpu_index.
> > > >>>
> > > >>> I have tested this with out-of-the-tree patches for CPU hot plug and
> > > >>> removal on x86 and sPAPR PowerPC.
> > > >>>
> > > >>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > > >>> ---
> > > >>>  exec.c                      | 39 +++++++++++++++++++++++++++++----------
> > > >>>  include/exec/exec-all.h     |  1 +
> > > >>>  target-alpha/cpu.c          |  6 ++++++
> > > >>>  target-arm/cpu.c            |  1 +
> > > >>>  target-cris/cpu.c           |  6 ++++++
> > > >>>  target-i386/cpu.c           |  6 ++++++
> > > >>>  target-lm32/cpu.c           |  6 ++++++
> > > >>>  target-m68k/cpu.c           |  6 ++++++
> > > >>>  target-microblaze/cpu.c     |  6 ++++++
> > > >>>  target-mips/cpu.c           |  6 ++++++
> > > >>>  target-moxie/cpu.c          |  6 ++++++
> > > >>>  target-openrisc/cpu.c       |  6 ++++++
> > > >>>  target-ppc/translate_init.c |  6 ++++++
> > > >>>  target-s390x/cpu.c          |  1 +
> > > >>>  target-sh4/cpu.c            |  6 ++++++
> > > >>>  target-sparc/cpu.c          |  1 +
> > > >>>  target-tricore/cpu.c        |  5 +++++
> > > >>>  target-unicore32/cpu.c      |  6 ++++++
> > > >>>  target-xtensa/cpu.c         |  6 ++++++
> > > >>>  19 files changed, 116 insertions(+), 10 deletions(-)
> > > >>>
> > > >>> diff --git a/exec.c b/exec.c
> > > >>> index e97071a..7760f2d 100644
> > > >>> --- a/exec.c
> > > >>> +++ b/exec.c
> > > >>> @@ -530,21 +530,40 @@ void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
> > > >>>  }
> > > >>>  #endif
> > > >>>  
> > > >>> +static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
> > > >>> +
> > > >>> +#ifdef CONFIG_USER_ONLY
> > > >>> +int max_cpus = 1; /* TODO: Check if this is correct ? */
> > > >>> +#endif
> > > >>> +
> > > >>> +static int cpu_get_free_index(void)
> > > >>> +{
> > > >>> +    int cpu = find_first_zero_bit(cpu_index_map, max_cpus);
> > > >>> +
> > > >>> +    if (cpu == max_cpus) {
> > > >>> +        fprintf(stderr, "WARNING: qemu: Trying to use more "
> > > >>> +                        "CPUs than allowed max of %d\n", max_cpus);
> > > >>> +        return max_cpus;
> > > >>> +    } else {
> > > >>> +        bitmap_set(cpu_index_map, cpu, 1);
> > > >>> +        return cpu;
> > > >>> +    }
> > > >>> +}
> > > >>> +
> > > >>> +void cpu_exec_exit(CPUState *cpu)
> > > >>> +{
> > > >>> +    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
> > > >>> +}
> > > >>> +
> > > >>>  void cpu_exec_init(CPUArchState *env)
> > > >>>  {
> > > >>>      CPUState *cpu = ENV_GET_CPU(env);
> > > >>>      CPUClass *cc = CPU_GET_CLASS(cpu);
> > > >>> -    CPUState *some_cpu;
> > > >>> -    int cpu_index;
> > > >>>  
> > > >>>  #if defined(CONFIG_USER_ONLY)
> > > >>>      cpu_list_lock();
> > > >>>  #endif
> > > >>> -    cpu_index = 0;
> > > >>> -    CPU_FOREACH(some_cpu) {
> > > >>> -        cpu_index++;
> > > >>> -    }
> > > >>> -    cpu->cpu_index = cpu_index;
> > > >>> +    cpu->cpu_index = cpu_get_free_index();
> > > >>>      cpu->numa_node = 0;
> > > >>>      QTAILQ_INIT(&cpu->breakpoints);
> > > >>>      QTAILQ_INIT(&cpu->watchpoints);
> > > >>> @@ -558,16 +577,16 @@ void cpu_exec_init(CPUArchState *env)
> > > >>>      cpu_list_unlock();
> > > >>>  #endif
> > > >>>      if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
> > > >>> -        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
> > > >>> +        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
> > > >>>      }
> > > >>>  #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
> > > >>> -    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
> > > >>> +    register_savevm(NULL, "cpu", cpu->cpu_index, CPU_SAVE_VERSION,
> > > >>>                      cpu_save, cpu_load, env);
> > > >>>      assert(cc->vmsd == NULL);
> > > >>>      assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
> > > >>>  #endif
> > > >>>      if (cc->vmsd != NULL) {
> > > >>> -        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
> > > >>> +        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
> > > >>>      }
> > > >>>  }
> > > >>>  
> > > >>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> > > >>> index 8eb0db3..95fbba0 100644
> > > >>> --- a/include/exec/exec-all.h
> > > >>> +++ b/include/exec/exec-all.h
> > > >>> @@ -89,6 +89,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
> > > >>>                                target_ulong pc, target_ulong cs_base, int flags,
> > > >>>                                int cflags);
> > > >>>  void cpu_exec_init(CPUArchState *env);
> > > >>> +void cpu_exec_exit(CPUState *cpu);
> > > >>>  void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
> > > >>>  int page_unprotect(target_ulong address, uintptr_t pc, void *puc);
> > > >>>  void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
> > > >>> diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
> > > >>> index a98b7d8..7c57165 100644
> > > >>> --- a/target-alpha/cpu.c
> > > >>> +++ b/target-alpha/cpu.c
> > > >>> @@ -250,6 +250,11 @@ static const TypeInfo ev68_cpu_type_info = {
> > > >>>      .parent = TYPE("ev67"),
> > > >>>  };
> > > >>>  
> > > >>> +static void alpha_cpu_finalize(Object *obj)
> > > >>> +{
> > > >>> +    cpu_exec_exit(CPU(obj));
> > > >>> +}
> > > >>> +
> > > >>>  static void alpha_cpu_initfn(Object *obj)
> > > >>>  {
> > > >>>      CPUState *cs = CPU(obj);
> > > >>> @@ -305,6 +310,7 @@ static const TypeInfo alpha_cpu_type_info = {
> > > >>>      .parent = TYPE_CPU,
> > > >>>      .instance_size = sizeof(AlphaCPU),
> > > >>>      .instance_init = alpha_cpu_initfn,
> > > >>> +    .instance_finalize = alpha_cpu_finalize,
> > > >>
> > > >> Would it be possible to put this into TYPE_CPU->instance_finalize instead?
> > > > 
> > > > Yes possible and that would be much cleaner since I wouldn't have to touch
> > > > all archs. But it will be asymmetric in some sense as cpu_exec_init() is
> > > > called from all individual cpus' instance_init but cpu_exec_exit() will be
> > > > called from parent's (TYPE_CPU) instance_finalize. If that is fine, I shall
> > > > post v2 with this change.
> > > 
> > > Could you check: Wasn't there a patch from Fujitsu to move
> > > cpu_exec_init() to generic code? If both were generic, that would be
> > > fine. If that is problematic, I would accept the mismatch as long as it
> > > is "safe". That is, instance_finalize needs to handle any state of the
> > > object,
> > 
> > There is a patch from Zhu to move only vmstate_register related bits
> > from cpu_exec_init to cpu_common_realizefn.
> > 
> > http://lists.gnu.org/archive/html/qemu-devel/2015-01/msg01550.html
> > 
> > > and I think these two are better suited for realize/unrealize
> > > than instance_init/instance_finalize.
> > 
> > And Eduardo has a patch to move cpu_exec_init call from instance_init
> > to realize for target-i386.
> > 
> > https://lists.gnu.org/archive/html/qemu-devel/2015-03/msg01056.html
> > 
> > So if the preferred way is to call cpu_exec_init from realize, then
> > Eduardo - Can you do this for all archs ? My limited testing shows
> > that it (moving cpu_exec_init from instance_init to realize) works for
> > sPAPR PowerPC too, but not sure about other targets.
> > 
> > After there is consensus on the above two patches, I can do the
> > cpu_index to bitmap changes.
> 
> I was planning to re-post this patch now, but wanted to know the plan
> regarding the related two patchsets that I have mentioned above.
> 
> 1. Patch by Eduardo to move cpu_exec_init from instance_init to realizefn
> 2. Patch by Zhu to move vmstate_register related bits from cpu_exec_init
>    to cpu_common_realizefn.
> 
> I don't see any more updates on this, are these obsolete or still under
> consideration ?

#1 is still planned for x86. Other architectures probably need it, but I
think it should be done by somebody familiar with each architecture.
There may be subtle initialization ordering requirements in
arch-specific CPU init code (that happened on x86, when we had
cpu_index-based apic_id initialization on initfn).

I don't know if this was suggested before: if you make cpu_exec_exit()
idempotent (and safe to call even if cpu_exec_init() wasn't called), we
can simply call it unconditionally in CPUClass::instance_finalize,
because if the CPU object is already being destroyed, we know it must be
removed from the global CPU list.

Later, arch-specific code may decide to call cpu_exec_exit() earlier, if
appropriate (on unrealizefn, for example). But at least we don't need to
force all architectures to provide a instance_finalize function just
because of cpu_exec_exit().
diff mbox

Patch

diff --git a/exec.c b/exec.c
index e97071a..7760f2d 100644
--- a/exec.c
+++ b/exec.c
@@ -530,21 +530,40 @@  void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
 }
 #endif
 
+static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
+
+#ifdef CONFIG_USER_ONLY
+int max_cpus = 1; /* TODO: Check if this is correct ? */
+#endif
+
+static int cpu_get_free_index(void)
+{
+    int cpu = find_first_zero_bit(cpu_index_map, max_cpus);
+
+    if (cpu == max_cpus) {
+        fprintf(stderr, "WARNING: qemu: Trying to use more "
+                        "CPUs than allowed max of %d\n", max_cpus);
+        return max_cpus;
+    } else {
+        bitmap_set(cpu_index_map, cpu, 1);
+        return cpu;
+    }
+}
+
+void cpu_exec_exit(CPUState *cpu)
+{
+    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
+}
+
 void cpu_exec_init(CPUArchState *env)
 {
     CPUState *cpu = ENV_GET_CPU(env);
     CPUClass *cc = CPU_GET_CLASS(cpu);
-    CPUState *some_cpu;
-    int cpu_index;
 
 #if defined(CONFIG_USER_ONLY)
     cpu_list_lock();
 #endif
-    cpu_index = 0;
-    CPU_FOREACH(some_cpu) {
-        cpu_index++;
-    }
-    cpu->cpu_index = cpu_index;
+    cpu->cpu_index = cpu_get_free_index();
     cpu->numa_node = 0;
     QTAILQ_INIT(&cpu->breakpoints);
     QTAILQ_INIT(&cpu->watchpoints);
@@ -558,16 +577,16 @@  void cpu_exec_init(CPUArchState *env)
     cpu_list_unlock();
 #endif
     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
-        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
+        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
     }
 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
-    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
+    register_savevm(NULL, "cpu", cpu->cpu_index, CPU_SAVE_VERSION,
                     cpu_save, cpu_load, env);
     assert(cc->vmsd == NULL);
     assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
 #endif
     if (cc->vmsd != NULL) {
-        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
+        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
     }
 }
 
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 8eb0db3..95fbba0 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -89,6 +89,7 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
                               target_ulong pc, target_ulong cs_base, int flags,
                               int cflags);
 void cpu_exec_init(CPUArchState *env);
+void cpu_exec_exit(CPUState *cpu);
 void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
 int page_unprotect(target_ulong address, uintptr_t pc, void *puc);
 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
index a98b7d8..7c57165 100644
--- a/target-alpha/cpu.c
+++ b/target-alpha/cpu.c
@@ -250,6 +250,11 @@  static const TypeInfo ev68_cpu_type_info = {
     .parent = TYPE("ev67"),
 };
 
+static void alpha_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void alpha_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -305,6 +310,7 @@  static const TypeInfo alpha_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(AlphaCPU),
     .instance_init = alpha_cpu_initfn,
+    .instance_finalize = alpha_cpu_finalize,
     .abstract = true,
     .class_size = sizeof(AlphaCPUClass),
     .class_init = alpha_cpu_class_init,
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 986f04c..73d9b91 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -454,6 +454,7 @@  static void arm_cpu_finalizefn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
     g_hash_table_destroy(cpu->cp_regs);
+    cpu_exec_exit(CPU(obj));
 }
 
 static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
diff --git a/target-cris/cpu.c b/target-cris/cpu.c
index 16cfba9..5242764 100644
--- a/target-cris/cpu.c
+++ b/target-cris/cpu.c
@@ -161,6 +161,11 @@  static void cris_cpu_set_irq(void *opaque, int irq, int level)
 }
 #endif
 
+static void cris_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void cris_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -299,6 +304,7 @@  static const TypeInfo cris_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(CRISCPU),
     .instance_init = cris_cpu_initfn,
+    .instance_finalize = cris_cpu_finalize,
     .abstract = true,
     .class_size = sizeof(CRISCPUClass),
     .class_init = cris_cpu_class_init,
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index ed7e5d5..7e4903f 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2822,6 +2822,11 @@  out:
     }
 }
 
+static void x86_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void x86_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -2994,6 +2999,7 @@  static const TypeInfo x86_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(X86CPU),
     .instance_init = x86_cpu_initfn,
+    .instance_finalize = x86_cpu_finalize,
     .abstract = true,
     .class_size = sizeof(X86CPUClass),
     .class_init = x86_cpu_common_class_init,
diff --git a/target-lm32/cpu.c b/target-lm32/cpu.c
index f8081f5..0634f0e 100644
--- a/target-lm32/cpu.c
+++ b/target-lm32/cpu.c
@@ -143,6 +143,11 @@  static void lm32_cpu_realizefn(DeviceState *dev, Error **errp)
     lcc->parent_realize(dev, errp);
 }
 
+static void lm32_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void lm32_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -294,6 +299,7 @@  static const TypeInfo lm32_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(LM32CPU),
     .instance_init = lm32_cpu_initfn,
+    .instance_finalize = lm32_cpu_finalize,
     .abstract = true,
     .class_size = sizeof(LM32CPUClass),
     .class_init = lm32_cpu_class_init,
diff --git a/target-m68k/cpu.c b/target-m68k/cpu.c
index 4cfb725..80d4f7d 100644
--- a/target-m68k/cpu.c
+++ b/target-m68k/cpu.c
@@ -160,6 +160,11 @@  static void m68k_cpu_realizefn(DeviceState *dev, Error **errp)
     mcc->parent_realize(dev, errp);
 }
 
+static void m68k_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void m68k_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -231,6 +236,7 @@  static const TypeInfo m68k_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(M68kCPU),
     .instance_init = m68k_cpu_initfn,
+    .instance_finalize = m68k_cpu_finalize,
     .abstract = true,
     .class_size = sizeof(M68kCPUClass),
     .class_init = m68k_cpu_class_init,
diff --git a/target-microblaze/cpu.c b/target-microblaze/cpu.c
index 67e3182..34e9009 100644
--- a/target-microblaze/cpu.c
+++ b/target-microblaze/cpu.c
@@ -122,6 +122,11 @@  static void mb_cpu_realizefn(DeviceState *dev, Error **errp)
     mcc->parent_realize(dev, errp);
 }
 
+static void mb_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void mb_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -190,6 +195,7 @@  static const TypeInfo mb_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(MicroBlazeCPU),
     .instance_init = mb_cpu_initfn,
+    .instance_finalize = mb_cpu_finalize,
     .class_size = sizeof(MicroBlazeCPUClass),
     .class_init = mb_cpu_class_init,
 };
diff --git a/target-mips/cpu.c b/target-mips/cpu.c
index 958c999..a78556d 100644
--- a/target-mips/cpu.c
+++ b/target-mips/cpu.c
@@ -108,6 +108,11 @@  static void mips_cpu_realizefn(DeviceState *dev, Error **errp)
     mcc->parent_realize(dev, errp);
 }
 
+static void mips_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void mips_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -160,6 +165,7 @@  static const TypeInfo mips_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(MIPSCPU),
     .instance_init = mips_cpu_initfn,
+    .instance_finalize = mips_cpu_finalize,
     .abstract = false,
     .class_size = sizeof(MIPSCPUClass),
     .class_init = mips_cpu_class_init,
diff --git a/target-moxie/cpu.c b/target-moxie/cpu.c
index 47b617f..2eafaae 100644
--- a/target-moxie/cpu.c
+++ b/target-moxie/cpu.c
@@ -59,6 +59,11 @@  static void moxie_cpu_realizefn(DeviceState *dev, Error **errp)
     mcc->parent_realize(dev, errp);
 }
 
+static void moxie_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void moxie_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -160,6 +165,7 @@  static const TypeInfo moxie_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(MoxieCPU),
     .instance_init = moxie_cpu_initfn,
+    .instance_finalize = moxie_cpu_finalize,
     .class_size = sizeof(MoxieCPUClass),
     .class_init = moxie_cpu_class_init,
 };
diff --git a/target-openrisc/cpu.c b/target-openrisc/cpu.c
index 39bedc1..4c2bbb5 100644
--- a/target-openrisc/cpu.c
+++ b/target-openrisc/cpu.c
@@ -85,6 +85,11 @@  static void openrisc_cpu_realizefn(DeviceState *dev, Error **errp)
     occ->parent_realize(dev, errp);
 }
 
+static void openrisc_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void openrisc_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -198,6 +203,7 @@  static const TypeInfo openrisc_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(OpenRISCCPU),
     .instance_init = openrisc_cpu_initfn,
+    .instance_finalize = openrisc_cpu_finalize,
     .abstract = true,
     .class_size = sizeof(OpenRISCCPUClass),
     .class_init = openrisc_cpu_class_init,
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index d74f4f0..f301ff7 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -9625,6 +9625,11 @@  static bool ppc_cpu_is_big_endian(CPUState *cs)
 }
 #endif
 
+static void ppc_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void ppc_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -9738,6 +9743,7 @@  static const TypeInfo ppc_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(PowerPCCPU),
     .instance_init = ppc_cpu_initfn,
+    .instance_finalize = ppc_cpu_finalize,
     .abstract = true,
     .class_size = sizeof(PowerPCCPUClass),
     .class_init = ppc_cpu_class_init,
diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c
index e0537fa..b00531b 100644
--- a/target-s390x/cpu.c
+++ b/target-s390x/cpu.c
@@ -214,6 +214,7 @@  static void s390_cpu_finalize(Object *obj)
 
     qemu_unregister_reset(s390_cpu_machine_reset_cb, cpu);
 #endif
+    cpu_exec_exit(CPU(obj));
 }
 
 #if !defined(CONFIG_USER_ONLY)
diff --git a/target-sh4/cpu.c b/target-sh4/cpu.c
index d187a2b..c5fdde0 100644
--- a/target-sh4/cpu.c
+++ b/target-sh4/cpu.c
@@ -240,6 +240,11 @@  static void superh_cpu_realizefn(DeviceState *dev, Error **errp)
     scc->parent_realize(dev, errp);
 }
 
+static void superh_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void superh_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -296,6 +301,7 @@  static const TypeInfo superh_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(SuperHCPU),
     .instance_init = superh_cpu_initfn,
+    .instance_finalize = superh_cpu_finalize,
     .abstract = true,
     .class_size = sizeof(SuperHCPUClass),
     .class_init = superh_cpu_class_init,
diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c
index a952097..67febc7 100644
--- a/target-sparc/cpu.c
+++ b/target-sparc/cpu.c
@@ -815,6 +815,7 @@  static void sparc_cpu_uninitfn(Object *obj)
     CPUSPARCState *env = &cpu->env;
 
     g_free(env->def);
+    cpu_exec_exit(CPU(obj));
 }
 
 static void sparc_cpu_class_init(ObjectClass *oc, void *data)
diff --git a/target-tricore/cpu.c b/target-tricore/cpu.c
index 2ba0cf4..384188c 100644
--- a/target-tricore/cpu.c
+++ b/target-tricore/cpu.c
@@ -80,6 +80,10 @@  static void tricore_cpu_realizefn(DeviceState *dev, Error **errp)
     tcc->parent_realize(dev, errp);
 }
 
+static void tricore_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
 
 static void tricore_cpu_initfn(Object *obj)
 {
@@ -180,6 +184,7 @@  static const TypeInfo tricore_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(TriCoreCPU),
     .instance_init = tricore_cpu_initfn,
+    .instance_finalize = tricore_cpu_finalize,
     .abstract = true,
     .class_size = sizeof(TriCoreCPUClass),
     .class_init = tricore_cpu_class_init,
diff --git a/target-unicore32/cpu.c b/target-unicore32/cpu.c
index 5b32987..9ee4837 100644
--- a/target-unicore32/cpu.c
+++ b/target-unicore32/cpu.c
@@ -103,6 +103,11 @@  static void uc32_cpu_realizefn(DeviceState *dev, Error **errp)
     ucc->parent_realize(dev, errp);
 }
 
+static void uc32_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void uc32_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -174,6 +179,7 @@  static const TypeInfo uc32_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(UniCore32CPU),
     .instance_init = uc32_cpu_initfn,
+    .instance_finalize = uc32_cpu_finalize,
     .abstract = true,
     .class_size = sizeof(UniCore32CPUClass),
     .class_init = uc32_cpu_class_init,
diff --git a/target-xtensa/cpu.c b/target-xtensa/cpu.c
index 2b75678..74aa075 100644
--- a/target-xtensa/cpu.c
+++ b/target-xtensa/cpu.c
@@ -104,6 +104,11 @@  static void xtensa_cpu_realizefn(DeviceState *dev, Error **errp)
     xcc->parent_realize(dev, errp);
 }
 
+static void xtensa_cpu_finalize(Object *obj)
+{
+    cpu_exec_exit(CPU(obj));
+}
+
 static void xtensa_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -162,6 +167,7 @@  static const TypeInfo xtensa_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(XtensaCPU),
     .instance_init = xtensa_cpu_initfn,
+    .instance_finalize = xtensa_cpu_finalize,
     .abstract = true,
     .class_size = sizeof(XtensaCPUClass),
     .class_init = xtensa_cpu_class_init,