diff mbox series

[v2] accel: forbid early use of kvm_enabled() and friends

Message ID 153026817452.402489.13386335348113684056.stgit@bahia.lan
State New
Headers show
Series [v2] accel: forbid early use of kvm_enabled() and friends | expand

Commit Message

Greg Kurz June 29, 2018, 10:29 a.m. UTC
It is unsafe to rely on *_enabled() helpers before the accelerator has
been initialized, ie, accel_init_machine() has succeeded, because they
always return false. But it is still possible to end up calling them
indirectly by inadvertance, and cause QEMU to misbehave.

This patch causes QEMU to abort if we try to check for an accelerator
before it has been set up. This will help to catch bugs earlier.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---

This patch was motivated by an regression we're currently fixing in
spapr because of an early use of kvm_enabled(). David suggested to
post this patch separately:

https://lists.nongnu.org/archive/html/qemu-ppc/2018-06/msg01136.html

v2: - dropped change in qom/cpu.c (useless header inclusion)
    - only #include "sysemu/kvm.h" if we actually need it
    - added David's R-b from v1 because changes in v2 are minor
---
 accel/accel.c          |    7 +++++++
 include/qemu-common.h  |    3 ++-
 include/sysemu/accel.h |    1 +
 include/sysemu/kvm.h   |    3 ++-
 stubs/Makefile.objs    |    1 +
 stubs/accel.c          |   14 ++++++++++++++
 target/i386/hax-all.c  |    2 +-
 target/i386/whpx-all.c |    2 +-
 8 files changed, 29 insertions(+), 4 deletions(-)
 create mode 100644 stubs/accel.c

Comments

Paolo Bonzini June 29, 2018, 10:35 a.m. UTC | #1
On 29/06/2018 12:29, Greg Kurz wrote:
> It is unsafe to rely on *_enabled() helpers before the accelerator has
> been initialized, ie, accel_init_machine() has succeeded, because they
> always return false. But it is still possible to end up calling them
> indirectly by inadvertance, and cause QEMU to misbehave.
> 
> This patch causes QEMU to abort if we try to check for an accelerator
> before it has been set up. This will help to catch bugs earlier.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> 
> This patch was motivated by an regression we're currently fixing in
> spapr because of an early use of kvm_enabled(). David suggested to
> post this patch separately:
> 
> https://lists.nongnu.org/archive/html/qemu-ppc/2018-06/msg01136.html
> 
> v2: - dropped change in qom/cpu.c (useless header inclusion)
>     - only #include "sysemu/kvm.h" if we actually need it
>     - added David's R-b from v1 because changes in v2 are minor

This adds a function call on possibly hot paths.  Can you make it inline?

Also asserting current_machine != NULL is not necessary, since you're
immediately dereferencing it.

Thanks,

Paolo

> ---
>  accel/accel.c          |    7 +++++++
>  include/qemu-common.h  |    3 ++-
>  include/sysemu/accel.h |    1 +
>  include/sysemu/kvm.h   |    3 ++-
>  stubs/Makefile.objs    |    1 +
>  stubs/accel.c          |   14 ++++++++++++++
>  target/i386/hax-all.c  |    2 +-
>  target/i386/whpx-all.c |    2 +-
>  8 files changed, 29 insertions(+), 4 deletions(-)
>  create mode 100644 stubs/accel.c
> 
> diff --git a/accel/accel.c b/accel/accel.c
> index 966b2d8f536c..27900aac9cc5 100644
> --- a/accel/accel.c
> +++ b/accel/accel.c
> @@ -51,6 +51,13 @@ static AccelClass *accel_find(const char *opt_name)
>      return ac;
>  }
>  
> +bool assert_accelerator_initialized(bool allowed)
> +{
> +    assert(current_machine != NULL);
> +    assert(current_machine->accelerator != NULL);
> +    return allowed;
> +}
> +
>  static int accel_init_machine(AccelClass *acc, MachineState *ms)
>  {
>      ObjectClass *oc = OBJECT_CLASS(acc);
> diff --git a/include/qemu-common.h b/include/qemu-common.h
> index 85f4749aefb7..01d5e4d97dbf 100644
> --- a/include/qemu-common.h
> +++ b/include/qemu-common.h
> @@ -82,7 +82,8 @@ int qemu_openpty_raw(int *aslave, char *pty_name);
>  extern bool tcg_allowed;
>  void tcg_exec_init(unsigned long tb_size);
>  #ifdef CONFIG_TCG
> -#define tcg_enabled() (tcg_allowed)
> +#include "sysemu/accel.h"
> +#define tcg_enabled() (assert_accelerator_initialized(tcg_allowed))
>  #else
>  #define tcg_enabled() 0
>  #endif
> diff --git a/include/sysemu/accel.h b/include/sysemu/accel.h
> index 637358f43014..76965cb69cc9 100644
> --- a/include/sysemu/accel.h
> +++ b/include/sysemu/accel.h
> @@ -71,5 +71,6 @@ void configure_accelerator(MachineState *ms);
>  void accel_register_compat_props(AccelState *accel);
>  /* Called just before os_setup_post (ie just before drop OS privs) */
>  void accel_setup_post(MachineState *ms);
> +bool assert_accelerator_initialized(bool allowed);
>  
>  #endif
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index 0b64b8e06786..5a2e59e99128 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -46,7 +46,8 @@ extern bool kvm_direct_msi_allowed;
>  extern bool kvm_ioeventfd_any_length_allowed;
>  extern bool kvm_msi_use_devid;
>  
> -#define kvm_enabled()           (kvm_allowed)
> +#include "sysemu/accel.h"
> +#define kvm_enabled()           (assert_accelerator_initialized(kvm_allowed))
>  /**
>   * kvm_irqchip_in_kernel:
>   *
> diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
> index 53d3f32cb258..2d5142287525 100644
> --- a/stubs/Makefile.objs
> +++ b/stubs/Makefile.objs
> @@ -43,3 +43,4 @@ stub-obj-y += xen-common.o
>  stub-obj-y += xen-hvm.o
>  stub-obj-y += pci-host-piix.o
>  stub-obj-y += ram-block.o
> +stub-obj-y += accel.o
> diff --git a/stubs/accel.c b/stubs/accel.c
> new file mode 100644
> index 000000000000..4f480f2d3f29
> --- /dev/null
> +++ b/stubs/accel.c
> @@ -0,0 +1,14 @@
> +/*
> + * accel stubs
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "sysemu/accel.h"
> +
> +bool assert_accelerator_initialized(bool allowed)
> +{
> +    return allowed;
> +}
> diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c
> index d2e512856bb8..7c78bd7d094d 100644
> --- a/target/i386/hax-all.c
> +++ b/target/i386/hax-all.c
> @@ -57,7 +57,7 @@ static int hax_arch_get_registers(CPUArchState *env);
>  
>  int hax_enabled(void)
>  {
> -    return hax_allowed;
> +    return assert_accelerator_initialized(hax_allowed);
>  }
>  
>  int valid_hax_tunnel_size(uint16_t size)
> diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c
> index 6b42096698ee..e7f6bc5958e7 100644
> --- a/target/i386/whpx-all.c
> +++ b/target/i386/whpx-all.c
> @@ -1422,7 +1422,7 @@ static int whpx_accel_init(MachineState *ms)
>  
>  int whpx_enabled(void)
>  {
> -    return whpx_allowed;
> +    return assert_accelerator_initialized(whpx_allowed);
>  }
>  
>  static void whpx_accel_class_init(ObjectClass *oc, void *data)
>
Daniel P. Berrangé June 29, 2018, 10:39 a.m. UTC | #2
On Fri, Jun 29, 2018 at 12:35:09PM +0200, Paolo Bonzini wrote:
> On 29/06/2018 12:29, Greg Kurz wrote:
> > It is unsafe to rely on *_enabled() helpers before the accelerator has
> > been initialized, ie, accel_init_machine() has succeeded, because they
> > always return false. But it is still possible to end up calling them
> > indirectly by inadvertance, and cause QEMU to misbehave.
> > 
> > This patch causes QEMU to abort if we try to check for an accelerator
> > before it has been set up. This will help to catch bugs earlier.
> > 
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> > 
> > This patch was motivated by an regression we're currently fixing in
> > spapr because of an early use of kvm_enabled(). David suggested to
> > post this patch separately:
> > 
> > https://lists.nongnu.org/archive/html/qemu-ppc/2018-06/msg01136.html
> > 
> > v2: - dropped change in qom/cpu.c (useless header inclusion)
> >     - only #include "sysemu/kvm.h" if we actually need it
> >     - added David's R-b from v1 because changes in v2 are minor
> 
> This adds a function call on possibly hot paths.  Can you make it inline?
> 
> Also asserting current_machine != NULL is not necessary, since you're
> immediately dereferencing it.

Is there a practical way to simply initialize the accelerators earlier
in startup sequence, so we just remove or at least reduce, the liklihood
of accessing it too early ?

Regards,
Daniel
Paolo Bonzini June 29, 2018, 10:40 a.m. UTC | #3
On 29/06/2018 12:39, Daniel P. Berrangé wrote:
>> Also asserting current_machine != NULL is not necessary, since you're
>> immediately dereferencing it.
> Is there a practical way to simply initialize the accelerators earlier
> in startup sequence, so we just remove or at least reduce, the liklihood
> of accessing it too early ?

We can try, though not for 3.0 of course.

Paolo
Greg Kurz June 29, 2018, 10:48 a.m. UTC | #4
On Fri, 29 Jun 2018 12:35:09 +0200
Paolo Bonzini <pbonzini@redhat.com> wrote:

> On 29/06/2018 12:29, Greg Kurz wrote:
> > It is unsafe to rely on *_enabled() helpers before the accelerator has
> > been initialized, ie, accel_init_machine() has succeeded, because they
> > always return false. But it is still possible to end up calling them
> > indirectly by inadvertance, and cause QEMU to misbehave.
> > 
> > This patch causes QEMU to abort if we try to check for an accelerator
> > before it has been set up. This will help to catch bugs earlier.
> > 
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> > 
> > This patch was motivated by an regression we're currently fixing in
> > spapr because of an early use of kvm_enabled(). David suggested to
> > post this patch separately:
> > 
> > https://lists.nongnu.org/archive/html/qemu-ppc/2018-06/msg01136.html
> > 
> > v2: - dropped change in qom/cpu.c (useless header inclusion)
> >     - only #include "sysemu/kvm.h" if we actually need it
> >     - added David's R-b from v1 because changes in v2 are minor  
> 
> This adds a function call on possibly hot paths.  Can you make it inline?
> 

I'll check this out.

> Also asserting current_machine != NULL is not necessary, since you're
> immediately dereferencing it.
> 

assert() is more explicit IMHO and it allows to know what's going on
without using gdb, but I can drop it if you prefer.

> Thanks,
> 
> Paolo
> 
> > ---
> >  accel/accel.c          |    7 +++++++
> >  include/qemu-common.h  |    3 ++-
> >  include/sysemu/accel.h |    1 +
> >  include/sysemu/kvm.h   |    3 ++-
> >  stubs/Makefile.objs    |    1 +
> >  stubs/accel.c          |   14 ++++++++++++++
> >  target/i386/hax-all.c  |    2 +-
> >  target/i386/whpx-all.c |    2 +-
> >  8 files changed, 29 insertions(+), 4 deletions(-)
> >  create mode 100644 stubs/accel.c
> > 
> > diff --git a/accel/accel.c b/accel/accel.c
> > index 966b2d8f536c..27900aac9cc5 100644
> > --- a/accel/accel.c
> > +++ b/accel/accel.c
> > @@ -51,6 +51,13 @@ static AccelClass *accel_find(const char *opt_name)
> >      return ac;
> >  }
> >  
> > +bool assert_accelerator_initialized(bool allowed)
> > +{
> > +    assert(current_machine != NULL);
> > +    assert(current_machine->accelerator != NULL);
> > +    return allowed;
> > +}
> > +
> >  static int accel_init_machine(AccelClass *acc, MachineState *ms)
> >  {
> >      ObjectClass *oc = OBJECT_CLASS(acc);
> > diff --git a/include/qemu-common.h b/include/qemu-common.h
> > index 85f4749aefb7..01d5e4d97dbf 100644
> > --- a/include/qemu-common.h
> > +++ b/include/qemu-common.h
> > @@ -82,7 +82,8 @@ int qemu_openpty_raw(int *aslave, char *pty_name);
> >  extern bool tcg_allowed;
> >  void tcg_exec_init(unsigned long tb_size);
> >  #ifdef CONFIG_TCG
> > -#define tcg_enabled() (tcg_allowed)
> > +#include "sysemu/accel.h"
> > +#define tcg_enabled() (assert_accelerator_initialized(tcg_allowed))
> >  #else
> >  #define tcg_enabled() 0
> >  #endif
> > diff --git a/include/sysemu/accel.h b/include/sysemu/accel.h
> > index 637358f43014..76965cb69cc9 100644
> > --- a/include/sysemu/accel.h
> > +++ b/include/sysemu/accel.h
> > @@ -71,5 +71,6 @@ void configure_accelerator(MachineState *ms);
> >  void accel_register_compat_props(AccelState *accel);
> >  /* Called just before os_setup_post (ie just before drop OS privs) */
> >  void accel_setup_post(MachineState *ms);
> > +bool assert_accelerator_initialized(bool allowed);
> >  
> >  #endif
> > diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> > index 0b64b8e06786..5a2e59e99128 100644
> > --- a/include/sysemu/kvm.h
> > +++ b/include/sysemu/kvm.h
> > @@ -46,7 +46,8 @@ extern bool kvm_direct_msi_allowed;
> >  extern bool kvm_ioeventfd_any_length_allowed;
> >  extern bool kvm_msi_use_devid;
> >  
> > -#define kvm_enabled()           (kvm_allowed)
> > +#include "sysemu/accel.h"
> > +#define kvm_enabled()           (assert_accelerator_initialized(kvm_allowed))
> >  /**
> >   * kvm_irqchip_in_kernel:
> >   *
> > diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
> > index 53d3f32cb258..2d5142287525 100644
> > --- a/stubs/Makefile.objs
> > +++ b/stubs/Makefile.objs
> > @@ -43,3 +43,4 @@ stub-obj-y += xen-common.o
> >  stub-obj-y += xen-hvm.o
> >  stub-obj-y += pci-host-piix.o
> >  stub-obj-y += ram-block.o
> > +stub-obj-y += accel.o
> > diff --git a/stubs/accel.c b/stubs/accel.c
> > new file mode 100644
> > index 000000000000..4f480f2d3f29
> > --- /dev/null
> > +++ b/stubs/accel.c
> > @@ -0,0 +1,14 @@
> > +/*
> > + * accel stubs
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "sysemu/accel.h"
> > +
> > +bool assert_accelerator_initialized(bool allowed)
> > +{
> > +    return allowed;
> > +}
> > diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c
> > index d2e512856bb8..7c78bd7d094d 100644
> > --- a/target/i386/hax-all.c
> > +++ b/target/i386/hax-all.c
> > @@ -57,7 +57,7 @@ static int hax_arch_get_registers(CPUArchState *env);
> >  
> >  int hax_enabled(void)
> >  {
> > -    return hax_allowed;
> > +    return assert_accelerator_initialized(hax_allowed);
> >  }
> >  
> >  int valid_hax_tunnel_size(uint16_t size)
> > diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c
> > index 6b42096698ee..e7f6bc5958e7 100644
> > --- a/target/i386/whpx-all.c
> > +++ b/target/i386/whpx-all.c
> > @@ -1422,7 +1422,7 @@ static int whpx_accel_init(MachineState *ms)
> >  
> >  int whpx_enabled(void)
> >  {
> > -    return whpx_allowed;
> > +    return assert_accelerator_initialized(whpx_allowed);
> >  }
> >  
> >  static void whpx_accel_class_init(ObjectClass *oc, void *data)
> >   
>
Greg Kurz June 29, 2018, 11:07 a.m. UTC | #5
On Fri, 29 Jun 2018 12:40:40 +0200
Paolo Bonzini <pbonzini@redhat.com> wrote:

> On 29/06/2018 12:39, Daniel P. Berrangé wrote:
> >> Also asserting current_machine != NULL is not necessary, since you're
> >> immediately dereferencing it.  
> > Is there a practical way to simply initialize the accelerators earlier
> > in startup sequence, so we just remove or at least reduce, the liklihood
> > of accessing it too early ?  
> 
> We can try, though not for 3.0 of course.
> 

FWIW, the motivation for this patch was kvm_enabled() being called under
the class_init function of the machine TypeInfo. This happens way earlier
than accelerator init. Not sure this is doable, but I can have a look.

> Paolo
Paolo Bonzini June 29, 2018, 11:08 a.m. UTC | #6
On 29/06/2018 13:07, Greg Kurz wrote:
>>>> Also asserting current_machine != NULL is not necessary, since you're
>>>> immediately dereferencing it.  
>>> Is there a practical way to simply initialize the accelerators earlier
>>> in startup sequence, so we just remove or at least reduce, the liklihood
>>> of accessing it too early ?  
>> We can try, though not for 3.0 of course.
>>
> FWIW, the motivation for this patch was kvm_enabled() being called under
> the class_init function of the machine TypeInfo. This happens way earlier
> than accelerator init. Not sure this is doable, but I can have a look.
> 

Probably not, that's way too early indeed.

Paolo
Daniel P. Berrangé June 29, 2018, 11:14 a.m. UTC | #7
On Fri, Jun 29, 2018 at 01:08:38PM +0200, Paolo Bonzini wrote:
> On 29/06/2018 13:07, Greg Kurz wrote:
> >>>> Also asserting current_machine != NULL is not necessary, since you're
> >>>> immediately dereferencing it.  
> >>> Is there a practical way to simply initialize the accelerators earlier
> >>> in startup sequence, so we just remove or at least reduce, the liklihood
> >>> of accessing it too early ?  
> >> We can try, though not for 3.0 of course.
> >>
> > FWIW, the motivation for this patch was kvm_enabled() being called under
> > the class_init function of the machine TypeInfo. This happens way earlier
> > than accelerator init. Not sure this is doable, but I can have a look.
> > 
> 
> Probably not, that's way too early indeed.

Yeah, doing anything non-trivial in class_init is just asking for trouble,
as conceivably nothing is initialized at that point. 

Regards,
Daniel
Cédric Le Goater June 29, 2018, 11:42 a.m. UTC | #8
On 06/29/2018 01:14 PM, Daniel P. Berrangé wrote:
> On Fri, Jun 29, 2018 at 01:08:38PM +0200, Paolo Bonzini wrote:
>> On 29/06/2018 13:07, Greg Kurz wrote:
>>>>>> Also asserting current_machine != NULL is not necessary, since you're
>>>>>> immediately dereferencing it.  
>>>>> Is there a practical way to simply initialize the accelerators earlier
>>>>> in startup sequence, so we just remove or at least reduce, the liklihood
>>>>> of accessing it too early ?  
>>>> We can try, though not for 3.0 of course.
>>>>
>>> FWIW, the motivation for this patch was kvm_enabled() being called under
>>> the class_init function of the machine TypeInfo. This happens way earlier
>>> than accelerator init. Not sure this is doable, but I can have a look.
>>>
>>
>> Probably not, that's way too early indeed.
> 
> Yeah, doing anything non-trivial in class_init is just asking for trouble,
> as conceivably nothing is initialized at that point. 

May be we should consider having a set of binaries for each accelerator,
KVM, TCG, etc. That would simplify a lot of the init path of the machines
and also of some of the models which are trying to initialize in one mode 
or the other.

Hybrid machines would still be possible, like using KVM for the CPUs and
an emulated model for some device. But the definitions would be static, 
not guessed from what is available or not. 

C.
Paolo Bonzini June 29, 2018, 11:47 a.m. UTC | #9
On 29/06/2018 13:42, Cédric Le Goater wrote:
>> Yeah, doing anything non-trivial in class_init is just asking for trouble,
>> as conceivably nothing is initialized at that point. 
>
> May be we should consider having a set of binaries for each accelerator,
> KVM, TCG, etc. That would simplify a lot of the init path of the machines
> and also of some of the models which are trying to initialize in one mode 
> or the other.

That would prevent things like "-machine accel=kvm:tcg", which try KVM
and fall back to TCG if it is not available.

Paolo

> Hybrid machines would still be possible, like using KVM for the CPUs and
> an emulated model for some device. But the definitions would be static, 
> not guessed from what is available or not.
Igor Mammedov June 29, 2018, 3:18 p.m. UTC | #10
On Fri, 29 Jun 2018 12:14:05 +0100
Daniel P. Berrangé <berrange@redhat.com> wrote:

> On Fri, Jun 29, 2018 at 01:08:38PM +0200, Paolo Bonzini wrote:
> > On 29/06/2018 13:07, Greg Kurz wrote:  
> > >>>> Also asserting current_machine != NULL is not necessary, since you're
> > >>>> immediately dereferencing it.    
> > >>> Is there a practical way to simply initialize the accelerators earlier
> > >>> in startup sequence, so we just remove or at least reduce, the liklihood
> > >>> of accessing it too early ?    
> > >> We can try, though not for 3.0 of course.
> > >>  
> > > FWIW, the motivation for this patch was kvm_enabled() being called under
> > > the class_init function of the machine TypeInfo. This happens way earlier
> > > than accelerator init. Not sure this is doable, but I can have a look.
> > >   
> > 
> > Probably not, that's way too early indeed.  
> 
> Yeah, doing anything non-trivial in class_init is just asking for trouble,
> as conceivably nothing is initialized at that point. 
isn't class_init called lazily? (so it might actually work as far as type
isn't touched before kvm is initialized)

> 
> Regards,
> Daniel
Daniel P. Berrangé June 29, 2018, 3:19 p.m. UTC | #11
On Fri, Jun 29, 2018 at 05:18:21PM +0200, Igor Mammedov wrote:
> On Fri, 29 Jun 2018 12:14:05 +0100
> Daniel P. Berrangé <berrange@redhat.com> wrote:
> 
> > On Fri, Jun 29, 2018 at 01:08:38PM +0200, Paolo Bonzini wrote:
> > > On 29/06/2018 13:07, Greg Kurz wrote:  
> > > >>>> Also asserting current_machine != NULL is not necessary, since you're
> > > >>>> immediately dereferencing it.    
> > > >>> Is there a practical way to simply initialize the accelerators earlier
> > > >>> in startup sequence, so we just remove or at least reduce, the liklihood
> > > >>> of accessing it too early ?    
> > > >> We can try, though not for 3.0 of course.
> > > >>  
> > > > FWIW, the motivation for this patch was kvm_enabled() being called under
> > > > the class_init function of the machine TypeInfo. This happens way earlier
> > > > than accelerator init. Not sure this is doable, but I can have a look.
> > > >   
> > > 
> > > Probably not, that's way too early indeed.  
> > 
> > Yeah, doing anything non-trivial in class_init is just asking for trouble,
> > as conceivably nothing is initialized at that point. 
> isn't class_init called lazily? (so it might actually work as far as type
> isn't touched before kvm is initialized)

Yes, but I don't think anything should rely on it being lazy, as such an
assumption is liable to break any time code is refactored, subtley changing
class init ordering.

Regards,
Daniel
Eduardo Habkost June 29, 2018, 8:03 p.m. UTC | #12
On Fri, Jun 29, 2018 at 11:39:10AM +0100, Daniel P. Berrangé wrote:
> On Fri, Jun 29, 2018 at 12:35:09PM +0200, Paolo Bonzini wrote:
> > On 29/06/2018 12:29, Greg Kurz wrote:
> > > It is unsafe to rely on *_enabled() helpers before the accelerator has
> > > been initialized, ie, accel_init_machine() has succeeded, because they
> > > always return false. But it is still possible to end up calling them
> > > indirectly by inadvertance, and cause QEMU to misbehave.
> > > 
> > > This patch causes QEMU to abort if we try to check for an accelerator
> > > before it has been set up. This will help to catch bugs earlier.
> > > 
> > > Signed-off-by: Greg Kurz <groug@kaod.org>
> > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > ---
> > > 
> > > This patch was motivated by an regression we're currently fixing in
> > > spapr because of an early use of kvm_enabled(). David suggested to
> > > post this patch separately:
> > > 
> > > https://lists.nongnu.org/archive/html/qemu-ppc/2018-06/msg01136.html
> > > 
> > > v2: - dropped change in qom/cpu.c (useless header inclusion)
> > >     - only #include "sysemu/kvm.h" if we actually need it
> > >     - added David's R-b from v1 because changes in v2 are minor
> > 
> > This adds a function call on possibly hot paths.  Can you make it inline?
> > 
> > Also asserting current_machine != NULL is not necessary, since you're
> > immediately dereferencing it.
> 
> Is there a practical way to simply initialize the accelerators earlier
> in startup sequence, so we just remove or at least reduce, the liklihood
> of accessing it too early ?

We can reduce it, but not eliminate it: it would still be
possible to use the *_enabled() macros too early, before we parse
all command-line options (on QOM class_init, for example).
Eduardo Habkost June 29, 2018, 8:09 p.m. UTC | #13
On Fri, Jun 29, 2018 at 01:47:59PM +0200, Paolo Bonzini wrote:
> On 29/06/2018 13:42, Cédric Le Goater wrote:
> >> Yeah, doing anything non-trivial in class_init is just asking for trouble,
> >> as conceivably nothing is initialized at that point. 
> >
> > May be we should consider having a set of binaries for each accelerator,
> > KVM, TCG, etc. That would simplify a lot of the init path of the machines
> > and also of some of the models which are trying to initialize in one mode 
> > or the other.
> 
> That would prevent things like "-machine accel=kvm:tcg", which try KVM
> and fall back to TCG if it is not available.

We could at least make it easier for packagers to choose the
features available on the binaries they build (including
accelerators, machine-types and device models).

As we already have distributions that build a "qemu-kvm" binary
that uses KVM by default, we could make this a supported feature
of the build system instead of requiring downstream
makefile/configure hacks.
Eduardo Habkost June 29, 2018, 8:16 p.m. UTC | #14
On Fri, Jun 29, 2018 at 05:18:21PM +0200, Igor Mammedov wrote:
> On Fri, 29 Jun 2018 12:14:05 +0100
> Daniel P. Berrangé <berrange@redhat.com> wrote:
> 
> > On Fri, Jun 29, 2018 at 01:08:38PM +0200, Paolo Bonzini wrote:
> > > On 29/06/2018 13:07, Greg Kurz wrote:  
> > > >>>> Also asserting current_machine != NULL is not necessary, since you're
> > > >>>> immediately dereferencing it.    
> > > >>> Is there a practical way to simply initialize the accelerators earlier
> > > >>> in startup sequence, so we just remove or at least reduce, the liklihood
> > > >>> of accessing it too early ?    
> > > >> We can try, though not for 3.0 of course.
> > > >>  
> > > > FWIW, the motivation for this patch was kvm_enabled() being called under
> > > > the class_init function of the machine TypeInfo. This happens way earlier
> > > > than accelerator init. Not sure this is doable, but I can have a look.
> > > >   
> > > 
> > > Probably not, that's way too early indeed.  
> > 
> > Yeah, doing anything non-trivial in class_init is just asking for trouble,
> > as conceivably nothing is initialized at that point. 
> isn't class_init called lazily? (so it might actually work as far as type
> isn't touched before kvm is initialized)

You have a good point: this means class_init bugs won't always
trigger the assert because of lazy class_init.  It would be a
good idea to add a functional test that calls qom-list-types
using --preconfig to try to trigger them.
Eduardo Habkost June 29, 2018, 8:34 p.m. UTC | #15
On Fri, Jun 29, 2018 at 05:16:52PM -0300, Eduardo Habkost wrote:
> On Fri, Jun 29, 2018 at 05:18:21PM +0200, Igor Mammedov wrote:
> > On Fri, 29 Jun 2018 12:14:05 +0100
> > Daniel P. Berrangé <berrange@redhat.com> wrote:
> > 
> > > On Fri, Jun 29, 2018 at 01:08:38PM +0200, Paolo Bonzini wrote:
> > > > On 29/06/2018 13:07, Greg Kurz wrote:  
> > > > >>>> Also asserting current_machine != NULL is not necessary, since you're
> > > > >>>> immediately dereferencing it.    
> > > > >>> Is there a practical way to simply initialize the accelerators earlier
> > > > >>> in startup sequence, so we just remove or at least reduce, the liklihood
> > > > >>> of accessing it too early ?    
> > > > >> We can try, though not for 3.0 of course.
> > > > >>  
> > > > > FWIW, the motivation for this patch was kvm_enabled() being called under
> > > > > the class_init function of the machine TypeInfo. This happens way earlier
> > > > > than accelerator init. Not sure this is doable, but I can have a look.
> > > > >   
> > > > 
> > > > Probably not, that's way too early indeed.  
> > > 
> > > Yeah, doing anything non-trivial in class_init is just asking for trouble,
> > > as conceivably nothing is initialized at that point. 
> > isn't class_init called lazily? (so it might actually work as far as type
> > isn't touched before kvm is initialized)
> 
> You have a good point: this means class_init bugs won't always
> trigger the assert because of lazy class_init.  It would be a
> good idea to add a functional test that calls qom-list-types
> using --preconfig to try to trigger them.

Heh, I just noticed that the first thing we do immediately after
parsing command-line options is calling:

 select_machine()
 find_default_machine()
 object_class_get_list()
 object_class_foreach()
 g_hash_table_foreach()
 object_class_foreach_tramp()
 type_initialize()

...which will call class_init for every single QOM type in QEMU.
Greg Kurz July 2, 2018, 1:44 p.m. UTC | #16
On Fri, 29 Jun 2018 17:34:24 -0300
Eduardo Habkost <ehabkost@redhat.com> wrote:

> On Fri, Jun 29, 2018 at 05:16:52PM -0300, Eduardo Habkost wrote:
> > On Fri, Jun 29, 2018 at 05:18:21PM +0200, Igor Mammedov wrote:  
> > > On Fri, 29 Jun 2018 12:14:05 +0100
> > > Daniel P. Berrangé <berrange@redhat.com> wrote:
> > >   
> > > > On Fri, Jun 29, 2018 at 01:08:38PM +0200, Paolo Bonzini wrote:  
> > > > > On 29/06/2018 13:07, Greg Kurz wrote:    
> > > > > >>>> Also asserting current_machine != NULL is not necessary, since you're
> > > > > >>>> immediately dereferencing it.      
> > > > > >>> Is there a practical way to simply initialize the accelerators earlier
> > > > > >>> in startup sequence, so we just remove or at least reduce, the liklihood
> > > > > >>> of accessing it too early ?      
> > > > > >> We can try, though not for 3.0 of course.
> > > > > >>    
> > > > > > FWIW, the motivation for this patch was kvm_enabled() being called under
> > > > > > the class_init function of the machine TypeInfo. This happens way earlier
> > > > > > than accelerator init. Not sure this is doable, but I can have a look.
> > > > > >     
> > > > > 
> > > > > Probably not, that's way too early indeed.    
> > > > 
> > > > Yeah, doing anything non-trivial in class_init is just asking for trouble,
> > > > as conceivably nothing is initialized at that point.   
> > > isn't class_init called lazily? (so it might actually work as far as type
> > > isn't touched before kvm is initialized)  
> > 
> > You have a good point: this means class_init bugs won't always
> > trigger the assert because of lazy class_init.  It would be a
> > good idea to add a functional test that calls qom-list-types
> > using --preconfig to try to trigger them.  
> 
> Heh, I just noticed that the first thing we do immediately after
> parsing command-line options is calling:
> 
>  select_machine()
>  find_default_machine()
>  object_class_get_list()
>  object_class_foreach()
>  g_hash_table_foreach()
>  object_class_foreach_tramp()
>  type_initialize()
> 
> ...which will call class_init for every single QOM type in QEMU.
> 

Yes indeed. IIUC, this would trigger the assert if any QOM type
calls a ${acc}_enabled() from its class_init then.

BTW, I've just realized it triggers on x86:

#0  0x00007fffef7f0660 in raise () at /lib64/libc.so.6
#1  0x00007fffef7f1c41 in abort () at /lib64/libc.so.6
#2  0x00007fffef7e8f7a in __assert_fail_base () at /lib64/libc.so.6
#3  0x00007fffef7e8ff2 in  () at /lib64/libc.so.6
#4  0x0000555555860cdd in assert_accelerator_initialized (allowed=false) at /home/greg/Work/qemu/qemu-master/accel/accel.c:56
#5  0x000055555593c4db in host_x86_cpu_class_init (oc=0x5555566864c0, data=0x0) at /home/greg/Work/qemu/qemu-master/target/i386/cpu.c:2841
#6  0x0000555555c93ff3 in type_initialize (ti=0x5555565ef3c0) at /home/greg/Work/qemu/qemu-master/qom/object.c:342
#7  0x0000555555c95143 in object_class_foreach_tramp (key=0x5555565ca630, value=0x5555565ef3c0, opaque=0x7fffffffd9b0) at /home/greg/Work/qemu/qemu-master/qom/object.c:813
#8  0x00007ffff76afec0 in g_hash_table_foreach () at /lib64/libglib-2.0.so.0
#9  0x0000555555c95222 in object_class_foreach (fn=0x555555c95373 <object_class_get_list_tramp>, implements_type=0x555555e31996 "machine", include_abstract=false, opaque=0x7fffffffda00) at /home/greg/Work/qemu/qemu-master/qom/object.c:835
#10 0x0000555555c953f1 in object_class_get_list (implements_type=0x555555e31996 "machine", include_abstract=false) at /home/greg/Work/qemu/qemu-master/qom/object.c:889
#11 0x00005555559c1608 in find_default_machine () at /home/greg/Work/qemu/qemu-master/vl.c:1416
#12 0x00005555559c5517 in select_machine () at /home/greg/Work/qemu/qemu-master/vl.c:2668
#13 0x00005555559c846d in main (argc=1, argv=0x7fffffffdd88, envp=0x7fffffffdd98) at /home/greg/Work/qemu/qemu-master/vl.c:3987

static void host_x86_cpu_class_init(ObjectClass *oc, void *data)
{
    X86CPUClass *xcc = X86_CPU_CLASS(oc);

    xcc->host_cpuid_required = true;
    xcc->ordering = 8;

    if (kvm_enabled()) {
        xcc->model_description =
            "KVM processor with all supported host features ";
    } else if (hvf_enabled()) {
        xcc->model_description =
            "HVF processor with all supported host features ";
    }
}

And, indeed, since commit d6dcc5583e7 (QEMU 2.11), -cpu ? shows the base
class description:

x86             host  Enables all features supported by the accelerator in the current host

instead of the expected:

x86             host  KVM processor with all supported host features 

This is a good illustration on how a bug can go unnoticed, but would be
caught with this patch. So I'll polish v3 and post it ASAP.

Cheers,

--
Greg
diff mbox series

Patch

diff --git a/accel/accel.c b/accel/accel.c
index 966b2d8f536c..27900aac9cc5 100644
--- a/accel/accel.c
+++ b/accel/accel.c
@@ -51,6 +51,13 @@  static AccelClass *accel_find(const char *opt_name)
     return ac;
 }
 
+bool assert_accelerator_initialized(bool allowed)
+{
+    assert(current_machine != NULL);
+    assert(current_machine->accelerator != NULL);
+    return allowed;
+}
+
 static int accel_init_machine(AccelClass *acc, MachineState *ms)
 {
     ObjectClass *oc = OBJECT_CLASS(acc);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 85f4749aefb7..01d5e4d97dbf 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -82,7 +82,8 @@  int qemu_openpty_raw(int *aslave, char *pty_name);
 extern bool tcg_allowed;
 void tcg_exec_init(unsigned long tb_size);
 #ifdef CONFIG_TCG
-#define tcg_enabled() (tcg_allowed)
+#include "sysemu/accel.h"
+#define tcg_enabled() (assert_accelerator_initialized(tcg_allowed))
 #else
 #define tcg_enabled() 0
 #endif
diff --git a/include/sysemu/accel.h b/include/sysemu/accel.h
index 637358f43014..76965cb69cc9 100644
--- a/include/sysemu/accel.h
+++ b/include/sysemu/accel.h
@@ -71,5 +71,6 @@  void configure_accelerator(MachineState *ms);
 void accel_register_compat_props(AccelState *accel);
 /* Called just before os_setup_post (ie just before drop OS privs) */
 void accel_setup_post(MachineState *ms);
+bool assert_accelerator_initialized(bool allowed);
 
 #endif
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 0b64b8e06786..5a2e59e99128 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -46,7 +46,8 @@  extern bool kvm_direct_msi_allowed;
 extern bool kvm_ioeventfd_any_length_allowed;
 extern bool kvm_msi_use_devid;
 
-#define kvm_enabled()           (kvm_allowed)
+#include "sysemu/accel.h"
+#define kvm_enabled()           (assert_accelerator_initialized(kvm_allowed))
 /**
  * kvm_irqchip_in_kernel:
  *
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index 53d3f32cb258..2d5142287525 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -43,3 +43,4 @@  stub-obj-y += xen-common.o
 stub-obj-y += xen-hvm.o
 stub-obj-y += pci-host-piix.o
 stub-obj-y += ram-block.o
+stub-obj-y += accel.o
diff --git a/stubs/accel.c b/stubs/accel.c
new file mode 100644
index 000000000000..4f480f2d3f29
--- /dev/null
+++ b/stubs/accel.c
@@ -0,0 +1,14 @@ 
+/*
+ * accel stubs
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/accel.h"
+
+bool assert_accelerator_initialized(bool allowed)
+{
+    return allowed;
+}
diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c
index d2e512856bb8..7c78bd7d094d 100644
--- a/target/i386/hax-all.c
+++ b/target/i386/hax-all.c
@@ -57,7 +57,7 @@  static int hax_arch_get_registers(CPUArchState *env);
 
 int hax_enabled(void)
 {
-    return hax_allowed;
+    return assert_accelerator_initialized(hax_allowed);
 }
 
 int valid_hax_tunnel_size(uint16_t size)
diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c
index 6b42096698ee..e7f6bc5958e7 100644
--- a/target/i386/whpx-all.c
+++ b/target/i386/whpx-all.c
@@ -1422,7 +1422,7 @@  static int whpx_accel_init(MachineState *ms)
 
 int whpx_enabled(void)
 {
-    return whpx_allowed;
+    return assert_accelerator_initialized(whpx_allowed);
 }
 
 static void whpx_accel_class_init(ObjectClass *oc, void *data)