Patchwork [15/21] qemu-kvm: Clean up mpstate synchronization

login
register
mail settings
Submitter Jan Kiszka
Date Feb. 2, 2010, 8:19 a.m.
Message ID <5104499c6d629296d385179f95163e7c16e1ec01.1265098707.git.jan.kiszka@siemens.com>
Download mbox | patch
Permalink /patch/44258/
State New
Headers show

Comments

Jan Kiszka - Feb. 2, 2010, 8:19 a.m.
Push mpstate reading/writing into kvm_arch_load/save_regs and, on x86,
properly synchronize with halted in the accessor functions.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 hw/apic.c             |    7 ----
 qemu-kvm-ia64.c       |    4 ++-
 qemu-kvm-x86.c        |   88 +++++++++++++++++++++++++++---------------------
 qemu-kvm.c            |   30 -----------------
 qemu-kvm.h            |   15 --------
 target-i386/machine.c |    6 ---
 target-ia64/machine.c |    3 ++
 7 files changed, 55 insertions(+), 98 deletions(-)
Gleb Natapov - Feb. 2, 2010, 12:23 p.m.
On Tue, Feb 02, 2010 at 09:19:01AM +0100, Jan Kiszka wrote:
> Push mpstate reading/writing into kvm_arch_load/save_regs and, on x86,
> properly synchronize with halted in the accessor functions.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  hw/apic.c             |    7 ----
>  qemu-kvm-ia64.c       |    4 ++-
>  qemu-kvm-x86.c        |   88 +++++++++++++++++++++++++++---------------------
>  qemu-kvm.c            |   30 -----------------
>  qemu-kvm.h            |   15 --------
>  target-i386/machine.c |    6 ---
>  target-ia64/machine.c |    3 ++
>  7 files changed, 55 insertions(+), 98 deletions(-)
> 
> diff --git a/hw/apic.c b/hw/apic.c
> index 3e03e10..092c61e 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -507,13 +507,6 @@ void apic_init_reset(CPUState *env)
>      s->wait_for_sipi = 1;
>  
>      env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
> -#ifdef KVM_CAP_MP_STATE
> -    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
> -        env->mp_state
> -            = env->halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE;
> -        kvm_load_mpstate(env);
> -    }
> -#endif
>  }
>  
>  static void apic_startup(APICState *s, int vector_num)
> diff --git a/qemu-kvm-ia64.c b/qemu-kvm-ia64.c
> index fc8110e..39bcbeb 100644
> --- a/qemu-kvm-ia64.c
> +++ b/qemu-kvm-ia64.c
> @@ -124,7 +124,9 @@ void kvm_arch_cpu_reset(CPUState *env)
>  {
>      if (kvm_irqchip_in_kernel(kvm_context)) {
>  #ifdef KVM_CAP_MP_STATE
> -	kvm_reset_mpstate(env->kvm_cpu_state.vcpu_ctx);
> +        struct kvm_mp_state mp_state = {.mp_state = KVM_MP_STATE_UNINITIALIZED
> +        };
> +        kvm_set_mpstate(env, &mp_state);
>  #endif
>      } else {
>  	env->interrupt_request &= ~CPU_INTERRUPT_HARD;
> diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
> index 63cd095..6b5895f 100644
> --- a/qemu-kvm-x86.c
> +++ b/qemu-kvm-x86.c
> @@ -754,6 +754,48 @@ static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
>          return 0;
>  }
>  
> +static void kvm_arch_save_mpstate(CPUState *env)
> +{
> +#ifdef KVM_CAP_MP_STATE
> +    int r;
> +    struct kvm_mp_state mp_state;
> +
> +    r = kvm_get_mpstate(env, &mp_state);
> +    if (r < 0) {
> +        env->mp_state = -1;
> +    } else {
> +        env->mp_state = mp_state.mp_state;
> +        if (kvm_irqchip_in_kernel()) {
> +            env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
> +        }
> +    }
> +#else
> +    env->mp_state = -1;
> +#endif
> +}
> +
> +static void kvm_arch_load_mpstate(CPUState *env)
> +{
> +#ifdef KVM_CAP_MP_STATE
> +    struct kvm_mp_state mp_state;
> +
> +    /*
> +     * -1 indicates that the host did not support GET_MP_STATE ioctl,
> +     *  so don't touch it.
> +     */
> +    if (env->mp_state != -1) {
> +        if (kvm_irqchip_in_kernel()) {
> +            env->mp_state = env->halted ? KVM_MP_STATE_UNINITIALIZED :
> +                                          KVM_MP_STATE_RUNNABLE;
When irqchip is in kernel env->halted doesn't contain any relevant
information, so this is incorrect. Actually env->halted is updated only
to show correct cpu state during "info cpus".

> +            /* Avoid deadlock: no user space IRQ will ever clear it. */
And this comment explains why looking at env->halt when irqchip is in
kernel is wrong :)

> +            env->halted = 0;
> +        }
> +        mp_state.mp_state = env->mp_state;
> +        kvm_set_mpstate(env, &mp_state);
> +    }
> +#endif
> +}
> +
>  static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
>  {
>      lhs->selector = rhs->selector;
> @@ -926,6 +968,10 @@ void kvm_arch_load_regs(CPUState *env, int level)
>      rc = kvm_set_msrs(env, msrs, n);
>      if (rc == -1)
>          perror("kvm_set_msrs FAILED");
> +
> +    if (level >= KVM_PUT_RESET_STATE) {
> +        kvm_arch_load_mpstate(env);
> +    }
>  }
>  
>  void kvm_load_tsc(CPUState *env)
> @@ -940,36 +986,6 @@ void kvm_load_tsc(CPUState *env)
>          perror("kvm_set_tsc FAILED.\n");
>  }
>  
> -void kvm_arch_save_mpstate(CPUState *env)
> -{
> -#ifdef KVM_CAP_MP_STATE
> -    int r;
> -    struct kvm_mp_state mp_state;
> -
> -    r = kvm_get_mpstate(env, &mp_state);
> -    if (r < 0)
> -        env->mp_state = -1;
> -    else
> -        env->mp_state = mp_state.mp_state;
> -#else
> -    env->mp_state = -1;
> -#endif
> -}
> -
> -void kvm_arch_load_mpstate(CPUState *env)
> -{
> -#ifdef KVM_CAP_MP_STATE
> -    struct kvm_mp_state mp_state = { .mp_state = env->mp_state };
> -
> -    /*
> -     * -1 indicates that the host did not support GET_MP_STATE ioctl,
> -     *  so don't touch it.
> -     */
> -    if (env->mp_state != -1)
> -        kvm_set_mpstate(env, &mp_state);
> -#endif
> -}
> -
>  void kvm_arch_save_regs(CPUState *env)
>  {
>      struct kvm_regs regs;
> @@ -1366,15 +1382,9 @@ void kvm_arch_cpu_reset(CPUState *env)
>  {
>      kvm_arch_reset_vcpu(env);
>      kvm_put_vcpu_events(env);
> -    if (!cpu_is_bsp(env)) {
> -	if (kvm_irqchip_in_kernel()) {
> -#ifdef KVM_CAP_MP_STATE
> -	    kvm_reset_mpstate(env);
> -#endif
> -	} else {
> -	    env->interrupt_request &= ~CPU_INTERRUPT_HARD;
> -	    env->halted = 1;
> -	}
> +    if (!cpu_is_bsp(env) && !kvm_irqchip_in_kernel()) {
> +        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
> +        env->halted = 1;
>      }
>  }
>  
> diff --git a/qemu-kvm.c b/qemu-kvm.c
> index 53030f1..efa6a29 100644
> --- a/qemu-kvm.c
> +++ b/qemu-kvm.c
> @@ -1579,36 +1579,6 @@ void kvm_update_interrupt_request(CPUState *env)
>      }
>  }
>  
> -static void kvm_do_load_mpstate(void *_env)
> -{
> -    CPUState *env = _env;
> -
> -    kvm_arch_load_mpstate(env);
> -}
> -
> -void kvm_load_mpstate(CPUState *env)
> -{
> -    if (kvm_enabled() && qemu_system_ready && kvm_vcpu_inited(env))
> -        on_vcpu(env, kvm_do_load_mpstate, env);
> -}
> -
> -static void kvm_do_save_mpstate(void *_env)
> -{
> -    CPUState *env = _env;
> -
> -    kvm_arch_save_mpstate(env);
> -#ifdef KVM_CAP_MP_STATE
> -    if (kvm_irqchip_in_kernel())
> -        env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
> -#endif
> -}
> -
> -void kvm_save_mpstate(CPUState *env)
> -{
> -    if (kvm_enabled())
> -        on_vcpu(env, kvm_do_save_mpstate, env);
> -}
> -
>  int kvm_cpu_exec(CPUState *env)
>  {
>      int r;
> diff --git a/qemu-kvm.h b/qemu-kvm.h
> index 6d785a0..aa7bcce 100644
> --- a/qemu-kvm.h
> +++ b/qemu-kvm.h
> @@ -299,16 +299,6 @@ int kvm_get_mpstate(CPUState *env, struct kvm_mp_state *mp_state);
>   *
>   */
>  int kvm_set_mpstate(CPUState *env, struct kvm_mp_state *mp_state);
> -/*!
> - *  * \brief Reset VCPU MP state
> - *
> - */
> -static inline int kvm_reset_mpstate(CPUState *env)
> -{
> -    struct kvm_mp_state mp_state = {.mp_state = KVM_MP_STATE_UNINITIALIZED
> -    };
> -    return kvm_set_mpstate(env, &mp_state);
> -}
>  #endif
>  
>  /*!
> @@ -874,8 +864,6 @@ static inline void kvm_inject_x86_mce(CPUState *cenv, int bank,
>  int kvm_main_loop(void);
>  int kvm_init_ap(void);
>  int kvm_vcpu_inited(CPUState *env);
> -void kvm_load_mpstate(CPUState *env);
> -void kvm_save_mpstate(CPUState *env);
>  void kvm_apic_init(CPUState *env);
>  /* called from vcpu initialization */
>  void qemu_kvm_load_lapic(CPUState *env);
> @@ -909,8 +897,6 @@ int kvm_arch_qemu_create_context(void);
>  
>  void kvm_arch_save_regs(CPUState *env);
>  void kvm_arch_load_regs(CPUState *env, int level);
> -void kvm_arch_load_mpstate(CPUState *env);
> -void kvm_arch_save_mpstate(CPUState *env);
>  int kvm_arch_has_work(CPUState *env);
>  void kvm_arch_process_irqchip_events(CPUState *env);
>  int kvm_arch_try_push_interrupts(void *opaque);
> @@ -979,7 +965,6 @@ void kvm_load_tsc(CPUState *env);
>  #ifdef TARGET_I386
>  #define qemu_kvm_has_pit_state2() (0)
>  #endif
> -#define kvm_save_mpstate(env)   do {} while(0)
>  #define qemu_kvm_cpu_stop(env) do {} while(0)
>  static inline void kvm_load_tsc(CPUState *env)
>  {
> diff --git a/target-i386/machine.c b/target-i386/machine.c
> index bebde82..61e6a87 100644
> --- a/target-i386/machine.c
> +++ b/target-i386/machine.c
> @@ -323,7 +323,6 @@ static void cpu_pre_save(void *opaque)
>      int i;
>  
>      if (kvm_enabled()) {
> -        kvm_save_mpstate(env);
>          kvm_get_vcpu_events(env);
>      }
>  
> @@ -362,12 +361,7 @@ static int cpu_post_load(void *opaque, int version_id)
>      tlb_flush(env, 1);
>  
>      if (kvm_enabled()) {
> -        /* when in-kernel irqchip is used, env->halted causes deadlock
> -           because no userspace IRQs will ever clear this flag */
> -        env->halted = 0;
> -
>          kvm_load_tsc(env);
> -        kvm_load_mpstate(env);
>          kvm_put_vcpu_events(env);
>      }
>  
> diff --git a/target-ia64/machine.c b/target-ia64/machine.c
> index fdbeeef..8cf5bdd 100644
> --- a/target-ia64/machine.c
> +++ b/target-ia64/machine.c
> @@ -4,6 +4,9 @@
>  #include "exec-all.h"
>  #include "qemu-kvm.h"
>  
> +void kvm_arch_save_mpstate(CPUState *env);
> +void kvm_arch_load_mpstate(CPUState *env);
> +
>  void cpu_save(QEMUFile *f, void *opaque)
>  {
>      CPUState *env = opaque;
> -- 
> 1.6.0.2
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.
Jan Kiszka - Feb. 2, 2010, 12:31 p.m.
Gleb Natapov wrote:
> On Tue, Feb 02, 2010 at 09:19:01AM +0100, Jan Kiszka wrote:
>> Push mpstate reading/writing into kvm_arch_load/save_regs and, on x86,
>> properly synchronize with halted in the accessor functions.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>> ---
>>  hw/apic.c             |    7 ----
>>  qemu-kvm-ia64.c       |    4 ++-
>>  qemu-kvm-x86.c        |   88 +++++++++++++++++++++++++++---------------------
>>  qemu-kvm.c            |   30 -----------------
>>  qemu-kvm.h            |   15 --------
>>  target-i386/machine.c |    6 ---
>>  target-ia64/machine.c |    3 ++
>>  7 files changed, 55 insertions(+), 98 deletions(-)
>>
>> diff --git a/hw/apic.c b/hw/apic.c
>> index 3e03e10..092c61e 100644
>> --- a/hw/apic.c
>> +++ b/hw/apic.c
>> @@ -507,13 +507,6 @@ void apic_init_reset(CPUState *env)
>>      s->wait_for_sipi = 1;
>>  
>>      env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
>> -#ifdef KVM_CAP_MP_STATE
>> -    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
>> -        env->mp_state
>> -            = env->halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE;
>> -        kvm_load_mpstate(env);
>> -    }
>> -#endif
>>  }
>>  
>>  static void apic_startup(APICState *s, int vector_num)
>> diff --git a/qemu-kvm-ia64.c b/qemu-kvm-ia64.c
>> index fc8110e..39bcbeb 100644
>> --- a/qemu-kvm-ia64.c
>> +++ b/qemu-kvm-ia64.c
>> @@ -124,7 +124,9 @@ void kvm_arch_cpu_reset(CPUState *env)
>>  {
>>      if (kvm_irqchip_in_kernel(kvm_context)) {
>>  #ifdef KVM_CAP_MP_STATE
>> -	kvm_reset_mpstate(env->kvm_cpu_state.vcpu_ctx);
>> +        struct kvm_mp_state mp_state = {.mp_state = KVM_MP_STATE_UNINITIALIZED
>> +        };
>> +        kvm_set_mpstate(env, &mp_state);
>>  #endif
>>      } else {
>>  	env->interrupt_request &= ~CPU_INTERRUPT_HARD;
>> diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
>> index 63cd095..6b5895f 100644
>> --- a/qemu-kvm-x86.c
>> +++ b/qemu-kvm-x86.c
>> @@ -754,6 +754,48 @@ static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
>>          return 0;
>>  }
>>  
>> +static void kvm_arch_save_mpstate(CPUState *env)
>> +{
>> +#ifdef KVM_CAP_MP_STATE
>> +    int r;
>> +    struct kvm_mp_state mp_state;
>> +
>> +    r = kvm_get_mpstate(env, &mp_state);
>> +    if (r < 0) {
>> +        env->mp_state = -1;
>> +    } else {
>> +        env->mp_state = mp_state.mp_state;
>> +        if (kvm_irqchip_in_kernel()) {
>> +            env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
>> +        }
>> +    }
>> +#else
>> +    env->mp_state = -1;
>> +#endif
>> +}
>> +
>> +static void kvm_arch_load_mpstate(CPUState *env)
>> +{
>> +#ifdef KVM_CAP_MP_STATE
>> +    struct kvm_mp_state mp_state;
>> +
>> +    /*
>> +     * -1 indicates that the host did not support GET_MP_STATE ioctl,
>> +     *  so don't touch it.
>> +     */
>> +    if (env->mp_state != -1) {
>> +        if (kvm_irqchip_in_kernel()) {
>> +            env->mp_state = env->halted ? KVM_MP_STATE_UNINITIALIZED :
>> +                                          KVM_MP_STATE_RUNNABLE;
> When irqchip is in kernel env->halted doesn't contain any relevant
> information, so this is incorrect. Actually env->halted is updated only
> to show correct cpu state during "info cpus".

OK, copied from apic_init_reset, see above. So that hunk was probably at
least useless, and now it's harmfull. Will drop this and only sync from
mp_state -> halted.

Thanks,
Jan
Gleb Natapov - Feb. 2, 2010, 12:37 p.m.
On Tue, Feb 02, 2010 at 01:31:50PM +0100, Jan Kiszka wrote:
> Gleb Natapov wrote:
> > On Tue, Feb 02, 2010 at 09:19:01AM +0100, Jan Kiszka wrote:
> >> Push mpstate reading/writing into kvm_arch_load/save_regs and, on x86,
> >> properly synchronize with halted in the accessor functions.
> >>
> >> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> >> ---
> >>  hw/apic.c             |    7 ----
> >>  qemu-kvm-ia64.c       |    4 ++-
> >>  qemu-kvm-x86.c        |   88 +++++++++++++++++++++++++++---------------------
> >>  qemu-kvm.c            |   30 -----------------
> >>  qemu-kvm.h            |   15 --------
> >>  target-i386/machine.c |    6 ---
> >>  target-ia64/machine.c |    3 ++
> >>  7 files changed, 55 insertions(+), 98 deletions(-)
> >>
> >> diff --git a/hw/apic.c b/hw/apic.c
> >> index 3e03e10..092c61e 100644
> >> --- a/hw/apic.c
> >> +++ b/hw/apic.c
> >> @@ -507,13 +507,6 @@ void apic_init_reset(CPUState *env)
> >>      s->wait_for_sipi = 1;
> >>  
> >>      env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
> >> -#ifdef KVM_CAP_MP_STATE
> >> -    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
> >> -        env->mp_state
> >> -            = env->halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE;
> >> -        kvm_load_mpstate(env);
> >> -    }
> >> -#endif
> >>  }
> >>  
> >>  static void apic_startup(APICState *s, int vector_num)
> >> diff --git a/qemu-kvm-ia64.c b/qemu-kvm-ia64.c
> >> index fc8110e..39bcbeb 100644
> >> --- a/qemu-kvm-ia64.c
> >> +++ b/qemu-kvm-ia64.c
> >> @@ -124,7 +124,9 @@ void kvm_arch_cpu_reset(CPUState *env)
> >>  {
> >>      if (kvm_irqchip_in_kernel(kvm_context)) {
> >>  #ifdef KVM_CAP_MP_STATE
> >> -	kvm_reset_mpstate(env->kvm_cpu_state.vcpu_ctx);
> >> +        struct kvm_mp_state mp_state = {.mp_state = KVM_MP_STATE_UNINITIALIZED
> >> +        };
> >> +        kvm_set_mpstate(env, &mp_state);
> >>  #endif
> >>      } else {
> >>  	env->interrupt_request &= ~CPU_INTERRUPT_HARD;
> >> diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
> >> index 63cd095..6b5895f 100644
> >> --- a/qemu-kvm-x86.c
> >> +++ b/qemu-kvm-x86.c
> >> @@ -754,6 +754,48 @@ static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
> >>          return 0;
> >>  }
> >>  
> >> +static void kvm_arch_save_mpstate(CPUState *env)
> >> +{
> >> +#ifdef KVM_CAP_MP_STATE
> >> +    int r;
> >> +    struct kvm_mp_state mp_state;
> >> +
> >> +    r = kvm_get_mpstate(env, &mp_state);
> >> +    if (r < 0) {
> >> +        env->mp_state = -1;
> >> +    } else {
> >> +        env->mp_state = mp_state.mp_state;
> >> +        if (kvm_irqchip_in_kernel()) {
> >> +            env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
> >> +        }
> >> +    }
> >> +#else
> >> +    env->mp_state = -1;
> >> +#endif
> >> +}
> >> +
> >> +static void kvm_arch_load_mpstate(CPUState *env)
> >> +{
> >> +#ifdef KVM_CAP_MP_STATE
> >> +    struct kvm_mp_state mp_state;
> >> +
> >> +    /*
> >> +     * -1 indicates that the host did not support GET_MP_STATE ioctl,
> >> +     *  so don't touch it.
> >> +     */
> >> +    if (env->mp_state != -1) {
> >> +        if (kvm_irqchip_in_kernel()) {
> >> +            env->mp_state = env->halted ? KVM_MP_STATE_UNINITIALIZED :
> >> +                                          KVM_MP_STATE_RUNNABLE;
> > When irqchip is in kernel env->halted doesn't contain any relevant
> > information, so this is incorrect. Actually env->halted is updated only
> > to show correct cpu state during "info cpus".
> 
> OK, copied from apic_init_reset, see above. So that hunk was probably at
> least useless, and now it's harmfull. Will drop this and only sync from
> mp_state -> halted.
> 
It was not useless in apic_init_reset it was a shortcut for:
env->mp_state = !(s->apicbase & MSR_IA32_APICBASE_BSP) ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE;

On reset BSP VCPU should set env->mp_state to KVM_MP_STATE_RUNNABLE and
all others to KVM_MP_STATE_UNINITIALIZED.

--
			Gleb.
Jan Kiszka - Feb. 2, 2010, 12:40 p.m.
Gleb Natapov wrote:
> On Tue, Feb 02, 2010 at 01:31:50PM +0100, Jan Kiszka wrote:
>> Gleb Natapov wrote:
>>> On Tue, Feb 02, 2010 at 09:19:01AM +0100, Jan Kiszka wrote:
>>>> Push mpstate reading/writing into kvm_arch_load/save_regs and, on x86,
>>>> properly synchronize with halted in the accessor functions.
>>>>
>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>> ---
>>>>  hw/apic.c             |    7 ----
>>>>  qemu-kvm-ia64.c       |    4 ++-
>>>>  qemu-kvm-x86.c        |   88 +++++++++++++++++++++++++++---------------------
>>>>  qemu-kvm.c            |   30 -----------------
>>>>  qemu-kvm.h            |   15 --------
>>>>  target-i386/machine.c |    6 ---
>>>>  target-ia64/machine.c |    3 ++
>>>>  7 files changed, 55 insertions(+), 98 deletions(-)
>>>>
>>>> diff --git a/hw/apic.c b/hw/apic.c
>>>> index 3e03e10..092c61e 100644
>>>> --- a/hw/apic.c
>>>> +++ b/hw/apic.c
>>>> @@ -507,13 +507,6 @@ void apic_init_reset(CPUState *env)
>>>>      s->wait_for_sipi = 1;
>>>>  
>>>>      env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
>>>> -#ifdef KVM_CAP_MP_STATE
>>>> -    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
>>>> -        env->mp_state
>>>> -            = env->halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE;
>>>> -        kvm_load_mpstate(env);
>>>> -    }
>>>> -#endif
>>>>  }
>>>>  
>>>>  static void apic_startup(APICState *s, int vector_num)
>>>> diff --git a/qemu-kvm-ia64.c b/qemu-kvm-ia64.c
>>>> index fc8110e..39bcbeb 100644
>>>> --- a/qemu-kvm-ia64.c
>>>> +++ b/qemu-kvm-ia64.c
>>>> @@ -124,7 +124,9 @@ void kvm_arch_cpu_reset(CPUState *env)
>>>>  {
>>>>      if (kvm_irqchip_in_kernel(kvm_context)) {
>>>>  #ifdef KVM_CAP_MP_STATE
>>>> -	kvm_reset_mpstate(env->kvm_cpu_state.vcpu_ctx);
>>>> +        struct kvm_mp_state mp_state = {.mp_state = KVM_MP_STATE_UNINITIALIZED
>>>> +        };
>>>> +        kvm_set_mpstate(env, &mp_state);
>>>>  #endif
>>>>      } else {
>>>>  	env->interrupt_request &= ~CPU_INTERRUPT_HARD;
>>>> diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
>>>> index 63cd095..6b5895f 100644
>>>> --- a/qemu-kvm-x86.c
>>>> +++ b/qemu-kvm-x86.c
>>>> @@ -754,6 +754,48 @@ static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
>>>>          return 0;
>>>>  }
>>>>  
>>>> +static void kvm_arch_save_mpstate(CPUState *env)
>>>> +{
>>>> +#ifdef KVM_CAP_MP_STATE
>>>> +    int r;
>>>> +    struct kvm_mp_state mp_state;
>>>> +
>>>> +    r = kvm_get_mpstate(env, &mp_state);
>>>> +    if (r < 0) {
>>>> +        env->mp_state = -1;
>>>> +    } else {
>>>> +        env->mp_state = mp_state.mp_state;
>>>> +        if (kvm_irqchip_in_kernel()) {
>>>> +            env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
>>>> +        }
>>>> +    }
>>>> +#else
>>>> +    env->mp_state = -1;
>>>> +#endif
>>>> +}
>>>> +
>>>> +static void kvm_arch_load_mpstate(CPUState *env)
>>>> +{
>>>> +#ifdef KVM_CAP_MP_STATE
>>>> +    struct kvm_mp_state mp_state;
>>>> +
>>>> +    /*
>>>> +     * -1 indicates that the host did not support GET_MP_STATE ioctl,
>>>> +     *  so don't touch it.
>>>> +     */
>>>> +    if (env->mp_state != -1) {
>>>> +        if (kvm_irqchip_in_kernel()) {
>>>> +            env->mp_state = env->halted ? KVM_MP_STATE_UNINITIALIZED :
>>>> +                                          KVM_MP_STATE_RUNNABLE;
>>> When irqchip is in kernel env->halted doesn't contain any relevant
>>> information, so this is incorrect. Actually env->halted is updated only
>>> to show correct cpu state during "info cpus".
>> OK, copied from apic_init_reset, see above. So that hunk was probably at
>> least useless, and now it's harmfull. Will drop this and only sync from
>> mp_state -> halted.
>>
> It was not useless in apic_init_reset it was a shortcut for:
> env->mp_state = !(s->apicbase & MSR_IA32_APICBASE_BSP) ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE;
> 
> On reset BSP VCPU should set env->mp_state to KVM_MP_STATE_RUNNABLE and
> all others to KVM_MP_STATE_UNINITIALIZED.

OK, belongs to kvm vpcu init code then - less encrypted.

Jan

Patch

diff --git a/hw/apic.c b/hw/apic.c
index 3e03e10..092c61e 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -507,13 +507,6 @@  void apic_init_reset(CPUState *env)
     s->wait_for_sipi = 1;
 
     env->halted = !(s->apicbase & MSR_IA32_APICBASE_BSP);
-#ifdef KVM_CAP_MP_STATE
-    if (kvm_enabled() && kvm_irqchip_in_kernel()) {
-        env->mp_state
-            = env->halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE;
-        kvm_load_mpstate(env);
-    }
-#endif
 }
 
 static void apic_startup(APICState *s, int vector_num)
diff --git a/qemu-kvm-ia64.c b/qemu-kvm-ia64.c
index fc8110e..39bcbeb 100644
--- a/qemu-kvm-ia64.c
+++ b/qemu-kvm-ia64.c
@@ -124,7 +124,9 @@  void kvm_arch_cpu_reset(CPUState *env)
 {
     if (kvm_irqchip_in_kernel(kvm_context)) {
 #ifdef KVM_CAP_MP_STATE
-	kvm_reset_mpstate(env->kvm_cpu_state.vcpu_ctx);
+        struct kvm_mp_state mp_state = {.mp_state = KVM_MP_STATE_UNINITIALIZED
+        };
+        kvm_set_mpstate(env, &mp_state);
 #endif
     } else {
 	env->interrupt_request &= ~CPU_INTERRUPT_HARD;
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 63cd095..6b5895f 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -754,6 +754,48 @@  static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
         return 0;
 }
 
+static void kvm_arch_save_mpstate(CPUState *env)
+{
+#ifdef KVM_CAP_MP_STATE
+    int r;
+    struct kvm_mp_state mp_state;
+
+    r = kvm_get_mpstate(env, &mp_state);
+    if (r < 0) {
+        env->mp_state = -1;
+    } else {
+        env->mp_state = mp_state.mp_state;
+        if (kvm_irqchip_in_kernel()) {
+            env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
+        }
+    }
+#else
+    env->mp_state = -1;
+#endif
+}
+
+static void kvm_arch_load_mpstate(CPUState *env)
+{
+#ifdef KVM_CAP_MP_STATE
+    struct kvm_mp_state mp_state;
+
+    /*
+     * -1 indicates that the host did not support GET_MP_STATE ioctl,
+     *  so don't touch it.
+     */
+    if (env->mp_state != -1) {
+        if (kvm_irqchip_in_kernel()) {
+            env->mp_state = env->halted ? KVM_MP_STATE_UNINITIALIZED :
+                                          KVM_MP_STATE_RUNNABLE;
+            /* Avoid deadlock: no user space IRQ will ever clear it. */
+            env->halted = 0;
+        }
+        mp_state.mp_state = env->mp_state;
+        kvm_set_mpstate(env, &mp_state);
+    }
+#endif
+}
+
 static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
 {
     lhs->selector = rhs->selector;
@@ -926,6 +968,10 @@  void kvm_arch_load_regs(CPUState *env, int level)
     rc = kvm_set_msrs(env, msrs, n);
     if (rc == -1)
         perror("kvm_set_msrs FAILED");
+
+    if (level >= KVM_PUT_RESET_STATE) {
+        kvm_arch_load_mpstate(env);
+    }
 }
 
 void kvm_load_tsc(CPUState *env)
@@ -940,36 +986,6 @@  void kvm_load_tsc(CPUState *env)
         perror("kvm_set_tsc FAILED.\n");
 }
 
-void kvm_arch_save_mpstate(CPUState *env)
-{
-#ifdef KVM_CAP_MP_STATE
-    int r;
-    struct kvm_mp_state mp_state;
-
-    r = kvm_get_mpstate(env, &mp_state);
-    if (r < 0)
-        env->mp_state = -1;
-    else
-        env->mp_state = mp_state.mp_state;
-#else
-    env->mp_state = -1;
-#endif
-}
-
-void kvm_arch_load_mpstate(CPUState *env)
-{
-#ifdef KVM_CAP_MP_STATE
-    struct kvm_mp_state mp_state = { .mp_state = env->mp_state };
-
-    /*
-     * -1 indicates that the host did not support GET_MP_STATE ioctl,
-     *  so don't touch it.
-     */
-    if (env->mp_state != -1)
-        kvm_set_mpstate(env, &mp_state);
-#endif
-}
-
 void kvm_arch_save_regs(CPUState *env)
 {
     struct kvm_regs regs;
@@ -1366,15 +1382,9 @@  void kvm_arch_cpu_reset(CPUState *env)
 {
     kvm_arch_reset_vcpu(env);
     kvm_put_vcpu_events(env);
-    if (!cpu_is_bsp(env)) {
-	if (kvm_irqchip_in_kernel()) {
-#ifdef KVM_CAP_MP_STATE
-	    kvm_reset_mpstate(env);
-#endif
-	} else {
-	    env->interrupt_request &= ~CPU_INTERRUPT_HARD;
-	    env->halted = 1;
-	}
+    if (!cpu_is_bsp(env) && !kvm_irqchip_in_kernel()) {
+        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+        env->halted = 1;
     }
 }
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 53030f1..efa6a29 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1579,36 +1579,6 @@  void kvm_update_interrupt_request(CPUState *env)
     }
 }
 
-static void kvm_do_load_mpstate(void *_env)
-{
-    CPUState *env = _env;
-
-    kvm_arch_load_mpstate(env);
-}
-
-void kvm_load_mpstate(CPUState *env)
-{
-    if (kvm_enabled() && qemu_system_ready && kvm_vcpu_inited(env))
-        on_vcpu(env, kvm_do_load_mpstate, env);
-}
-
-static void kvm_do_save_mpstate(void *_env)
-{
-    CPUState *env = _env;
-
-    kvm_arch_save_mpstate(env);
-#ifdef KVM_CAP_MP_STATE
-    if (kvm_irqchip_in_kernel())
-        env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
-#endif
-}
-
-void kvm_save_mpstate(CPUState *env)
-{
-    if (kvm_enabled())
-        on_vcpu(env, kvm_do_save_mpstate, env);
-}
-
 int kvm_cpu_exec(CPUState *env)
 {
     int r;
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 6d785a0..aa7bcce 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -299,16 +299,6 @@  int kvm_get_mpstate(CPUState *env, struct kvm_mp_state *mp_state);
  *
  */
 int kvm_set_mpstate(CPUState *env, struct kvm_mp_state *mp_state);
-/*!
- *  * \brief Reset VCPU MP state
- *
- */
-static inline int kvm_reset_mpstate(CPUState *env)
-{
-    struct kvm_mp_state mp_state = {.mp_state = KVM_MP_STATE_UNINITIALIZED
-    };
-    return kvm_set_mpstate(env, &mp_state);
-}
 #endif
 
 /*!
@@ -874,8 +864,6 @@  static inline void kvm_inject_x86_mce(CPUState *cenv, int bank,
 int kvm_main_loop(void);
 int kvm_init_ap(void);
 int kvm_vcpu_inited(CPUState *env);
-void kvm_load_mpstate(CPUState *env);
-void kvm_save_mpstate(CPUState *env);
 void kvm_apic_init(CPUState *env);
 /* called from vcpu initialization */
 void qemu_kvm_load_lapic(CPUState *env);
@@ -909,8 +897,6 @@  int kvm_arch_qemu_create_context(void);
 
 void kvm_arch_save_regs(CPUState *env);
 void kvm_arch_load_regs(CPUState *env, int level);
-void kvm_arch_load_mpstate(CPUState *env);
-void kvm_arch_save_mpstate(CPUState *env);
 int kvm_arch_has_work(CPUState *env);
 void kvm_arch_process_irqchip_events(CPUState *env);
 int kvm_arch_try_push_interrupts(void *opaque);
@@ -979,7 +965,6 @@  void kvm_load_tsc(CPUState *env);
 #ifdef TARGET_I386
 #define qemu_kvm_has_pit_state2() (0)
 #endif
-#define kvm_save_mpstate(env)   do {} while(0)
 #define qemu_kvm_cpu_stop(env) do {} while(0)
 static inline void kvm_load_tsc(CPUState *env)
 {
diff --git a/target-i386/machine.c b/target-i386/machine.c
index bebde82..61e6a87 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -323,7 +323,6 @@  static void cpu_pre_save(void *opaque)
     int i;
 
     if (kvm_enabled()) {
-        kvm_save_mpstate(env);
         kvm_get_vcpu_events(env);
     }
 
@@ -362,12 +361,7 @@  static int cpu_post_load(void *opaque, int version_id)
     tlb_flush(env, 1);
 
     if (kvm_enabled()) {
-        /* when in-kernel irqchip is used, env->halted causes deadlock
-           because no userspace IRQs will ever clear this flag */
-        env->halted = 0;
-
         kvm_load_tsc(env);
-        kvm_load_mpstate(env);
         kvm_put_vcpu_events(env);
     }
 
diff --git a/target-ia64/machine.c b/target-ia64/machine.c
index fdbeeef..8cf5bdd 100644
--- a/target-ia64/machine.c
+++ b/target-ia64/machine.c
@@ -4,6 +4,9 @@ 
 #include "exec-all.h"
 #include "qemu-kvm.h"
 
+void kvm_arch_save_mpstate(CPUState *env);
+void kvm_arch_load_mpstate(CPUState *env);
+
 void cpu_save(QEMUFile *f, void *opaque)
 {
     CPUState *env = opaque;