diff mbox

target-i386: clear bsp bit when designating bsp

Message ID 1427932716-11800-1-git-send-email-namit@cs.technion.ac.il
State New
Headers show

Commit Message

Nadav Amit April 1, 2015, 11:58 p.m. UTC
Since the BSP bit is writable on real hardware, during reset all the CPUs which
were not chosen to be the BSP should have their BSP bit cleared. This fix is
required for KVM to work correctly when it changes the BSP bit.

An additional fix is required for QEMU tcg to allow software to change the BSP
bit.

Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
---
 hw/intc/apic_common.c  | 8 ++++++--
 include/hw/i386/apic.h | 2 +-
 target-i386/cpu.c      | 4 +---
 3 files changed, 8 insertions(+), 6 deletions(-)

Comments

Paolo Bonzini April 2, 2015, 12:34 p.m. UTC | #1
On 02/04/2015 01:58, Nadav Amit wrote:
> Since the BSP bit is writable on real hardware, during reset all the CPUs which
> were not chosen to be the BSP should have their BSP bit cleared. This fix is
> required for KVM to work correctly when it changes the BSP bit.
> 
> An additional fix is required for QEMU tcg to allow software to change the BSP
> bit.
> 
> Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
> ---
>  hw/intc/apic_common.c  | 8 ++++++--
>  include/hw/i386/apic.h | 2 +-
>  target-i386/cpu.c      | 4 +---
>  3 files changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
> index 0858b45..042e960 100644
> --- a/hw/intc/apic_common.c
> +++ b/hw/intc/apic_common.c
> @@ -215,14 +215,18 @@ void apic_init_reset(DeviceState *dev)
>      }
>  }
>  
> -void apic_designate_bsp(DeviceState *dev)
> +void apic_designate_bsp(DeviceState *dev, bool bsp)
>  {
>      if (dev == NULL) {
>          return;
>      }
>  
>      APICCommonState *s = APIC_COMMON(dev);
> -    s->apicbase |= MSR_IA32_APICBASE_BSP;
> +    if (bsp) {
> +        s->apicbase |= MSR_IA32_APICBASE_BSP;
> +    } else {
> +        s->apicbase &= ~MSR_IA32_APICBASE_BSP;
> +    }
>  }
>  
>  static void apic_reset_common(DeviceState *dev)
> diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h
> index 1d48e02..51eb6d3 100644
> --- a/include/hw/i386/apic.h
> +++ b/include/hw/i386/apic.h
> @@ -21,7 +21,7 @@ void apic_sipi(DeviceState *s);
>  void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip,
>                                     TPRAccess access);
>  void apic_poll_irq(DeviceState *d);
> -void apic_designate_bsp(DeviceState *d);
> +void apic_designate_bsp(DeviceState *d, bool bsp);
>  
>  /* pc.c */
>  DeviceState *cpu_get_current_apic(void);
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index b2d1c95..03b33cf 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -2714,9 +2714,7 @@ static void x86_cpu_reset(CPUState *s)
>  
>  #if !defined(CONFIG_USER_ONLY)
>      /* We hard-wire the BSP to the first CPU. */
> -    if (s->cpu_index == 0) {
> -        apic_designate_bsp(cpu->apic_state);
> -    }
> +    apic_designate_bsp(cpu->apic_state, s->cpu_index == 0);
>  
>      s->halted = !cpu_is_bsp(cpu);
>  
> 

Thanks, applied locally.

Paolo
Andreas Färber April 7, 2015, 10:15 a.m. UTC | #2
+Eduardo as target-i386 maintainer

Am 02.04.2015 um 14:34 schrieb Paolo Bonzini:
> 
> 
> On 02/04/2015 01:58, Nadav Amit wrote:
>> Since the BSP bit is writable on real hardware, during reset all the CPUs which
>> were not chosen to be the BSP should have their BSP bit cleared. This fix is
>> required for KVM to work correctly when it changes the BSP bit.
>>
>> An additional fix is required for QEMU tcg to allow software to change the BSP
>> bit.
>>
>> Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
>> ---
>>  hw/intc/apic_common.c  | 8 ++++++--
>>  include/hw/i386/apic.h | 2 +-
>>  target-i386/cpu.c      | 4 +---
>>  3 files changed, 8 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
>> index 0858b45..042e960 100644
>> --- a/hw/intc/apic_common.c
>> +++ b/hw/intc/apic_common.c
>> @@ -215,14 +215,18 @@ void apic_init_reset(DeviceState *dev)
>>      }
>>  }
>>  
>> -void apic_designate_bsp(DeviceState *dev)
>> +void apic_designate_bsp(DeviceState *dev, bool bsp)
>>  {
>>      if (dev == NULL) {
>>          return;
>>      }
>>  
>>      APICCommonState *s = APIC_COMMON(dev);
>> -    s->apicbase |= MSR_IA32_APICBASE_BSP;
>> +    if (bsp) {
>> +        s->apicbase |= MSR_IA32_APICBASE_BSP;
>> +    } else {
>> +        s->apicbase &= ~MSR_IA32_APICBASE_BSP;
>> +    }
>>  }
>>  
>>  static void apic_reset_common(DeviceState *dev)
>> diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h
>> index 1d48e02..51eb6d3 100644
>> --- a/include/hw/i386/apic.h
>> +++ b/include/hw/i386/apic.h
>> @@ -21,7 +21,7 @@ void apic_sipi(DeviceState *s);
>>  void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip,
>>                                     TPRAccess access);
>>  void apic_poll_irq(DeviceState *d);
>> -void apic_designate_bsp(DeviceState *d);
>> +void apic_designate_bsp(DeviceState *d, bool bsp);
>>  
>>  /* pc.c */
>>  DeviceState *cpu_get_current_apic(void);
>> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
>> index b2d1c95..03b33cf 100644
>> --- a/target-i386/cpu.c
>> +++ b/target-i386/cpu.c
>> @@ -2714,9 +2714,7 @@ static void x86_cpu_reset(CPUState *s)
>>  
>>  #if !defined(CONFIG_USER_ONLY)
>>      /* We hard-wire the BSP to the first CPU. */
>> -    if (s->cpu_index == 0) {
>> -        apic_designate_bsp(cpu->apic_state);
>> -    }
>> +    apic_designate_bsp(cpu->apic_state, s->cpu_index == 0);
>>  
>>      s->halted = !cpu_is_bsp(cpu);
>>  
>>
> 
> Thanks, applied locally.

I don't understand why this is necessary: The cpu_index doesn't change,
therefore the BSP designation won't change either. Is this a hot-unplug
preparation? Or is a KVM-specific call missing here? Either way, the
commit message could use some clarification.

The API change itself looks okay.

Regards,
Andreas
Paolo Bonzini April 7, 2015, 10:34 a.m. UTC | #3
On 07/04/2015 12:15, Andreas Färber wrote:
>>> >> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
>>> >> index b2d1c95..03b33cf 100644
>>> >> --- a/target-i386/cpu.c
>>> >> +++ b/target-i386/cpu.c
>>> >> @@ -2714,9 +2714,7 @@ static void x86_cpu_reset(CPUState *s)
>>> >>  
>>> >>  #if !defined(CONFIG_USER_ONLY)
>>> >>      /* We hard-wire the BSP to the first CPU. */
>>> >> -    if (s->cpu_index == 0) {
>>> >> -        apic_designate_bsp(cpu->apic_state);
>>> >> -    }
>>> >> +    apic_designate_bsp(cpu->apic_state, s->cpu_index == 0);
>>> >>  
>>> >>      s->halted = !cpu_is_bsp(cpu);
>>> >>  
>>> >>
>> > 
>> > Thanks, applied locally.
> I don't understand why this is necessary: The cpu_index doesn't change,
> therefore the BSP designation won't change either.

It can change at runtime, though, if you're using the KVM in-kernel LAPIC.

Paolo

 Is this a hot-unplug
> preparation? Or is a KVM-specific call missing here? Either way, the
> commit message could use some clarification.
Andreas Färber April 7, 2015, 10:44 a.m. UTC | #4
Am 07.04.2015 um 12:34 schrieb Paolo Bonzini:
> On 07/04/2015 12:15, Andreas Färber wrote:
>>>>>> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
>>>>>> index b2d1c95..03b33cf 100644
>>>>>> --- a/target-i386/cpu.c
>>>>>> +++ b/target-i386/cpu.c
>>>>>> @@ -2714,9 +2714,7 @@ static void x86_cpu_reset(CPUState *s)
>>>>>>  
>>>>>>  #if !defined(CONFIG_USER_ONLY)
>>>>>>      /* We hard-wire the BSP to the first CPU. */
>>>>>> -    if (s->cpu_index == 0) {
>>>>>> -        apic_designate_bsp(cpu->apic_state);
>>>>>> -    }
>>>>>> +    apic_designate_bsp(cpu->apic_state, s->cpu_index == 0);
>>>>>>  
>>>>>>      s->halted = !cpu_is_bsp(cpu);
>>>>>>  
>>>>>>
>>>>
>>>> Thanks, applied locally.
>> I don't understand why this is necessary: The cpu_index doesn't change,
>> therefore the BSP designation won't change either.
> 
> It can change at runtime, though, if you're using the KVM in-kernel LAPIC.

Got a pointer? A quick git-grep doesn't show anything in hw/ or
kvm-all.c or target-i386/ assigning cpu_index, so it'll always have the
initial value.

Andreas
Paolo Bonzini April 7, 2015, 10:54 a.m. UTC | #5
On 07/04/2015 12:44, Andreas Färber wrote:
>> > It can change at runtime, though, if you're using the KVM in-kernel LAPIC.
> Got a pointer? A quick git-grep doesn't show anything in hw/ or
> kvm-all.c or target-i386/ assigning cpu_index, so it'll always have the
> initial value.

Not cpu_index, s->apicbase's MSR_IA32_APICBASE_BSP bit can change with
KVM in-kernel LAPIC.  It cannot change with QEMU's userspace LAPIC.

Because it can change, you have to call apic_designate_bsp for all CPUs
and not only on CPU 0.

Paolo
Andreas Färber April 7, 2015, 11:09 a.m. UTC | #6
Am 07.04.2015 um 12:54 schrieb Paolo Bonzini:
> On 07/04/2015 12:44, Andreas Färber wrote:
>>>> It can change at runtime, though, if you're using the KVM in-kernel LAPIC.
>> Got a pointer? A quick git-grep doesn't show anything in hw/ or
>> kvm-all.c or target-i386/ assigning cpu_index, so it'll always have the
>> initial value.
> 
> Not cpu_index, s->apicbase's MSR_IA32_APICBASE_BSP bit can change with
> KVM in-kernel LAPIC.  It cannot change with QEMU's userspace LAPIC.
> 
> Because it can change, you have to call apic_designate_bsp for all CPUs
> and not only on CPU 0.

Now I'm even more confused. Surely CPUState is initially
zero-initialized. Then we designate one as BSP on reset. That should be
the same result as designating all non-BSP CPUs, no? The only way that
would change is then cpu_index == 0 goes away (hot-unplug, not
implemented yet), and in that case it would be about designating a
different CPU, not about un-designating one.

If this is some issue with sync'ing state back and forth before QEMU and
KVM then the real issue has not been explained.

Andreas
Andreas Färber April 7, 2015, 11:57 a.m. UTC | #7
Am 07.04.2015 um 13:09 schrieb Andreas Färber:
> Am 07.04.2015 um 12:54 schrieb Paolo Bonzini:
>> On 07/04/2015 12:44, Andreas Färber wrote:
>>>>> It can change at runtime, though, if you're using the KVM in-kernel LAPIC.
>>> Got a pointer? A quick git-grep doesn't show anything in hw/ or
>>> kvm-all.c or target-i386/ assigning cpu_index, so it'll always have the
>>> initial value.
>>
>> Not cpu_index, s->apicbase's MSR_IA32_APICBASE_BSP bit can change with
>> KVM in-kernel LAPIC.  It cannot change with QEMU's userspace LAPIC.
>>
>> Because it can change, you have to call apic_designate_bsp for all CPUs
>> and not only on CPU 0.
> 
> Now I'm even more confused. Surely CPUState is initially
> zero-initialized. Then we designate one as BSP on reset. That should be
> the same result as designating all non-BSP CPUs, no? The only way that
> would change is then cpu_index == 0 goes away (hot-unplug, not
> implemented yet), and in that case it would be about designating a
> different CPU, not about un-designating one.
> 
> If this is some issue with sync'ing state back and forth before QEMU and
> KVM then the real issue has not been explained.

Hm, hw/intc/apic_common.c:apic_reset_common() has:

    bsp = cpu_is_bsp(s->cpu);
    s->apicbase = APIC_DEFAULT_ADDRESS |
        (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;

What this is doing is really:

    bsp = cpu_get_apic_base(s->cpu->apic_state) & MSR_IA32_APICBASE_BSP;
    s->apicbase = APIC_DEFAULT_ADDRESS |
        (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;

Unless I'm missing something, since we are in the APIC device's reset
function, this is effectively a twisted way of writing:

    bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
    s->apicbase = APIC_DEFAULT_ADDRESS |
        (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;

In which case we already relied on s->cpu and could thus simply change
this to something like:

    bsp = CPU(s->cpu)->cpu_index == 0;
    s->apicbase = APIC_DEFAULT_ADDRESS |
        (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;

Then the apicbase manipulation would be nicely encapsulated in the APIC
rather than the APIC reset retaining it and the CPU reset meddling with
its state.

Andreas
Igor Mammedov April 7, 2015, 1:14 p.m. UTC | #8
On Tue, 07 Apr 2015 13:57:34 +0200
Andreas Färber <afaerber@suse.de> wrote:

> Am 07.04.2015 um 13:09 schrieb Andreas Färber:
> > Am 07.04.2015 um 12:54 schrieb Paolo Bonzini:
> >> On 07/04/2015 12:44, Andreas Färber wrote:
> >>>>> It can change at runtime, though, if you're using the KVM
> >>>>> in-kernel LAPIC.
> >>> Got a pointer? A quick git-grep doesn't show anything in hw/ or
> >>> kvm-all.c or target-i386/ assigning cpu_index, so it'll always
> >>> have the initial value.
> >>
> >> Not cpu_index, s->apicbase's MSR_IA32_APICBASE_BSP bit can change
> >> with KVM in-kernel LAPIC.  It cannot change with QEMU's userspace
> >> LAPIC.
> >>
> >> Because it can change, you have to call apic_designate_bsp for all
> >> CPUs and not only on CPU 0.
> > 
> > Now I'm even more confused. Surely CPUState is initially
> > zero-initialized. Then we designate one as BSP on reset. That
> > should be the same result as designating all non-BSP CPUs, no? The
> > only way that would change is then cpu_index == 0 goes away
> > (hot-unplug, not implemented yet), and in that case it would be
> > about designating a different CPU, not about un-designating one.
> > 
> > If this is some issue with sync'ing state back and forth before
> > QEMU and KVM then the real issue has not been explained.
guest can set BSP flag in apicbase of non the first CPU and then o reset
on KVM exit it will be propagated into QEMU's state
kvm_arch_post_run() -> cpu_set_apic_base()

as result with current code after reset we will have the first CPU with
BSP bit and another one since nobody bothered to clear it.

That's what this patch does. 

[...]
> Unless I'm missing something, since we are in the APIC device's reset
> function, this is effectively a twisted way of writing:
> 
>     bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
>     s->apicbase = APIC_DEFAULT_ADDRESS |
>         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
> 
> In which case we already relied on s->cpu and could thus simply change
> this to something like:
> 
>     bsp = CPU(s->cpu)->cpu_index == 0;
^^^ shouldn't be part of APIC code, APIC shouldn't have anything to do
with cpu_index.

we do above in CPU code currently
    /* We hard-wire the BSP to the first CPU. */
    if (s->cpu_index == 0) {
        apic_designate_bsp(cpu->apic_state);
    }


>     s->apicbase = APIC_DEFAULT_ADDRESS |
>         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
> 
> Then the apicbase manipulation would be nicely encapsulated in the
> APIC rather than the APIC reset retaining it and the CPU reset
> meddling with its state.
> 
> Andreas
>
Paolo Bonzini April 7, 2015, 1:18 p.m. UTC | #9
On 07/04/2015 13:57, Andreas Färber wrote:
>> > If this is some issue with sync'ing state back and forth before QEMU and
>> > KVM then the real issue has not been explained.
> Hm, hw/intc/apic_common.c:apic_reset_common() has:
> 
>     bsp = cpu_is_bsp(s->cpu);
>     s->apicbase = APIC_DEFAULT_ADDRESS |
>         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
> 
> What this is doing is really:
> 
>     bsp = cpu_get_apic_base(s->cpu->apic_state) & MSR_IA32_APICBASE_BSP;
>     s->apicbase = APIC_DEFAULT_ADDRESS |
>         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
> 
> Unless I'm missing something, since we are in the APIC device's reset
> function, this is effectively a twisted way of writing:
> 
>     bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
>     s->apicbase = APIC_DEFAULT_ADDRESS |
>         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;

Yes, this is more readable.

> In which case we already relied on s->cpu and could thus simply change
> this to something like:
> 
>     bsp = CPU(s->cpu)->cpu_index == 0;
>     s->apicbase = APIC_DEFAULT_ADDRESS |
>         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
> 
> Then the apicbase manipulation would be nicely encapsulated in the APIC
> rather than the APIC reset retaining it and the CPU reset meddling with
> its state.

... but I find this worse.  apic_designate_bsp corresponds to hardware
executing the MP initialization protocol.

Paolo
Andreas Färber April 7, 2015, 1:24 p.m. UTC | #10
Am 07.04.2015 um 15:14 schrieb Igor Mammedov:
> On Tue, 07 Apr 2015 13:57:34 +0200
> Andreas Färber <afaerber@suse.de> wrote:
> 
>> Am 07.04.2015 um 13:09 schrieb Andreas Färber:
>>> Am 07.04.2015 um 12:54 schrieb Paolo Bonzini:
>>>> On 07/04/2015 12:44, Andreas Färber wrote:
>>>>>>> It can change at runtime, though, if you're using the KVM
>>>>>>> in-kernel LAPIC.
>>>>> Got a pointer? A quick git-grep doesn't show anything in hw/ or
>>>>> kvm-all.c or target-i386/ assigning cpu_index, so it'll always
>>>>> have the initial value.
>>>>
>>>> Not cpu_index, s->apicbase's MSR_IA32_APICBASE_BSP bit can change
>>>> with KVM in-kernel LAPIC.  It cannot change with QEMU's userspace
>>>> LAPIC.
>>>>
>>>> Because it can change, you have to call apic_designate_bsp for all
>>>> CPUs and not only on CPU 0.
>>>
>>> Now I'm even more confused. Surely CPUState is initially
>>> zero-initialized. Then we designate one as BSP on reset. That
>>> should be the same result as designating all non-BSP CPUs, no? The
>>> only way that would change is then cpu_index == 0 goes away
>>> (hot-unplug, not implemented yet), and in that case it would be
>>> about designating a different CPU, not about un-designating one.
>>>
>>> If this is some issue with sync'ing state back and forth before
>>> QEMU and KVM then the real issue has not been explained.
> guest can set BSP flag in apicbase of non the first CPU and then o reset
> on KVM exit it will be propagated into QEMU's state
> kvm_arch_post_run() -> cpu_set_apic_base()
> 
> as result with current code after reset we will have the first CPU with
> BSP bit and another one since nobody bothered to clear it.
> 
> That's what this patch does. 
> 
> [...]
>> Unless I'm missing something, since we are in the APIC device's reset
>> function, this is effectively a twisted way of writing:
>>
>>     bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
>>     s->apicbase = APIC_DEFAULT_ADDRESS |
>>         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
>>
>> In which case we already relied on s->cpu and could thus simply change
>> this to something like:
>>
>>     bsp = CPU(s->cpu)->cpu_index == 0;
> ^^^ shouldn't be part of APIC code, APIC shouldn't have anything to do
> with cpu_index.
> 
> we do above in CPU code currently
>     /* We hard-wire the BSP to the first CPU. */
>     if (s->cpu_index == 0) {
>         apic_designate_bsp(cpu->apic_state);
>     }

I know, that's what this patch is changing, and I am saying that by the
same logic the CPU has no business fiddling with the APIC's apicbase
field when the APIC's reset is touching that very same field.

And I do remember that we arrived at this solution to get to a halfway
simple generic reset semantics, moving an external BSP designation into
the reset handling. However, this patch is making the twisted logic
worse IMO.

Andreas

> 
> 
>>     s->apicbase = APIC_DEFAULT_ADDRESS |
>>         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
>>
>> Then the apicbase manipulation would be nicely encapsulated in the
>> APIC rather than the APIC reset retaining it and the CPU reset
>> meddling with its state.
>>
>> Andreas
>>
>
Denis V. Lunev April 7, 2015, 1:26 p.m. UTC | #11
On 07/04/15 16:18, Paolo Bonzini wrote:
>
>
> On 07/04/2015 13:57, Andreas Färber wrote:
>>>> If this is some issue with sync'ing state back and forth before QEMU and
>>>> KVM then the real issue has not been explained.
>> Hm, hw/intc/apic_common.c:apic_reset_common() has:
>>
>>      bsp = cpu_is_bsp(s->cpu);
>>      s->apicbase = APIC_DEFAULT_ADDRESS |
>>          (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
>>
>> What this is doing is really:
>>
>>      bsp = cpu_get_apic_base(s->cpu->apic_state) & MSR_IA32_APICBASE_BSP;
>>      s->apicbase = APIC_DEFAULT_ADDRESS |
>>          (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
>>
>> Unless I'm missing something, since we are in the APIC device's reset
>> function, this is effectively a twisted way of writing:
>>
>>      bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
>>      s->apicbase = APIC_DEFAULT_ADDRESS |
>>          (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
>
> Yes, this is more readable.

just $0.02 :)

why don't
         bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
         s->apicbase =
		APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE;
in this case. This looks the same from the textual point of view.

Den
Paolo Bonzini April 7, 2015, 1:29 p.m. UTC | #12
On 07/04/2015 15:26, Denis V. Lunev wrote:
> On 07/04/15 16:18, Paolo Bonzini wrote:
>>
>>
>> On 07/04/2015 13:57, Andreas Färber wrote:
>>>>> If this is some issue with sync'ing state back and forth before
>>>>> QEMU and
>>>>> KVM then the real issue has not been explained.
>>> Hm, hw/intc/apic_common.c:apic_reset_common() has:
>>>
>>>      bsp = cpu_is_bsp(s->cpu);
>>>      s->apicbase = APIC_DEFAULT_ADDRESS |
>>>          (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
>>>
>>> What this is doing is really:
>>>
>>>      bsp = cpu_get_apic_base(s->cpu->apic_state) &
>>> MSR_IA32_APICBASE_BSP;
>>>      s->apicbase = APIC_DEFAULT_ADDRESS |
>>>          (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
>>>
>>> Unless I'm missing something, since we are in the APIC device's reset
>>> function, this is effectively a twisted way of writing:
>>>
>>>      bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
>>>      s->apicbase = APIC_DEFAULT_ADDRESS |
>>>          (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
>>
>> Yes, this is more readable.
> 
> just $0.02 :)
> 
> why don't
>         bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
>         s->apicbase =
>         APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE;
> in this case. This looks the same from the textual point of view.

Yes.  Would you like to send a patch?

Paolo
Paolo Bonzini April 7, 2015, 1:29 p.m. UTC | #13
On 07/04/2015 15:24, Andreas Färber wrote:
>> >     /* We hard-wire the BSP to the first CPU. */
>> >     if (s->cpu_index == 0) {
>> >         apic_designate_bsp(cpu->apic_state);
>> >     }
> I know, that's what this patch is changing, and I am saying that by the
> same logic the CPU has no business fiddling with the APIC's apicbase
> field when the APIC's reset is touching that very same field.

That's exactly what a real CPU does on power up or #RESET, though.

Paolo
Andreas Färber April 7, 2015, 1:40 p.m. UTC | #14
Am 07.04.2015 um 15:29 schrieb Paolo Bonzini:
> On 07/04/2015 15:24, Andreas Färber wrote:
>>>>     /* We hard-wire the BSP to the first CPU. */
>>>>     if (s->cpu_index == 0) {
>>>>         apic_designate_bsp(cpu->apic_state);
>>>>     }
>> I know, that's what this patch is changing, and I am saying that by the
>> same logic the CPU has no business fiddling with the APIC's apicbase
>> field when the APIC's reset is touching that very same field.
> 
> That's exactly what a real CPU does on power up or #RESET, though.

Does the APIC retain its BSP bit value on #RESET though? I doubt it.
It feels we're awkwardly working around qdev reset semantics here...

If you say the CPU must be in charge, then we should assure that the
APIC is reset before the CPU designates it and not have the APIC reset
callback retain such bits.

Admittedly, if this were for-2.3 (as which it is not marked) then this
patch may be the least intrusive. But it isn't and I've been preparing
to refactor the CPU-APIC relationship, so I really want to get it right
long-term.

Andreas
Denis V. Lunev April 7, 2015, 1:44 p.m. UTC | #15
On 07/04/15 16:29, Paolo Bonzini wrote:
>
>
> On 07/04/2015 15:26, Denis V. Lunev wrote:
>> On 07/04/15 16:18, Paolo Bonzini wrote:
>>>
>>>
>>> On 07/04/2015 13:57, Andreas Färber wrote:
>>>>>> If this is some issue with sync'ing state back and forth before
>>>>>> QEMU and
>>>>>> KVM then the real issue has not been explained.
>>>> Hm, hw/intc/apic_common.c:apic_reset_common() has:
>>>>
>>>>       bsp = cpu_is_bsp(s->cpu);
>>>>       s->apicbase = APIC_DEFAULT_ADDRESS |
>>>>           (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
>>>>
>>>> What this is doing is really:
>>>>
>>>>       bsp = cpu_get_apic_base(s->cpu->apic_state) &
>>>> MSR_IA32_APICBASE_BSP;
>>>>       s->apicbase = APIC_DEFAULT_ADDRESS |
>>>>           (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
>>>>
>>>> Unless I'm missing something, since we are in the APIC device's reset
>>>> function, this is effectively a twisted way of writing:
>>>>
>>>>       bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
>>>>       s->apicbase = APIC_DEFAULT_ADDRESS |
>>>>           (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
>>>
>>> Yes, this is more readable.
>>
>> just $0.02 :)
>>
>> why don't
>>          bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
>>          s->apicbase =
>>          APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE;
>> in this case. This looks the same from the textual point of view.
>
> Yes.  Would you like to send a patch?
>
> Paolo
>

no prob, just give me 2 minutes. Side note, bsp will become uint32_t
and we will loose tracepoint inside cpu_get_apic_base() on this
path...

Den
Andreas Färber April 7, 2015, 1:47 p.m. UTC | #16
Am 07.04.2015 um 15:44 schrieb Denis V. Lunev:
> On 07/04/15 16:29, Paolo Bonzini wrote:
>>
>>
>> On 07/04/2015 15:26, Denis V. Lunev wrote:
>>> On 07/04/15 16:18, Paolo Bonzini wrote:
>>>>
>>>>
>>>> On 07/04/2015 13:57, Andreas Färber wrote:
>>>>>>> If this is some issue with sync'ing state back and forth before
>>>>>>> QEMU and
>>>>>>> KVM then the real issue has not been explained.
>>>>> Hm, hw/intc/apic_common.c:apic_reset_common() has:
>>>>>
>>>>>       bsp = cpu_is_bsp(s->cpu);
>>>>>       s->apicbase = APIC_DEFAULT_ADDRESS |
>>>>>           (bsp ? MSR_IA32_APICBASE_BSP : 0) |
>>>>> MSR_IA32_APICBASE_ENABLE;
>>>>>
>>>>> What this is doing is really:
>>>>>
>>>>>       bsp = cpu_get_apic_base(s->cpu->apic_state) &
>>>>> MSR_IA32_APICBASE_BSP;
>>>>>       s->apicbase = APIC_DEFAULT_ADDRESS |
>>>>>           (bsp ? MSR_IA32_APICBASE_BSP : 0) |
>>>>> MSR_IA32_APICBASE_ENABLE;
>>>>>
>>>>> Unless I'm missing something, since we are in the APIC device's reset
>>>>> function, this is effectively a twisted way of writing:
>>>>>
>>>>>       bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
>>>>>       s->apicbase = APIC_DEFAULT_ADDRESS |
>>>>>           (bsp ? MSR_IA32_APICBASE_BSP : 0) |
>>>>> MSR_IA32_APICBASE_ENABLE;
>>>>
>>>> Yes, this is more readable.
>>>
>>> just $0.02 :)
>>>
>>> why don't
>>>          bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
>>>          s->apicbase =
>>>          APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE;
>>> in this case. This looks the same from the textual point of view.
>>
>> Yes.  Would you like to send a patch?
>>
>> Paolo
>>
> 
> no prob, just give me 2 minutes. Side note, bsp will become uint32_t
> and we will loose tracepoint inside cpu_get_apic_base() on this
> path...

Yes, I intentionally left it out above as I don't think we need to trace
the local usage here. Worth mentioning in the commit message though.

Regards,
Andreas
Paolo Bonzini April 7, 2015, 1:54 p.m. UTC | #17
On 07/04/2015 15:40, Andreas Färber wrote:
> Am 07.04.2015 um 15:29 schrieb Paolo Bonzini:
>> On 07/04/2015 15:24, Andreas Färber wrote:
>>>>>     /* We hard-wire the BSP to the first CPU. */
>>>>>     if (s->cpu_index == 0) {
>>>>>         apic_designate_bsp(cpu->apic_state);
>>>>>     }
>>> I know, that's what this patch is changing, and I am saying that by the
>>> same logic the CPU has no business fiddling with the APIC's apicbase
>>> field when the APIC's reset is touching that very same field.
>>
>> That's exactly what a real CPU does on power up or #RESET, though.
> 
> Does the APIC retain its BSP bit value on #RESET though? I doubt it.

You cannot tell, since the MP protocol reruns immediately after a full
reset.  I think we do this in apic_cpu_reset to avoid mess with the
initialization order of the APIC and CPU.

> It feels we're awkwardly working around qdev reset semantics here...
> 
> If you say the CPU must be in charge, then we should assure that the
> APIC is reset before the CPU designates it and not have the APIC reset
> callback retain such bits.

Yes, I agree, but as you know very well the propagation of signals (be
it "reset" or "realize") is a mess.

Even if you make the APIC a QOM child of the CPU, this doesn't mean that
qdev reset (which is post-order) propagates to the APIC before
propagating to the CPU.

> Admittedly, if this were for-2.3 (as which it is not marked) then this
> patch may be the least intrusive. But it isn't and I've been preparing
> to refactor the CPU-APIC relationship, so I really want to get it right
> long-term.

Well, actually I did post it for inclusion in 2.3 since it affected only
KVM and it would be ugly to have 4.0 fail kvm-unit-tests with all
existing QEMU releases.

Paolo
diff mbox

Patch

diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 0858b45..042e960 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -215,14 +215,18 @@  void apic_init_reset(DeviceState *dev)
     }
 }
 
-void apic_designate_bsp(DeviceState *dev)
+void apic_designate_bsp(DeviceState *dev, bool bsp)
 {
     if (dev == NULL) {
         return;
     }
 
     APICCommonState *s = APIC_COMMON(dev);
-    s->apicbase |= MSR_IA32_APICBASE_BSP;
+    if (bsp) {
+        s->apicbase |= MSR_IA32_APICBASE_BSP;
+    } else {
+        s->apicbase &= ~MSR_IA32_APICBASE_BSP;
+    }
 }
 
 static void apic_reset_common(DeviceState *dev)
diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h
index 1d48e02..51eb6d3 100644
--- a/include/hw/i386/apic.h
+++ b/include/hw/i386/apic.h
@@ -21,7 +21,7 @@  void apic_sipi(DeviceState *s);
 void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip,
                                    TPRAccess access);
 void apic_poll_irq(DeviceState *d);
-void apic_designate_bsp(DeviceState *d);
+void apic_designate_bsp(DeviceState *d, bool bsp);
 
 /* pc.c */
 DeviceState *cpu_get_current_apic(void);
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index b2d1c95..03b33cf 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2714,9 +2714,7 @@  static void x86_cpu_reset(CPUState *s)
 
 #if !defined(CONFIG_USER_ONLY)
     /* We hard-wire the BSP to the first CPU. */
-    if (s->cpu_index == 0) {
-        apic_designate_bsp(cpu->apic_state);
-    }
+    apic_designate_bsp(cpu->apic_state, s->cpu_index == 0);
 
     s->halted = !cpu_is_bsp(cpu);