Message ID | 1427932716-11800-1-git-send-email-namit@cs.technion.ac.il |
---|---|
State | New |
Headers | show |
On 02/04/2015 01:58, Nadav Amit wrote: > Since the BSP bit is writable on real hardware, during reset all the CPUs which > were not chosen to be the BSP should have their BSP bit cleared. This fix is > required for KVM to work correctly when it changes the BSP bit. > > An additional fix is required for QEMU tcg to allow software to change the BSP > bit. > > Signed-off-by: Nadav Amit <namit@cs.technion.ac.il> > --- > hw/intc/apic_common.c | 8 ++++++-- > include/hw/i386/apic.h | 2 +- > target-i386/cpu.c | 4 +--- > 3 files changed, 8 insertions(+), 6 deletions(-) > > diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c > index 0858b45..042e960 100644 > --- a/hw/intc/apic_common.c > +++ b/hw/intc/apic_common.c > @@ -215,14 +215,18 @@ void apic_init_reset(DeviceState *dev) > } > } > > -void apic_designate_bsp(DeviceState *dev) > +void apic_designate_bsp(DeviceState *dev, bool bsp) > { > if (dev == NULL) { > return; > } > > APICCommonState *s = APIC_COMMON(dev); > - s->apicbase |= MSR_IA32_APICBASE_BSP; > + if (bsp) { > + s->apicbase |= MSR_IA32_APICBASE_BSP; > + } else { > + s->apicbase &= ~MSR_IA32_APICBASE_BSP; > + } > } > > static void apic_reset_common(DeviceState *dev) > diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h > index 1d48e02..51eb6d3 100644 > --- a/include/hw/i386/apic.h > +++ b/include/hw/i386/apic.h > @@ -21,7 +21,7 @@ void apic_sipi(DeviceState *s); > void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip, > TPRAccess access); > void apic_poll_irq(DeviceState *d); > -void apic_designate_bsp(DeviceState *d); > +void apic_designate_bsp(DeviceState *d, bool bsp); > > /* pc.c */ > DeviceState *cpu_get_current_apic(void); > diff --git a/target-i386/cpu.c b/target-i386/cpu.c > index b2d1c95..03b33cf 100644 > --- a/target-i386/cpu.c > +++ b/target-i386/cpu.c > @@ -2714,9 +2714,7 @@ static void x86_cpu_reset(CPUState *s) > > #if !defined(CONFIG_USER_ONLY) > /* We hard-wire the BSP to the first CPU. */ > - if (s->cpu_index == 0) { > - apic_designate_bsp(cpu->apic_state); > - } > + apic_designate_bsp(cpu->apic_state, s->cpu_index == 0); > > s->halted = !cpu_is_bsp(cpu); > > Thanks, applied locally. Paolo
+Eduardo as target-i386 maintainer Am 02.04.2015 um 14:34 schrieb Paolo Bonzini: > > > On 02/04/2015 01:58, Nadav Amit wrote: >> Since the BSP bit is writable on real hardware, during reset all the CPUs which >> were not chosen to be the BSP should have their BSP bit cleared. This fix is >> required for KVM to work correctly when it changes the BSP bit. >> >> An additional fix is required for QEMU tcg to allow software to change the BSP >> bit. >> >> Signed-off-by: Nadav Amit <namit@cs.technion.ac.il> >> --- >> hw/intc/apic_common.c | 8 ++++++-- >> include/hw/i386/apic.h | 2 +- >> target-i386/cpu.c | 4 +--- >> 3 files changed, 8 insertions(+), 6 deletions(-) >> >> diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c >> index 0858b45..042e960 100644 >> --- a/hw/intc/apic_common.c >> +++ b/hw/intc/apic_common.c >> @@ -215,14 +215,18 @@ void apic_init_reset(DeviceState *dev) >> } >> } >> >> -void apic_designate_bsp(DeviceState *dev) >> +void apic_designate_bsp(DeviceState *dev, bool bsp) >> { >> if (dev == NULL) { >> return; >> } >> >> APICCommonState *s = APIC_COMMON(dev); >> - s->apicbase |= MSR_IA32_APICBASE_BSP; >> + if (bsp) { >> + s->apicbase |= MSR_IA32_APICBASE_BSP; >> + } else { >> + s->apicbase &= ~MSR_IA32_APICBASE_BSP; >> + } >> } >> >> static void apic_reset_common(DeviceState *dev) >> diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h >> index 1d48e02..51eb6d3 100644 >> --- a/include/hw/i386/apic.h >> +++ b/include/hw/i386/apic.h >> @@ -21,7 +21,7 @@ void apic_sipi(DeviceState *s); >> void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip, >> TPRAccess access); >> void apic_poll_irq(DeviceState *d); >> -void apic_designate_bsp(DeviceState *d); >> +void apic_designate_bsp(DeviceState *d, bool bsp); >> >> /* pc.c */ >> DeviceState *cpu_get_current_apic(void); >> diff --git a/target-i386/cpu.c b/target-i386/cpu.c >> index b2d1c95..03b33cf 100644 >> --- a/target-i386/cpu.c >> +++ b/target-i386/cpu.c >> @@ -2714,9 +2714,7 @@ static void x86_cpu_reset(CPUState *s) >> >> #if !defined(CONFIG_USER_ONLY) >> /* We hard-wire the BSP to the first CPU. */ >> - if (s->cpu_index == 0) { >> - apic_designate_bsp(cpu->apic_state); >> - } >> + apic_designate_bsp(cpu->apic_state, s->cpu_index == 0); >> >> s->halted = !cpu_is_bsp(cpu); >> >> > > Thanks, applied locally. I don't understand why this is necessary: The cpu_index doesn't change, therefore the BSP designation won't change either. Is this a hot-unplug preparation? Or is a KVM-specific call missing here? Either way, the commit message could use some clarification. The API change itself looks okay. Regards, Andreas
On 07/04/2015 12:15, Andreas Färber wrote: >>> >> diff --git a/target-i386/cpu.c b/target-i386/cpu.c >>> >> index b2d1c95..03b33cf 100644 >>> >> --- a/target-i386/cpu.c >>> >> +++ b/target-i386/cpu.c >>> >> @@ -2714,9 +2714,7 @@ static void x86_cpu_reset(CPUState *s) >>> >> >>> >> #if !defined(CONFIG_USER_ONLY) >>> >> /* We hard-wire the BSP to the first CPU. */ >>> >> - if (s->cpu_index == 0) { >>> >> - apic_designate_bsp(cpu->apic_state); >>> >> - } >>> >> + apic_designate_bsp(cpu->apic_state, s->cpu_index == 0); >>> >> >>> >> s->halted = !cpu_is_bsp(cpu); >>> >> >>> >> >> > >> > Thanks, applied locally. > I don't understand why this is necessary: The cpu_index doesn't change, > therefore the BSP designation won't change either. It can change at runtime, though, if you're using the KVM in-kernel LAPIC. Paolo Is this a hot-unplug > preparation? Or is a KVM-specific call missing here? Either way, the > commit message could use some clarification.
Am 07.04.2015 um 12:34 schrieb Paolo Bonzini: > On 07/04/2015 12:15, Andreas Färber wrote: >>>>>> diff --git a/target-i386/cpu.c b/target-i386/cpu.c >>>>>> index b2d1c95..03b33cf 100644 >>>>>> --- a/target-i386/cpu.c >>>>>> +++ b/target-i386/cpu.c >>>>>> @@ -2714,9 +2714,7 @@ static void x86_cpu_reset(CPUState *s) >>>>>> >>>>>> #if !defined(CONFIG_USER_ONLY) >>>>>> /* We hard-wire the BSP to the first CPU. */ >>>>>> - if (s->cpu_index == 0) { >>>>>> - apic_designate_bsp(cpu->apic_state); >>>>>> - } >>>>>> + apic_designate_bsp(cpu->apic_state, s->cpu_index == 0); >>>>>> >>>>>> s->halted = !cpu_is_bsp(cpu); >>>>>> >>>>>> >>>> >>>> Thanks, applied locally. >> I don't understand why this is necessary: The cpu_index doesn't change, >> therefore the BSP designation won't change either. > > It can change at runtime, though, if you're using the KVM in-kernel LAPIC. Got a pointer? A quick git-grep doesn't show anything in hw/ or kvm-all.c or target-i386/ assigning cpu_index, so it'll always have the initial value. Andreas
On 07/04/2015 12:44, Andreas Färber wrote: >> > It can change at runtime, though, if you're using the KVM in-kernel LAPIC. > Got a pointer? A quick git-grep doesn't show anything in hw/ or > kvm-all.c or target-i386/ assigning cpu_index, so it'll always have the > initial value. Not cpu_index, s->apicbase's MSR_IA32_APICBASE_BSP bit can change with KVM in-kernel LAPIC. It cannot change with QEMU's userspace LAPIC. Because it can change, you have to call apic_designate_bsp for all CPUs and not only on CPU 0. Paolo
Am 07.04.2015 um 12:54 schrieb Paolo Bonzini: > On 07/04/2015 12:44, Andreas Färber wrote: >>>> It can change at runtime, though, if you're using the KVM in-kernel LAPIC. >> Got a pointer? A quick git-grep doesn't show anything in hw/ or >> kvm-all.c or target-i386/ assigning cpu_index, so it'll always have the >> initial value. > > Not cpu_index, s->apicbase's MSR_IA32_APICBASE_BSP bit can change with > KVM in-kernel LAPIC. It cannot change with QEMU's userspace LAPIC. > > Because it can change, you have to call apic_designate_bsp for all CPUs > and not only on CPU 0. Now I'm even more confused. Surely CPUState is initially zero-initialized. Then we designate one as BSP on reset. That should be the same result as designating all non-BSP CPUs, no? The only way that would change is then cpu_index == 0 goes away (hot-unplug, not implemented yet), and in that case it would be about designating a different CPU, not about un-designating one. If this is some issue with sync'ing state back and forth before QEMU and KVM then the real issue has not been explained. Andreas
Am 07.04.2015 um 13:09 schrieb Andreas Färber: > Am 07.04.2015 um 12:54 schrieb Paolo Bonzini: >> On 07/04/2015 12:44, Andreas Färber wrote: >>>>> It can change at runtime, though, if you're using the KVM in-kernel LAPIC. >>> Got a pointer? A quick git-grep doesn't show anything in hw/ or >>> kvm-all.c or target-i386/ assigning cpu_index, so it'll always have the >>> initial value. >> >> Not cpu_index, s->apicbase's MSR_IA32_APICBASE_BSP bit can change with >> KVM in-kernel LAPIC. It cannot change with QEMU's userspace LAPIC. >> >> Because it can change, you have to call apic_designate_bsp for all CPUs >> and not only on CPU 0. > > Now I'm even more confused. Surely CPUState is initially > zero-initialized. Then we designate one as BSP on reset. That should be > the same result as designating all non-BSP CPUs, no? The only way that > would change is then cpu_index == 0 goes away (hot-unplug, not > implemented yet), and in that case it would be about designating a > different CPU, not about un-designating one. > > If this is some issue with sync'ing state back and forth before QEMU and > KVM then the real issue has not been explained. Hm, hw/intc/apic_common.c:apic_reset_common() has: bsp = cpu_is_bsp(s->cpu); s->apicbase = APIC_DEFAULT_ADDRESS | (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; What this is doing is really: bsp = cpu_get_apic_base(s->cpu->apic_state) & MSR_IA32_APICBASE_BSP; s->apicbase = APIC_DEFAULT_ADDRESS | (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; Unless I'm missing something, since we are in the APIC device's reset function, this is effectively a twisted way of writing: bsp = s->apicbase & MSR_IA32_APICBASE_BSP; s->apicbase = APIC_DEFAULT_ADDRESS | (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; In which case we already relied on s->cpu and could thus simply change this to something like: bsp = CPU(s->cpu)->cpu_index == 0; s->apicbase = APIC_DEFAULT_ADDRESS | (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; Then the apicbase manipulation would be nicely encapsulated in the APIC rather than the APIC reset retaining it and the CPU reset meddling with its state. Andreas
On Tue, 07 Apr 2015 13:57:34 +0200 Andreas Färber <afaerber@suse.de> wrote: > Am 07.04.2015 um 13:09 schrieb Andreas Färber: > > Am 07.04.2015 um 12:54 schrieb Paolo Bonzini: > >> On 07/04/2015 12:44, Andreas Färber wrote: > >>>>> It can change at runtime, though, if you're using the KVM > >>>>> in-kernel LAPIC. > >>> Got a pointer? A quick git-grep doesn't show anything in hw/ or > >>> kvm-all.c or target-i386/ assigning cpu_index, so it'll always > >>> have the initial value. > >> > >> Not cpu_index, s->apicbase's MSR_IA32_APICBASE_BSP bit can change > >> with KVM in-kernel LAPIC. It cannot change with QEMU's userspace > >> LAPIC. > >> > >> Because it can change, you have to call apic_designate_bsp for all > >> CPUs and not only on CPU 0. > > > > Now I'm even more confused. Surely CPUState is initially > > zero-initialized. Then we designate one as BSP on reset. That > > should be the same result as designating all non-BSP CPUs, no? The > > only way that would change is then cpu_index == 0 goes away > > (hot-unplug, not implemented yet), and in that case it would be > > about designating a different CPU, not about un-designating one. > > > > If this is some issue with sync'ing state back and forth before > > QEMU and KVM then the real issue has not been explained. guest can set BSP flag in apicbase of non the first CPU and then o reset on KVM exit it will be propagated into QEMU's state kvm_arch_post_run() -> cpu_set_apic_base() as result with current code after reset we will have the first CPU with BSP bit and another one since nobody bothered to clear it. That's what this patch does. [...] > Unless I'm missing something, since we are in the APIC device's reset > function, this is effectively a twisted way of writing: > > bsp = s->apicbase & MSR_IA32_APICBASE_BSP; > s->apicbase = APIC_DEFAULT_ADDRESS | > (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; > > In which case we already relied on s->cpu and could thus simply change > this to something like: > > bsp = CPU(s->cpu)->cpu_index == 0; ^^^ shouldn't be part of APIC code, APIC shouldn't have anything to do with cpu_index. we do above in CPU code currently /* We hard-wire the BSP to the first CPU. */ if (s->cpu_index == 0) { apic_designate_bsp(cpu->apic_state); } > s->apicbase = APIC_DEFAULT_ADDRESS | > (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; > > Then the apicbase manipulation would be nicely encapsulated in the > APIC rather than the APIC reset retaining it and the CPU reset > meddling with its state. > > Andreas >
On 07/04/2015 13:57, Andreas Färber wrote: >> > If this is some issue with sync'ing state back and forth before QEMU and >> > KVM then the real issue has not been explained. > Hm, hw/intc/apic_common.c:apic_reset_common() has: > > bsp = cpu_is_bsp(s->cpu); > s->apicbase = APIC_DEFAULT_ADDRESS | > (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; > > What this is doing is really: > > bsp = cpu_get_apic_base(s->cpu->apic_state) & MSR_IA32_APICBASE_BSP; > s->apicbase = APIC_DEFAULT_ADDRESS | > (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; > > Unless I'm missing something, since we are in the APIC device's reset > function, this is effectively a twisted way of writing: > > bsp = s->apicbase & MSR_IA32_APICBASE_BSP; > s->apicbase = APIC_DEFAULT_ADDRESS | > (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; Yes, this is more readable. > In which case we already relied on s->cpu and could thus simply change > this to something like: > > bsp = CPU(s->cpu)->cpu_index == 0; > s->apicbase = APIC_DEFAULT_ADDRESS | > (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; > > Then the apicbase manipulation would be nicely encapsulated in the APIC > rather than the APIC reset retaining it and the CPU reset meddling with > its state. ... but I find this worse. apic_designate_bsp corresponds to hardware executing the MP initialization protocol. Paolo
Am 07.04.2015 um 15:14 schrieb Igor Mammedov: > On Tue, 07 Apr 2015 13:57:34 +0200 > Andreas Färber <afaerber@suse.de> wrote: > >> Am 07.04.2015 um 13:09 schrieb Andreas Färber: >>> Am 07.04.2015 um 12:54 schrieb Paolo Bonzini: >>>> On 07/04/2015 12:44, Andreas Färber wrote: >>>>>>> It can change at runtime, though, if you're using the KVM >>>>>>> in-kernel LAPIC. >>>>> Got a pointer? A quick git-grep doesn't show anything in hw/ or >>>>> kvm-all.c or target-i386/ assigning cpu_index, so it'll always >>>>> have the initial value. >>>> >>>> Not cpu_index, s->apicbase's MSR_IA32_APICBASE_BSP bit can change >>>> with KVM in-kernel LAPIC. It cannot change with QEMU's userspace >>>> LAPIC. >>>> >>>> Because it can change, you have to call apic_designate_bsp for all >>>> CPUs and not only on CPU 0. >>> >>> Now I'm even more confused. Surely CPUState is initially >>> zero-initialized. Then we designate one as BSP on reset. That >>> should be the same result as designating all non-BSP CPUs, no? The >>> only way that would change is then cpu_index == 0 goes away >>> (hot-unplug, not implemented yet), and in that case it would be >>> about designating a different CPU, not about un-designating one. >>> >>> If this is some issue with sync'ing state back and forth before >>> QEMU and KVM then the real issue has not been explained. > guest can set BSP flag in apicbase of non the first CPU and then o reset > on KVM exit it will be propagated into QEMU's state > kvm_arch_post_run() -> cpu_set_apic_base() > > as result with current code after reset we will have the first CPU with > BSP bit and another one since nobody bothered to clear it. > > That's what this patch does. > > [...] >> Unless I'm missing something, since we are in the APIC device's reset >> function, this is effectively a twisted way of writing: >> >> bsp = s->apicbase & MSR_IA32_APICBASE_BSP; >> s->apicbase = APIC_DEFAULT_ADDRESS | >> (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; >> >> In which case we already relied on s->cpu and could thus simply change >> this to something like: >> >> bsp = CPU(s->cpu)->cpu_index == 0; > ^^^ shouldn't be part of APIC code, APIC shouldn't have anything to do > with cpu_index. > > we do above in CPU code currently > /* We hard-wire the BSP to the first CPU. */ > if (s->cpu_index == 0) { > apic_designate_bsp(cpu->apic_state); > } I know, that's what this patch is changing, and I am saying that by the same logic the CPU has no business fiddling with the APIC's apicbase field when the APIC's reset is touching that very same field. And I do remember that we arrived at this solution to get to a halfway simple generic reset semantics, moving an external BSP designation into the reset handling. However, this patch is making the twisted logic worse IMO. Andreas > > >> s->apicbase = APIC_DEFAULT_ADDRESS | >> (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; >> >> Then the apicbase manipulation would be nicely encapsulated in the >> APIC rather than the APIC reset retaining it and the CPU reset >> meddling with its state. >> >> Andreas >> >
On 07/04/15 16:18, Paolo Bonzini wrote: > > > On 07/04/2015 13:57, Andreas Färber wrote: >>>> If this is some issue with sync'ing state back and forth before QEMU and >>>> KVM then the real issue has not been explained. >> Hm, hw/intc/apic_common.c:apic_reset_common() has: >> >> bsp = cpu_is_bsp(s->cpu); >> s->apicbase = APIC_DEFAULT_ADDRESS | >> (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; >> >> What this is doing is really: >> >> bsp = cpu_get_apic_base(s->cpu->apic_state) & MSR_IA32_APICBASE_BSP; >> s->apicbase = APIC_DEFAULT_ADDRESS | >> (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; >> >> Unless I'm missing something, since we are in the APIC device's reset >> function, this is effectively a twisted way of writing: >> >> bsp = s->apicbase & MSR_IA32_APICBASE_BSP; >> s->apicbase = APIC_DEFAULT_ADDRESS | >> (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; > > Yes, this is more readable. just $0.02 :) why don't bsp = s->apicbase & MSR_IA32_APICBASE_BSP; s->apicbase = APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE; in this case. This looks the same from the textual point of view. Den
On 07/04/2015 15:26, Denis V. Lunev wrote: > On 07/04/15 16:18, Paolo Bonzini wrote: >> >> >> On 07/04/2015 13:57, Andreas Färber wrote: >>>>> If this is some issue with sync'ing state back and forth before >>>>> QEMU and >>>>> KVM then the real issue has not been explained. >>> Hm, hw/intc/apic_common.c:apic_reset_common() has: >>> >>> bsp = cpu_is_bsp(s->cpu); >>> s->apicbase = APIC_DEFAULT_ADDRESS | >>> (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; >>> >>> What this is doing is really: >>> >>> bsp = cpu_get_apic_base(s->cpu->apic_state) & >>> MSR_IA32_APICBASE_BSP; >>> s->apicbase = APIC_DEFAULT_ADDRESS | >>> (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; >>> >>> Unless I'm missing something, since we are in the APIC device's reset >>> function, this is effectively a twisted way of writing: >>> >>> bsp = s->apicbase & MSR_IA32_APICBASE_BSP; >>> s->apicbase = APIC_DEFAULT_ADDRESS | >>> (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; >> >> Yes, this is more readable. > > just $0.02 :) > > why don't > bsp = s->apicbase & MSR_IA32_APICBASE_BSP; > s->apicbase = > APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE; > in this case. This looks the same from the textual point of view. Yes. Would you like to send a patch? Paolo
On 07/04/2015 15:24, Andreas Färber wrote: >> > /* We hard-wire the BSP to the first CPU. */ >> > if (s->cpu_index == 0) { >> > apic_designate_bsp(cpu->apic_state); >> > } > I know, that's what this patch is changing, and I am saying that by the > same logic the CPU has no business fiddling with the APIC's apicbase > field when the APIC's reset is touching that very same field. That's exactly what a real CPU does on power up or #RESET, though. Paolo
Am 07.04.2015 um 15:29 schrieb Paolo Bonzini: > On 07/04/2015 15:24, Andreas Färber wrote: >>>> /* We hard-wire the BSP to the first CPU. */ >>>> if (s->cpu_index == 0) { >>>> apic_designate_bsp(cpu->apic_state); >>>> } >> I know, that's what this patch is changing, and I am saying that by the >> same logic the CPU has no business fiddling with the APIC's apicbase >> field when the APIC's reset is touching that very same field. > > That's exactly what a real CPU does on power up or #RESET, though. Does the APIC retain its BSP bit value on #RESET though? I doubt it. It feels we're awkwardly working around qdev reset semantics here... If you say the CPU must be in charge, then we should assure that the APIC is reset before the CPU designates it and not have the APIC reset callback retain such bits. Admittedly, if this were for-2.3 (as which it is not marked) then this patch may be the least intrusive. But it isn't and I've been preparing to refactor the CPU-APIC relationship, so I really want to get it right long-term. Andreas
On 07/04/15 16:29, Paolo Bonzini wrote: > > > On 07/04/2015 15:26, Denis V. Lunev wrote: >> On 07/04/15 16:18, Paolo Bonzini wrote: >>> >>> >>> On 07/04/2015 13:57, Andreas Färber wrote: >>>>>> If this is some issue with sync'ing state back and forth before >>>>>> QEMU and >>>>>> KVM then the real issue has not been explained. >>>> Hm, hw/intc/apic_common.c:apic_reset_common() has: >>>> >>>> bsp = cpu_is_bsp(s->cpu); >>>> s->apicbase = APIC_DEFAULT_ADDRESS | >>>> (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; >>>> >>>> What this is doing is really: >>>> >>>> bsp = cpu_get_apic_base(s->cpu->apic_state) & >>>> MSR_IA32_APICBASE_BSP; >>>> s->apicbase = APIC_DEFAULT_ADDRESS | >>>> (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; >>>> >>>> Unless I'm missing something, since we are in the APIC device's reset >>>> function, this is effectively a twisted way of writing: >>>> >>>> bsp = s->apicbase & MSR_IA32_APICBASE_BSP; >>>> s->apicbase = APIC_DEFAULT_ADDRESS | >>>> (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; >>> >>> Yes, this is more readable. >> >> just $0.02 :) >> >> why don't >> bsp = s->apicbase & MSR_IA32_APICBASE_BSP; >> s->apicbase = >> APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE; >> in this case. This looks the same from the textual point of view. > > Yes. Would you like to send a patch? > > Paolo > no prob, just give me 2 minutes. Side note, bsp will become uint32_t and we will loose tracepoint inside cpu_get_apic_base() on this path... Den
Am 07.04.2015 um 15:44 schrieb Denis V. Lunev: > On 07/04/15 16:29, Paolo Bonzini wrote: >> >> >> On 07/04/2015 15:26, Denis V. Lunev wrote: >>> On 07/04/15 16:18, Paolo Bonzini wrote: >>>> >>>> >>>> On 07/04/2015 13:57, Andreas Färber wrote: >>>>>>> If this is some issue with sync'ing state back and forth before >>>>>>> QEMU and >>>>>>> KVM then the real issue has not been explained. >>>>> Hm, hw/intc/apic_common.c:apic_reset_common() has: >>>>> >>>>> bsp = cpu_is_bsp(s->cpu); >>>>> s->apicbase = APIC_DEFAULT_ADDRESS | >>>>> (bsp ? MSR_IA32_APICBASE_BSP : 0) | >>>>> MSR_IA32_APICBASE_ENABLE; >>>>> >>>>> What this is doing is really: >>>>> >>>>> bsp = cpu_get_apic_base(s->cpu->apic_state) & >>>>> MSR_IA32_APICBASE_BSP; >>>>> s->apicbase = APIC_DEFAULT_ADDRESS | >>>>> (bsp ? MSR_IA32_APICBASE_BSP : 0) | >>>>> MSR_IA32_APICBASE_ENABLE; >>>>> >>>>> Unless I'm missing something, since we are in the APIC device's reset >>>>> function, this is effectively a twisted way of writing: >>>>> >>>>> bsp = s->apicbase & MSR_IA32_APICBASE_BSP; >>>>> s->apicbase = APIC_DEFAULT_ADDRESS | >>>>> (bsp ? MSR_IA32_APICBASE_BSP : 0) | >>>>> MSR_IA32_APICBASE_ENABLE; >>>> >>>> Yes, this is more readable. >>> >>> just $0.02 :) >>> >>> why don't >>> bsp = s->apicbase & MSR_IA32_APICBASE_BSP; >>> s->apicbase = >>> APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE; >>> in this case. This looks the same from the textual point of view. >> >> Yes. Would you like to send a patch? >> >> Paolo >> > > no prob, just give me 2 minutes. Side note, bsp will become uint32_t > and we will loose tracepoint inside cpu_get_apic_base() on this > path... Yes, I intentionally left it out above as I don't think we need to trace the local usage here. Worth mentioning in the commit message though. Regards, Andreas
On 07/04/2015 15:40, Andreas Färber wrote: > Am 07.04.2015 um 15:29 schrieb Paolo Bonzini: >> On 07/04/2015 15:24, Andreas Färber wrote: >>>>> /* We hard-wire the BSP to the first CPU. */ >>>>> if (s->cpu_index == 0) { >>>>> apic_designate_bsp(cpu->apic_state); >>>>> } >>> I know, that's what this patch is changing, and I am saying that by the >>> same logic the CPU has no business fiddling with the APIC's apicbase >>> field when the APIC's reset is touching that very same field. >> >> That's exactly what a real CPU does on power up or #RESET, though. > > Does the APIC retain its BSP bit value on #RESET though? I doubt it. You cannot tell, since the MP protocol reruns immediately after a full reset. I think we do this in apic_cpu_reset to avoid mess with the initialization order of the APIC and CPU. > It feels we're awkwardly working around qdev reset semantics here... > > If you say the CPU must be in charge, then we should assure that the > APIC is reset before the CPU designates it and not have the APIC reset > callback retain such bits. Yes, I agree, but as you know very well the propagation of signals (be it "reset" or "realize") is a mess. Even if you make the APIC a QOM child of the CPU, this doesn't mean that qdev reset (which is post-order) propagates to the APIC before propagating to the CPU. > Admittedly, if this were for-2.3 (as which it is not marked) then this > patch may be the least intrusive. But it isn't and I've been preparing > to refactor the CPU-APIC relationship, so I really want to get it right > long-term. Well, actually I did post it for inclusion in 2.3 since it affected only KVM and it would be ugly to have 4.0 fail kvm-unit-tests with all existing QEMU releases. Paolo
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 0858b45..042e960 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -215,14 +215,18 @@ void apic_init_reset(DeviceState *dev) } } -void apic_designate_bsp(DeviceState *dev) +void apic_designate_bsp(DeviceState *dev, bool bsp) { if (dev == NULL) { return; } APICCommonState *s = APIC_COMMON(dev); - s->apicbase |= MSR_IA32_APICBASE_BSP; + if (bsp) { + s->apicbase |= MSR_IA32_APICBASE_BSP; + } else { + s->apicbase &= ~MSR_IA32_APICBASE_BSP; + } } static void apic_reset_common(DeviceState *dev) diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h index 1d48e02..51eb6d3 100644 --- a/include/hw/i386/apic.h +++ b/include/hw/i386/apic.h @@ -21,7 +21,7 @@ void apic_sipi(DeviceState *s); void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip, TPRAccess access); void apic_poll_irq(DeviceState *d); -void apic_designate_bsp(DeviceState *d); +void apic_designate_bsp(DeviceState *d, bool bsp); /* pc.c */ DeviceState *cpu_get_current_apic(void); diff --git a/target-i386/cpu.c b/target-i386/cpu.c index b2d1c95..03b33cf 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -2714,9 +2714,7 @@ static void x86_cpu_reset(CPUState *s) #if !defined(CONFIG_USER_ONLY) /* We hard-wire the BSP to the first CPU. */ - if (s->cpu_index == 0) { - apic_designate_bsp(cpu->apic_state); - } + apic_designate_bsp(cpu->apic_state, s->cpu_index == 0); s->halted = !cpu_is_bsp(cpu);
Since the BSP bit is writable on real hardware, during reset all the CPUs which were not chosen to be the BSP should have their BSP bit cleared. This fix is required for KVM to work correctly when it changes the BSP bit. An additional fix is required for QEMU tcg to allow software to change the BSP bit. Signed-off-by: Nadav Amit <namit@cs.technion.ac.il> --- hw/intc/apic_common.c | 8 ++++++-- include/hw/i386/apic.h | 2 +- target-i386/cpu.c | 4 +--- 3 files changed, 8 insertions(+), 6 deletions(-)