diff mbox

[RESEND,v8,1/4] apic: map APIC's MMIO region at each CPU's address space

Message ID 4b8fc4d985de6c4f44509b9c47a6152add36d832.1435195913.git.zhugh.fnst@cn.fujitsu.com
State New
Headers show

Commit Message

Zhu Guihua June 25, 2015, 2:17 a.m. UTC
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Replace mapping APIC at global system address space with
mapping it at per-CPU address spaces.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Signed-off-by: Zhu Guihua <zhugh.fnst@cn.fujitsu.com>
---
 exec.c                |  5 +++++
 hw/i386/pc.c          |  7 -------
 hw/intc/apic_common.c | 14 ++++++++------
 include/exec/memory.h |  5 +++++
 target-i386/cpu.c     |  2 ++
 5 files changed, 20 insertions(+), 13 deletions(-)

Comments

Andreas Färber June 25, 2015, 4 p.m. UTC | #1
Am 25.06.2015 um 04:17 schrieb Zhu Guihua:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Replace mapping APIC at global system address space with
> mapping it at per-CPU address spaces.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> Signed-off-by: Zhu Guihua <zhugh.fnst@cn.fujitsu.com>
> ---
>  exec.c                |  5 +++++
>  hw/i386/pc.c          |  7 -------
>  hw/intc/apic_common.c | 14 ++++++++------
>  include/exec/memory.h |  5 +++++
>  target-i386/cpu.c     |  2 ++
>  5 files changed, 20 insertions(+), 13 deletions(-)

Eduardo, is this for you?

> diff --git a/exec.c b/exec.c
> index f7883d2..1cd2e74 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2710,6 +2710,11 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
>      cpu_notify_map_clients();
>  }
>  
> +MemoryRegion *address_space_root_memory_region(AddressSpace *as)
> +{
> +    return as->root;
> +}
> +
>  void *cpu_physical_memory_map(hwaddr addr,
>                                hwaddr *plen,
>                                int is_write)

This looks trivial and could've been a separate preparatory patch.
Paolo, is this part okay with you?

> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 7072930..9f16128 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1076,13 +1076,6 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
>          object_unref(OBJECT(cpu));
>      }
>  
> -    /* map APIC MMIO area if CPU has APIC */
> -    if (cpu && cpu->apic_state) {
> -        /* XXX: what if the base changes? */
> -        sysbus_mmio_map_overlap(SYS_BUS_DEVICE(icc_bridge), 0,
> -                                APIC_DEFAULT_ADDRESS, 0x1000);
> -    }
> -
>      /* tell smbios about cpuid version and features */
>      smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
>  }
> diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
> index 0032b97..cf105f5 100644
> --- a/hw/intc/apic_common.c
> +++ b/hw/intc/apic_common.c
> @@ -296,7 +296,8 @@ static void apic_common_realize(DeviceState *dev, Error **errp)
>      APICCommonClass *info;
>      static DeviceState *vapic;
>      static int apic_no;
> -    static bool mmio_registered;
> +    CPUState *cpu = CPU(s->cpu);
> +    MemoryRegion *root;

Nit: These variables were sorted by non-static vs. static, so the new
ones should've gone before vapic.

>  
>      if (apic_no >= MAX_APICS) {
>          error_setg(errp, "%s initialization failed.",
> @@ -307,11 +308,12 @@ static void apic_common_realize(DeviceState *dev, Error **errp)
>  
>      info = APIC_COMMON_GET_CLASS(s);
>      info->realize(dev, errp);
> -    if (!mmio_registered) {
> -        ICCBus *b = ICC_BUS(qdev_get_parent_bus(dev));
> -        memory_region_add_subregion(b->apic_address_space, 0, &s->io_memory);
> -        mmio_registered = true;
> -    }
> +
> +    root = address_space_root_memory_region(cpu->as);
> +    memory_region_add_subregion_overlap(root,
> +                                        s->apicbase & MSR_IA32_APICBASE_BASE,
> +                                        &s->io_memory,
> +                                        0x1000);
>  
>      /* Note: We need at least 1M to map the VAPIC option ROM */
>      if (!vapic && s->vapic_control & VAPIC_ENABLE_MASK &&
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 8ae004e..811f027 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -1308,6 +1308,11 @@ void *address_space_map(AddressSpace *as, hwaddr addr,
>  void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
>                           int is_write, hwaddr access_len);
>  
> +/* address_space_root_memory_region: get root memory region
> + *
> + * @as: #AddressSpace to be accessed
> + */
> +MemoryRegion *address_space_root_memory_region(AddressSpace *as);
>  
>  #endif
>  
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index 36b07f9..1fb88f6 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -2741,6 +2741,8 @@ static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
>      /* TODO: convert to link<> */
>      apic = APIC_COMMON(cpu->apic_state);
>      apic->cpu = cpu;
> +    cpu_set_apic_base(cpu->apic_state,
> +                      APIC_DEFAULT_ADDRESS | MSR_IA32_APICBASE_ENABLE);
>  }
>  
>  static void x86_cpu_apic_realize(X86CPU *cpu, Error **errp)

Otherwise, if it works now, LGTM.

Regards,
Andreas
Paolo Bonzini June 25, 2015, 4:02 p.m. UTC | #2
On 25/06/2015 18:00, Andreas Färber wrote:
>> -    if (!mmio_registered) {
>> -        ICCBus *b = ICC_BUS(qdev_get_parent_bus(dev));
>> -        memory_region_add_subregion(b->apic_address_space, 0, &s->io_memory);
>> -        mmio_registered = true;
>> -    }
>> +
>> +    root = address_space_root_memory_region(cpu->as);
>> +    memory_region_add_subregion_overlap(root,
>> +                                        s->apicbase & MSR_IA32_APICBASE_BASE,
>> +                                        &s->io_memory,
>> +                                        0x1000);
>>  

I had already remarked that this patch is wrong.  cpu->as is completely
unused on KVM, for example.

Paolo
Andreas Färber June 25, 2015, 4:10 p.m. UTC | #3
Am 25.06.2015 um 18:02 schrieb Paolo Bonzini:
> On 25/06/2015 18:00, Andreas Färber wrote:
>>> -    if (!mmio_registered) {
>>> -        ICCBus *b = ICC_BUS(qdev_get_parent_bus(dev));
>>> -        memory_region_add_subregion(b->apic_address_space, 0, &s->io_memory);
>>> -        mmio_registered = true;
>>> -    }
>>> +
>>> +    root = address_space_root_memory_region(cpu->as);
>>> +    memory_region_add_subregion_overlap(root,
>>> +                                        s->apicbase & MSR_IA32_APICBASE_BASE,
>>> +                                        &s->io_memory,
>>> +                                        0x1000);
>>>  
> 
> I had already remarked that this patch is wrong.  cpu->as is completely
> unused on KVM, for example.

Ah, then I don't understand this [RESEND]. ;)
Either way, not on my plate ATM, it seems.

Did you also outline how it is supposed to be done instead?

Regards,
Andreas
Paolo Bonzini June 25, 2015, 5:02 p.m. UTC | #4
On 25/06/2015 18:10, Andreas Färber wrote:
> Am 25.06.2015 um 18:02 schrieb Paolo Bonzini:
>> On 25/06/2015 18:00, Andreas Färber wrote:
>>>> -    if (!mmio_registered) {
>>>> -        ICCBus *b = ICC_BUS(qdev_get_parent_bus(dev));
>>>> -        memory_region_add_subregion(b->apic_address_space, 0, &s->io_memory);
>>>> -        mmio_registered = true;
>>>> -    }
>>>> +
>>>> +    root = address_space_root_memory_region(cpu->as);
>>>> +    memory_region_add_subregion_overlap(root,
>>>> +                                        s->apicbase & MSR_IA32_APICBASE_BASE,
>>>> +                                        &s->io_memory,
>>>> +                                        0x1000);
>>>>  
>>
>> I had already remarked that this patch is wrong.  cpu->as is completely
>> unused on KVM, for example.
> 
> Ah, then I don't understand this [RESEND]. ;)
> Either way, not on my plate ATM, it seems.
> 
> Did you also outline how it is supposed to be done instead?

I said "I think this patch is incorrect, because you do not install a
separate address space for each CPU.  Also, the CPU address space is
only used with TCG so it should be guarded by "if (tcg_enabled())"."

By the way, now TCG _is_ installing a separate address space per CPU
already, so the patch can simply guard the code with "if (tcg_enabled())".

Paolo
Andreas Färber June 25, 2015, 5:08 p.m. UTC | #5
Am 25.06.2015 um 19:02 schrieb Paolo Bonzini:
> On 25/06/2015 18:10, Andreas Färber wrote:
>> Am 25.06.2015 um 18:02 schrieb Paolo Bonzini:
>>> On 25/06/2015 18:00, Andreas Färber wrote:
>>>>> -    if (!mmio_registered) {
>>>>> -        ICCBus *b = ICC_BUS(qdev_get_parent_bus(dev));
>>>>> -        memory_region_add_subregion(b->apic_address_space, 0, &s->io_memory);
>>>>> -        mmio_registered = true;
>>>>> -    }
>>>>> +
>>>>> +    root = address_space_root_memory_region(cpu->as);
>>>>> +    memory_region_add_subregion_overlap(root,
>>>>> +                                        s->apicbase & MSR_IA32_APICBASE_BASE,
>>>>> +                                        &s->io_memory,
>>>>> +                                        0x1000);
>>>>>  
>>>
>>> I had already remarked that this patch is wrong.  cpu->as is completely
>>> unused on KVM, for example.
>>
>> Ah, then I don't understand this [RESEND]. ;)
>> Either way, not on my plate ATM, it seems.
>>
>> Did you also outline how it is supposed to be done instead?
> 
> I said "I think this patch is incorrect, because you do not install a
> separate address space for each CPU.  Also, the CPU address space is
> only used with TCG so it should be guarded by "if (tcg_enabled())"."
> 
> By the way, now TCG _is_ installing a separate address space per CPU
> already, so the patch can simply guard the code with "if (tcg_enabled())".

Is the APIC MemoryRegion not used by KVM? Otherwise if we still need the
ugly code path for KVM, that's not much of an improvement here.

And is installing a separate address space per CPU for KVM difficult due
to kernel limitations, or is this just a few lines of QEMU code that Zhu
or someone would need to write? :)

Regards,
Andreas
Paolo Bonzini June 25, 2015, 5:27 p.m. UTC | #6
On 25/06/2015 19:08, Andreas Färber wrote:
> > I said "I think this patch is incorrect, because you do not install a
> > separate address space for each CPU.  Also, the CPU address space is
> > only used with TCG so it should be guarded by "if (tcg_enabled())"."
> > 
> > By the way, now TCG _is_ installing a separate address space per CPU
> > already, so the patch can simply guard the code with "if (tcg_enabled())".
> 
> Is the APIC MemoryRegion not used by KVM?

It's used if the userspace LAPIC is.  It's not used together with the
kernel LAPIC (accesses really are trapped by the kernel).

> Otherwise if we still need the
> ugly code path for KVM, that's not much of an improvement here.
> 
> And is installing a separate address space per CPU for KVM difficult due
> to kernel limitations, or is this just a few lines of QEMU code that Zhu
> or someone would need to write? :)

It's basically impossible.  Even though support for multiple address
spaces is going to be in Linux 4.2, there are going to be just two: SMM
and not SMM.  You don't really want to do O(#cpus) stuff in KVM, where
the number of CPUs can be 200 or more.

TCG is okay because the #cpus is not really going to be more than 4-ish.

Paolo
Peter Maydell June 25, 2015, 5:32 p.m. UTC | #7
On 25 June 2015 at 18:27, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 25/06/2015 19:08, Andreas Färber wrote:
>> And is installing a separate address space per CPU for KVM difficult due
>> to kernel limitations, or is this just a few lines of QEMU code that Zhu
>> or someone would need to write? :)
>
> It's basically impossible.  Even though support for multiple address
> spaces is going to be in Linux 4.2, there are going to be just two: SMM
> and not SMM.  You don't really want to do O(#cpus) stuff in KVM, where
> the number of CPUs can be 200 or more.

Can you explain what the issue is here? Shouldn't it just be a matter
of kvm_cpu_exec() doing a dispatch to cpu->as rather than calling
address_space_rw() ?  (Making it do that was one of the things on my
todo list for ARM at some point.)

I'm happy to assume that RAM is shared by all CPUs I guess.

> TCG is okay because the #cpus is not really going to be more than 4-ish.

Well, it might be more than that in future...

-- PMM
Paolo Bonzini June 25, 2015, 5:39 p.m. UTC | #8
On 25/06/2015 19:32, Peter Maydell wrote:
> On 25 June 2015 at 18:27, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> On 25/06/2015 19:08, Andreas Färber wrote:
>>> And is installing a separate address space per CPU for KVM difficult due
>>> to kernel limitations, or is this just a few lines of QEMU code that Zhu
>>> or someone would need to write? :)
>>
>> It's basically impossible.  Even though support for multiple address
>> spaces is going to be in Linux 4.2, there are going to be just two: SMM
>> and not SMM.  You don't really want to do O(#cpus) stuff in KVM, where
>> the number of CPUs can be 200 or more.
> 
> Can you explain what the issue is here? Shouldn't it just be a matter
> of kvm_cpu_exec() doing a dispatch to cpu->as rather than calling
> address_space_rw() ?  (Making it do that was one of the things on my
> todo list for ARM at some point.)

One example of the problem is that different CPU address spaces can have
MMIO in different places.  These MMIO areas can hide RAM depending on
where they're placed and their relative priorities.  If they do, KVM
cannot really assume that a single set of page tables is okay to
convert gpa->hpa for all guest CPUs.

If you can tie this to CPU state (e.g. in or out of system management
mode), you only get a small, constant number of such address spaces.

See http://thread.gmane.org/gmane.comp.emulators.qemu/345230 for the
QEMU part of the multiple-address-space support.

Paolo

> I'm happy to assume that RAM is shared by all CPUs I guess.
> 
>> TCG is okay because the #cpus is not really going to be more than 4-ish.
> 
> Well, it might be more than that in future...
> 
> -- PMM
>
Igor Mammedov June 26, 2015, 9:01 a.m. UTC | #9
On Thu, 25 Jun 2015 19:27:47 +0200
Paolo Bonzini <pbonzini@redhat.com> wrote:

> 
> 
> On 25/06/2015 19:08, Andreas Färber wrote:
> > > I said "I think this patch is incorrect, because you do not install a
> > > separate address space for each CPU.  Also, the CPU address space is
> > > only used with TCG so it should be guarded by "if (tcg_enabled())"."
> > > 
> > > By the way, now TCG _is_ installing a separate address space per CPU
> > > already, so the patch can simply guard the code with "if (tcg_enabled())".
> > 
> > Is the APIC MemoryRegion not used by KVM?
> 
> It's used if the userspace LAPIC is.  It's not used together with the
> kernel LAPIC (accesses really are trapped by the kernel).
Doesn't this memory region still handle MSI MMIO in the case of the kernel LAPIC?
kvm_apic_mem_write() -> kvm_irqchip_send_msi()

> 
> > Otherwise if we still need the
> > ugly code path for KVM, that's not much of an improvement here.
> > 
> > And is installing a separate address space per CPU for KVM difficult due
> > to kernel limitations, or is this just a few lines of QEMU code that Zhu
> > or someone would need to write? :)
> 
> It's basically impossible.  Even though support for multiple address
> spaces is going to be in Linux 4.2, there are going to be just two: SMM
> and not SMM.  You don't really want to do O(#cpus) stuff in KVM, where
> the number of CPUs can be 200 or more.
> 
> TCG is okay because the #cpus is not really going to be more than 4-ish.
> 
> Paolo
Paolo Bonzini June 26, 2015, 9:05 a.m. UTC | #10
On 26/06/2015 11:01, Igor Mammedov wrote:
> > It's used if the userspace LAPIC is.  It's not used together with the
> > kernel LAPIC (accesses really are trapped by the kernel).
>
> Doesn't this memory region still handle MSI MMIO in the case of the kernel LAPIC?
> kvm_apic_mem_write() -> kvm_irqchip_send_msi()

I'm not sure if the MSI MMIO area moves together with the APIC base.  I
think it doesn't.

Paolo
diff mbox

Patch

diff --git a/exec.c b/exec.c
index f7883d2..1cd2e74 100644
--- a/exec.c
+++ b/exec.c
@@ -2710,6 +2710,11 @@  void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
     cpu_notify_map_clients();
 }
 
+MemoryRegion *address_space_root_memory_region(AddressSpace *as)
+{
+    return as->root;
+}
+
 void *cpu_physical_memory_map(hwaddr addr,
                               hwaddr *plen,
                               int is_write)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7072930..9f16128 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1076,13 +1076,6 @@  void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
         object_unref(OBJECT(cpu));
     }
 
-    /* map APIC MMIO area if CPU has APIC */
-    if (cpu && cpu->apic_state) {
-        /* XXX: what if the base changes? */
-        sysbus_mmio_map_overlap(SYS_BUS_DEVICE(icc_bridge), 0,
-                                APIC_DEFAULT_ADDRESS, 0x1000);
-    }
-
     /* tell smbios about cpuid version and features */
     smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
 }
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 0032b97..cf105f5 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -296,7 +296,8 @@  static void apic_common_realize(DeviceState *dev, Error **errp)
     APICCommonClass *info;
     static DeviceState *vapic;
     static int apic_no;
-    static bool mmio_registered;
+    CPUState *cpu = CPU(s->cpu);
+    MemoryRegion *root;
 
     if (apic_no >= MAX_APICS) {
         error_setg(errp, "%s initialization failed.",
@@ -307,11 +308,12 @@  static void apic_common_realize(DeviceState *dev, Error **errp)
 
     info = APIC_COMMON_GET_CLASS(s);
     info->realize(dev, errp);
-    if (!mmio_registered) {
-        ICCBus *b = ICC_BUS(qdev_get_parent_bus(dev));
-        memory_region_add_subregion(b->apic_address_space, 0, &s->io_memory);
-        mmio_registered = true;
-    }
+
+    root = address_space_root_memory_region(cpu->as);
+    memory_region_add_subregion_overlap(root,
+                                        s->apicbase & MSR_IA32_APICBASE_BASE,
+                                        &s->io_memory,
+                                        0x1000);
 
     /* Note: We need at least 1M to map the VAPIC option ROM */
     if (!vapic && s->vapic_control & VAPIC_ENABLE_MASK &&
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 8ae004e..811f027 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1308,6 +1308,11 @@  void *address_space_map(AddressSpace *as, hwaddr addr,
 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                          int is_write, hwaddr access_len);
 
+/* address_space_root_memory_region: get root memory region
+ *
+ * @as: #AddressSpace to be accessed
+ */
+MemoryRegion *address_space_root_memory_region(AddressSpace *as);
 
 #endif
 
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 36b07f9..1fb88f6 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2741,6 +2741,8 @@  static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
     /* TODO: convert to link<> */
     apic = APIC_COMMON(cpu->apic_state);
     apic->cpu = cpu;
+    cpu_set_apic_base(cpu->apic_state,
+                      APIC_DEFAULT_ADDRESS | MSR_IA32_APICBASE_ENABLE);
 }
 
 static void x86_cpu_apic_realize(X86CPU *cpu, Error **errp)