Patchwork [RFC] apic: Fix relocation

login
register
mail settings
Submitter Jan Kiszka
Date Feb. 3, 2011, 7:32 p.m.
Message ID <4D4B02E8.7060104@siemens.com>
Download mbox | patch
Permalink /patch/81713/
State New
Headers show

Comments

Jan Kiszka - Feb. 3, 2011, 7:32 p.m.
When the guest remaps an APIC by modifying MSR_IA32_APICBASE, we need to
update its mmio mapping. This is a bit tricky as multiple APICs might be
mapped to the same address. So walk through the full list to avoid
unmapping a region that is still in use.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---

RFC as I did not yet have a chance to test actual relocation. Standard
OSes don't do this, otherwise we would have noticed this earlier.

 hw/apic.c |   38 +++++++++++++++++++++++++++++++++++++-
 hw/pc.c   |   10 ----------
 2 files changed, 37 insertions(+), 11 deletions(-)
Jan Kiszka - Feb. 3, 2011, 9:24 p.m.
On 2011-02-03 20:43, Blue Swirl wrote:
> On Thu, Feb 3, 2011 at 7:32 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>> When the guest remaps an APIC by modifying MSR_IA32_APICBASE, we need to
>> update its mmio mapping. This is a bit tricky as multiple APICs might be
>> mapped to the same address. So walk through the full list to avoid
>> unmapping a region that is still in use.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>> ---
>>
>> RFC as I did not yet have a chance to test actual relocation. Standard
>> OSes don't do this, otherwise we would have noticed this earlier.
>>
>>  hw/apic.c |   38 +++++++++++++++++++++++++++++++++++++-
>>  hw/pc.c   |   10 ----------
>>  2 files changed, 37 insertions(+), 11 deletions(-)
>>
>> diff --git a/hw/apic.c b/hw/apic.c
>> index 05a115f..b64af59 100644
>> --- a/hw/apic.c
>> +++ b/hw/apic.c
>> @@ -294,6 +294,40 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
>>                      trigger_mode);
>>  }
>>
>> +static void apic_update_mapping(APICState *s)
>> +{
>> +    target_phys_addr_t new_addr;
>> +    bool overlap = false;
>> +    APICState *apic_iter;
>> +    int i;
>> +
>> +    for (i = 0; i < MAX_APICS; i++) {
>> +        apic_iter = local_apics[i];
>> +        if (!apic_iter || apic_iter == s) {
>> +            continue;
>> +        }
>> +        if ((apic_iter->apicbase & MSR_IA32_APICBASE_BASE) ==
>> +            s->busdev.mmio[0].addr) {
>> +            overlap = true;
>> +            break;
>> +        }
>> +    }
>> +    if (overlap) {
>> +        /*
>> +         * As APICs are per-CPU devices, they may have identical base
>> +         * addresses. We must avoid unregistering an old io-region that is
>> +         * still in use by another APIC.
>> +         */
>> +        s->busdev.mmio[0].addr = (target_phys_addr_t)-1;
>> +    }
>> +    if (s->apicbase & MSR_IA32_APICBASE_ENABLE) {
>> +        new_addr = s->apicbase & MSR_IA32_APICBASE_BASE;
>> +    } else {
>> +        new_addr = (target_phys_addr_t)-1;
>> +    }
>> +    sysbus_mmio_map(&s->busdev, 0, new_addr);
>> +}
>> +
>>  void cpu_set_apic_base(DeviceState *d, uint64_t val)
>>  {
>>     APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
>> @@ -302,7 +336,7 @@ void cpu_set_apic_base(DeviceState *d, uint64_t val)
>>
>>     if (!s)
>>         return;
>> -    s->apicbase = (val & 0xfffff000) |
>> +    s->apicbase = (val & MSR_IA32_APICBASE_BASE) |
>>         (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
>>     /* if disabled, cannot be enabled again */
>>     if (!(val & MSR_IA32_APICBASE_ENABLE)) {
>> @@ -310,6 +344,7 @@ void cpu_set_apic_base(DeviceState *d, uint64_t val)
>>         cpu_clear_apic_feature(s->cpu_env);
>>         s->spurious_vec &= ~APIC_SV_ENABLE;
>>     }
>> +    apic_update_mapping(s);
>>  }
>>
>>  uint64_t cpu_get_apic_base(DeviceState *d)
>> @@ -948,6 +983,7 @@ static void apic_reset(DeviceState *d)
>>     bsp = cpu_is_bsp(s->cpu_env);
>>     s->apicbase = 0xfee00000 |
>>         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
>> +    apic_update_mapping(s);
> 
> Here the device maps itself at reset, which is not OK.
> 
>>
>>     apic_init_reset(d);
>>
>> diff --git a/hw/pc.c b/hw/pc.c
>> index 4dfdc0b..294aa66 100644
>> --- a/hw/pc.c
>> +++ b/hw/pc.c
>> @@ -859,7 +859,6 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
>>  {
>>     DeviceState *dev;
>>     SysBusDevice *d;
>> -    static int apic_mapped;
>>
>>     dev = qdev_create(NULL, "apic");
>>     qdev_prop_set_uint8(dev, "id", apic_id);
>> @@ -867,15 +866,6 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
>>     qdev_init_nofail(dev);
>>     d = sysbus_from_qdev(dev);
>>
>> -    /* XXX: mapping more APICs at the same memory location */
>> -    if (apic_mapped == 0) {
>> -        /* NOTE: the APIC is directly connected to the CPU - it is not
>> -           on the global memory bus. */
>> -        /* XXX: what if the base changes? */
>> -        sysbus_mmio_map(d, 0, MSI_ADDR_BASE);
>> -        apic_mapped = 1;
> 
> TBH, this is not so cool either. Maybe APIC should not be a Sysbus
> device at all.

You actually need the APIC to A) be part of, or a child of, its CPU and B)
be part of a second bus, the APIC bus (where we would find IOAPICs as well,
also multi-homed). We don't model this at the moment, so we have the global
local_apics[] and soon ioapics[]. And we need to coordinate remapping of the
APIC where we have those full views.

But even that's not the actual problem. The problem is that we need
per-CPU MMIO mapping and handling here, and that is not supported by
QEMU. We would furthermore have to refactor everything around apicbase
updates, and I surely don't want to do this in this fix. It targets the
obvious breakage based on the current QEMU architecture. We can always
improve on top, of course.

Jan

Patch

diff --git a/hw/apic.c b/hw/apic.c
index 05a115f..b64af59 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -294,6 +294,40 @@  void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
                      trigger_mode);
 }
 
+static void apic_update_mapping(APICState *s)
+{
+    target_phys_addr_t new_addr;
+    bool overlap = false;
+    APICState *apic_iter;
+    int i;
+
+    for (i = 0; i < MAX_APICS; i++) {
+        apic_iter = local_apics[i];
+        if (!apic_iter || apic_iter == s) {
+            continue;
+        }
+        if ((apic_iter->apicbase & MSR_IA32_APICBASE_BASE) ==
+            s->busdev.mmio[0].addr) {
+            overlap = true;
+            break;
+        }
+    }
+    if (overlap) {
+        /*
+         * As APICs are per-CPU devices, they may have identical base
+         * addresses. We must avoid unregistering an old io-region that is
+         * still in use by another APIC.
+         */
+        s->busdev.mmio[0].addr = (target_phys_addr_t)-1;
+    }
+    if (s->apicbase & MSR_IA32_APICBASE_ENABLE) {
+        new_addr = s->apicbase & MSR_IA32_APICBASE_BASE;
+    } else {
+        new_addr = (target_phys_addr_t)-1;
+    }
+    sysbus_mmio_map(&s->busdev, 0, new_addr);
+}
+
 void cpu_set_apic_base(DeviceState *d, uint64_t val)
 {
     APICState *s = DO_UPCAST(APICState, busdev.qdev, d);
@@ -302,7 +336,7 @@  void cpu_set_apic_base(DeviceState *d, uint64_t val)
 
     if (!s)
         return;
-    s->apicbase = (val & 0xfffff000) |
+    s->apicbase = (val & MSR_IA32_APICBASE_BASE) |
         (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
     /* if disabled, cannot be enabled again */
     if (!(val & MSR_IA32_APICBASE_ENABLE)) {
@@ -310,6 +344,7 @@  void cpu_set_apic_base(DeviceState *d, uint64_t val)
         cpu_clear_apic_feature(s->cpu_env);
         s->spurious_vec &= ~APIC_SV_ENABLE;
     }
+    apic_update_mapping(s);
 }
 
 uint64_t cpu_get_apic_base(DeviceState *d)
@@ -948,6 +983,7 @@  static void apic_reset(DeviceState *d)
     bsp = cpu_is_bsp(s->cpu_env);
     s->apicbase = 0xfee00000 |
         (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;
+    apic_update_mapping(s);
 
     apic_init_reset(d);
 
diff --git a/hw/pc.c b/hw/pc.c
index 4dfdc0b..294aa66 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -859,7 +859,6 @@  static DeviceState *apic_init(void *env, uint8_t apic_id)
 {
     DeviceState *dev;
     SysBusDevice *d;
-    static int apic_mapped;
 
     dev = qdev_create(NULL, "apic");
     qdev_prop_set_uint8(dev, "id", apic_id);
@@ -867,15 +866,6 @@  static DeviceState *apic_init(void *env, uint8_t apic_id)
     qdev_init_nofail(dev);
     d = sysbus_from_qdev(dev);
 
-    /* XXX: mapping more APICs at the same memory location */
-    if (apic_mapped == 0) {
-        /* NOTE: the APIC is directly connected to the CPU - it is not
-           on the global memory bus. */
-        /* XXX: what if the base changes? */
-        sysbus_mmio_map(d, 0, MSI_ADDR_BASE);
-        apic_mapped = 1;
-    }
-
     msix_supported = 1;
 
     return dev;