Patchwork [v8,18/26] i386: define pc guest info

login
register
mail settings
Submitter Michael S. Tsirkin
Date Oct. 3, 2013, 3:05 p.m.
Message ID <1380812639-3868-19-git-send-email-mst@redhat.com>
Download mbox | patch
Permalink /patch/280359/
State New
Headers show

Comments

Michael S. Tsirkin - Oct. 3, 2013, 3:05 p.m.
This defines a structure that will be used to fill in acpi tables
where relevant properties are not yet available using QOM.

Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Tested-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/i386/pc.h |  9 +++++++++
 hw/i386/pc.c         | 31 +++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)
Igor Mammedov - Oct. 4, 2013, 4:18 p.m.
On Thu, 3 Oct 2013 18:05:35 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> This defines a structure that will be used to fill in acpi tables
> where relevant properties are not yet available using QOM.
> 
> Reviewed-by: Laszlo Ersek <lersek@redhat.com>
> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
> Tested-by: Gerd Hoffmann <kraxel@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  include/hw/i386/pc.h |  9 +++++++++
>  hw/i386/pc.c         | 31 +++++++++++++++++++++++++++++++
>  2 files changed, 40 insertions(+)
> 
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 9b2ddc4..085a621 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -9,6 +9,9 @@
>  #include "hw/i386/ioapic.h"
>  
>  #include "qemu/range.h"
> +#include "qemu/bitmap.h"
> +#include "sysemu/sysemu.h"
> +#include "hw/pci/pci.h"
>  
>  /* PC-style peripherals (also used by other machines).  */
>  
> @@ -20,6 +23,12 @@ typedef struct PcPciInfo {
>  struct PcGuestInfo {
>      bool has_pci_info;
>      bool isapc_ram_fw;
> +    hwaddr ram_size;
> +    unsigned apic_id_limit;
> +    bool apic_xrupt_override;
> +    uint64_t numa_nodes;
> +    uint64_t *node_mem;
> +    uint64_t *node_cpu;
>      FWCfgState *fw_cfg;
>  };
>  
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 0c313fe..dbae9da 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1028,6 +1028,23 @@ static void pc_fw_cfg_guest_info(PcGuestInfo *guest_info)
>      fw_cfg_add_file(guest_info->fw_cfg, "etc/pci-info", info, sizeof *info);
>  }
>  
> +static void pc_set_cpu_guest_info(CPUState *cpu, PcGuestInfo *guest_info)
> +{
> +    CPUClass *klass = CPU_GET_CLASS(cpu);
> +    uint64_t apic_id = klass->get_arch_id(cpu);
> +    int j;
> +
> +    assert(apic_id < guest_info->apic_id_limit);
> +
> +    for (j = 0; j < guest_info->numa_nodes; j++) {
> +        assert(cpu->cpu_index < max_cpus);
> +        if (test_bit(cpu->cpu_index, node_cpumask[j])) {
> +            guest_info->node_cpu[apic_id] = cpu_to_le64(j);
> +            break;
> +        }
> +    }
> +}
> +
>  typedef struct PcGuestInfoState {
>      PcGuestInfo info;
>      Notifier machine_done;
> @@ -1047,6 +1064,20 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
>  {
>      PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
>      PcGuestInfo *guest_info = &guest_info_state->info;
> +    CPUState *cpu;
> +
> +    guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
> +    guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
> +    guest_info->apic_xrupt_override = kvm_allows_irq0_override();
> +    guest_info->numa_nodes = nb_numa_nodes;
> +    guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes *
> +                                    sizeof *guest_info->node_mem);
> +    guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
> +                                     sizeof *guest_info->node_cpu);
> +
> +    CPU_FOREACH(cpu) {
> +        pc_set_cpu_guest_info(cpu, guest_info);
> +    }

pc_guest_info_init() is called only once. Now suppose we hotplug CPUs
and then reboot the guest: the hot-added CPUs won't be accounted for in
guest_info.node_cpu, since it is initialized only once and never updated.
As a result, the guest will get a stale SRAT table.

Using a callback in acpi_setup/update would make it possible to get an updated guest_info.

>  
>      guest_info_state->machine_done.notify = pc_guest_info_machine_done;
>      qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);
Michael S. Tsirkin - Oct. 6, 2013, 7:59 p.m.
On Fri, Oct 04, 2013 at 06:18:42PM +0200, Igor Mammedov wrote:
> On Thu, 3 Oct 2013 18:05:35 +0300
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > This defines a structure that will be used to fill in acpi tables
> > where relevant properties are not yet available using QOM.
> > 
> > Reviewed-by: Laszlo Ersek <lersek@redhat.com>
> > Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
> > Tested-by: Gerd Hoffmann <kraxel@redhat.com>
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  include/hw/i386/pc.h |  9 +++++++++
> >  hw/i386/pc.c         | 31 +++++++++++++++++++++++++++++++
> >  2 files changed, 40 insertions(+)
> > 
> > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > index 9b2ddc4..085a621 100644
> > --- a/include/hw/i386/pc.h
> > +++ b/include/hw/i386/pc.h
> > @@ -9,6 +9,9 @@
> >  #include "hw/i386/ioapic.h"
> >  
> >  #include "qemu/range.h"
> > +#include "qemu/bitmap.h"
> > +#include "sysemu/sysemu.h"
> > +#include "hw/pci/pci.h"
> >  
> >  /* PC-style peripherals (also used by other machines).  */
> >  
> > @@ -20,6 +23,12 @@ typedef struct PcPciInfo {
> >  struct PcGuestInfo {
> >      bool has_pci_info;
> >      bool isapc_ram_fw;
> > +    hwaddr ram_size;
> > +    unsigned apic_id_limit;
> > +    bool apic_xrupt_override;
> > +    uint64_t numa_nodes;
> > +    uint64_t *node_mem;
> > +    uint64_t *node_cpu;
> >      FWCfgState *fw_cfg;
> >  };
> >  
> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > index 0c313fe..dbae9da 100644
> > --- a/hw/i386/pc.c
> > +++ b/hw/i386/pc.c
> > @@ -1028,6 +1028,23 @@ static void pc_fw_cfg_guest_info(PcGuestInfo *guest_info)
> >      fw_cfg_add_file(guest_info->fw_cfg, "etc/pci-info", info, sizeof *info);
> >  }
> >  
> > +static void pc_set_cpu_guest_info(CPUState *cpu, PcGuestInfo *guest_info)
> > +{
> > +    CPUClass *klass = CPU_GET_CLASS(cpu);
> > +    uint64_t apic_id = klass->get_arch_id(cpu);
> > +    int j;
> > +
> > +    assert(apic_id < guest_info->apic_id_limit);
> > +
> > +    for (j = 0; j < guest_info->numa_nodes; j++) {
> > +        assert(cpu->cpu_index < max_cpus);
> > +        if (test_bit(cpu->cpu_index, node_cpumask[j])) {
> > +            guest_info->node_cpu[apic_id] = cpu_to_le64(j);
> > +            break;
> > +        }
> > +    }
> > +}
> > +
> >  typedef struct PcGuestInfoState {
> >      PcGuestInfo info;
> >      Notifier machine_done;
> > @@ -1047,6 +1064,20 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> >  {
> >      PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
> >      PcGuestInfo *guest_info = &guest_info_state->info;
> > +    CPUState *cpu;
> > +
> > +    guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
> > +    guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
> > +    guest_info->apic_xrupt_override = kvm_allows_irq0_override();
> > +    guest_info->numa_nodes = nb_numa_nodes;
> > +    guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes *
> > +                                    sizeof *guest_info->node_mem);
> > +    guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
> > +                                     sizeof *guest_info->node_cpu);
> > +
> > +    CPU_FOREACH(cpu) {
> > +        pc_set_cpu_guest_info(cpu, guest_info);
> > +    }
> 
> pc_guest_info_init() is called only once. Now suppose we hotplug CPUs
> and then reboot the guest: the hot-added CPUs won't be accounted for in
> guest_info.node_cpu, since it is initialized only once and never updated.
> As a result, the guest will get a stale SRAT table.
> 
> Using a callback in acpi_setup/update would make it possible to get an updated guest_info.

While I agree it's a bug, it's also exactly what happens at the
moment with ACPI in the BIOS: the BIOS gets the info from a
FW CFG file, which is only updated once, during QEMU
initialization.

So I'll take a look at fixing this, but I don't think
it's a blocker for merging this patchset.
Does that make sense?

> >      guest_info_state->machine_done.notify = pc_guest_info_machine_done;
> >      qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);

Patch

diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9b2ddc4..085a621 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -9,6 +9,9 @@ 
 #include "hw/i386/ioapic.h"
 
 #include "qemu/range.h"
+#include "qemu/bitmap.h"
+#include "sysemu/sysemu.h"
+#include "hw/pci/pci.h"
 
 /* PC-style peripherals (also used by other machines).  */
 
@@ -20,6 +23,12 @@  typedef struct PcPciInfo {
 struct PcGuestInfo {
     bool has_pci_info;
     bool isapc_ram_fw;
+    hwaddr ram_size;
+    unsigned apic_id_limit;
+    bool apic_xrupt_override;
+    uint64_t numa_nodes;
+    uint64_t *node_mem;
+    uint64_t *node_cpu;
     FWCfgState *fw_cfg;
 };
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0c313fe..dbae9da 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1028,6 +1028,23 @@  static void pc_fw_cfg_guest_info(PcGuestInfo *guest_info)
     fw_cfg_add_file(guest_info->fw_cfg, "etc/pci-info", info, sizeof *info);
 }
 
+static void pc_set_cpu_guest_info(CPUState *cpu, PcGuestInfo *guest_info)
+{
+    CPUClass *klass = CPU_GET_CLASS(cpu);
+    uint64_t apic_id = klass->get_arch_id(cpu);
+    int j;
+
+    assert(apic_id < guest_info->apic_id_limit);
+
+    for (j = 0; j < guest_info->numa_nodes; j++) {
+        assert(cpu->cpu_index < max_cpus);
+        if (test_bit(cpu->cpu_index, node_cpumask[j])) {
+            guest_info->node_cpu[apic_id] = cpu_to_le64(j);
+            break;
+        }
+    }
+}
+
 typedef struct PcGuestInfoState {
     PcGuestInfo info;
     Notifier machine_done;
@@ -1047,6 +1064,20 @@  PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
 {
     PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
     PcGuestInfo *guest_info = &guest_info_state->info;
+    CPUState *cpu;
+
+    guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
+    guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
+    guest_info->apic_xrupt_override = kvm_allows_irq0_override();
+    guest_info->numa_nodes = nb_numa_nodes;
+    guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes *
+                                    sizeof *guest_info->node_mem);
+    guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
+                                     sizeof *guest_info->node_cpu);
+
+    CPU_FOREACH(cpu) {
+        pc_set_cpu_guest_info(cpu, guest_info);
+    }
 
     guest_info_state->machine_done.notify = pc_guest_info_machine_done;
     qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);