Message ID | 1392652187-28381-5-git-send-email-imammedo@redhat.com |
---|---|
State | New |
Headers | show |
On Mon, Feb 17, 2014 at 04:49:45PM +0100, Igor Mammedov wrote: > From: Wanlong Gao <gaowanlong@cn.fujitsu.com> > > Add the numa_info structure to contain the numa nodes memory, > VCPUs information and the future added numa nodes host memory > policies. > > Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> > Signed-off-by: Andre Przywara <andre.przywara@amd.com> > Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > Signed-off-by: Igor Mammedov <imammedo@redhat.com> For pc.c changes here: Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > --- > hw/i386/pc.c | 14 +++++++++----- > hw/ppc/spapr.c | 11 ++++++----- > include/sysemu/sysemu.h | 8 ++++++-- > monitor.c | 2 +- > numa.c | 23 ++++++++++++----------- > vl.c | 7 +++---- > 6 files changed, 37 insertions(+), 28 deletions(-) > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index e715a33..42182f9 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -674,14 +674,14 @@ static FWCfgState *bochs_bios_init(void) > unsigned int apic_id = x86_cpu_apic_id_from_index(i); > assert(apic_id < apic_id_limit); > for (j = 0; j < nb_numa_nodes; j++) { > - if (test_bit(i, node_cpumask[j])) { > + if (test_bit(i, numa_info[j].node_cpu)) { > numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); > break; > } > } > } > for (i = 0; i < nb_numa_nodes; i++) { > - numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]); > + numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem); > } > fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, > (1 + apic_id_limit + nb_numa_nodes) * > @@ -1077,8 +1077,12 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, > guest_info->apic_id_limit = pc_apic_id_limit(max_cpus); > guest_info->apic_xrupt_override = kvm_allows_irq0_override(); > guest_info->numa_nodes = nb_numa_nodes; > - guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes * > - sizeof *guest_info->node_mem); > + guest_info->node_mem = g_malloc0(guest_info->numa_nodes * > + sizeof *guest_info->node_mem); > + for (i = 0; i < nb_numa_nodes; i++) { > + guest_info->node_mem[i] = numa_info[i].node_mem; > + } > + > guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit * > sizeof *guest_info->node_cpu); > > @@ -1086,7 +1090,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, > unsigned int apic_id = x86_cpu_apic_id_from_index(i); > assert(apic_id < guest_info->apic_id_limit); > for (j = 0; j < nb_numa_nodes; j++) { > - if (test_bit(i, node_cpumask[j])) { > + if (test_bit(i, numa_info[j].node_cpu)) { > guest_info->node_cpu[apic_id] = j; > break; > } > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 93d02c1..6fd2d95 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -531,8 +531,8 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) > int i, off; > > /* memory node(s) */ > - if (nb_numa_nodes > 1 && node_mem[0] < ram_size) { > - node0_size = node_mem[0]; > + if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) { > + node0_size = numa_info[0].node_mem; > } else { > node0_size = ram_size; > } > @@ -570,7 +570,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) > if (mem_start >= ram_size) { > node_size = 0; > } else { > - node_size = node_mem[i]; > + node_size = numa_info[i].node_mem; > if (node_size > ram_size - mem_start) { > node_size = ram_size - mem_start; > } > @@ -697,7 +697,8 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) > > /* Update the RMA size if necessary */ > if (spapr->vrma_adjust) { > - hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size; > + hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : > + ram_size; > spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift); > } > } > @@ -1115,7 +1116,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args) > MemoryRegion *sysmem = get_system_memory(); > MemoryRegion *ram = g_new(MemoryRegion, 1); > hwaddr rma_alloc_size; > - hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size; > + hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : ram_size; > uint32_t initrd_base = 0; > long kernel_size = 0, initrd_size = 0; > long load_limit, rtas_limit, fw_size; > diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h > index 2509649..d873b42 100644 > --- a/include/sysemu/sysemu.h > +++ b/include/sysemu/sysemu.h > @@ -9,6 +9,7 @@ > #include "qapi-types.h" > #include "qemu/notify.h" > #include "qemu/main-loop.h" > +#include "qemu/bitmap.h" > > /* vl.c */ > > @@ -134,8 +135,11 @@ extern QEMUClockType rtc_clock; > #define MAX_NODES 64 > #define MAX_CPUMASK_BITS 255 > extern int nb_numa_nodes; > -extern uint64_t node_mem[MAX_NODES]; > -extern unsigned long *node_cpumask[MAX_NODES]; > +typedef struct node_info { > + uint64_t node_mem; > + DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); > +} NodeInfo; > +extern NodeInfo numa_info[MAX_NODES]; > void numa_add(const char *optarg); > void set_numa_nodes(void); > void set_numa_modes(void); > diff --git a/monitor.c b/monitor.c > index 690c152..0284735 100644 > --- a/monitor.c > +++ b/monitor.c > @@ -2004,7 +2004,7 @@ static void do_info_numa(Monitor *mon, const QDict *qdict) > } > monitor_printf(mon, "\n"); > monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i, > - node_mem[i] >> 20); > + numa_info[i].node_mem >> 20); > } > } > > diff --git a/numa.c b/numa.c > index d12a4f2..c3eca78 100644 > --- a/numa.c > +++ b/numa.c > @@ -61,7 +61,7 @@ static void numa_node_parse_cpus(int nodenr, const char *cpus) > goto error; > } > > - bitmap_set(node_cpumask[nodenr], value, endvalue-value+1); > + bitmap_set(numa_info[nodenr].node_cpu, value, endvalue - value + 1); > return; > > error: > @@ -101,7 +101,7 @@ void numa_add(const char *optarg) > } > > if (get_param_value(option, 128, "mem", optarg) == 0) { > - node_mem[nodenr] = 0; > + numa_info[nodenr].node_mem = 0; > } else { > int64_t sval; > sval = strtosz(option, &endptr); > @@ -109,7 +109,7 @@ void numa_add(const char *optarg) > fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg); > exit(1); > } > - node_mem[nodenr] = sval; > + numa_info[nodenr].node_mem = sval; > } > if (get_param_value(option, 128, "cpus", optarg) != 0) { > numa_node_parse_cpus(nodenr, option); > @@ -134,7 +134,7 @@ void set_numa_nodes(void) > * and distribute the available memory equally across all nodes > */ > for (i = 0; i < nb_numa_nodes; i++) { > - if (node_mem[i] != 0) { > + if (numa_info[i].node_mem != 0) { > break; > } > } > @@ -145,15 +145,16 @@ void set_numa_nodes(void) > * the final node gets the rest. > */ > for (i = 0; i < nb_numa_nodes - 1; i++) { > - node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1); > - usedmem += node_mem[i]; > + numa_info[i].node_mem = (ram_size / nb_numa_nodes) & > + ~((1 << 23UL) - 1); > + usedmem += numa_info[i].node_mem; > } > - node_mem[i] = ram_size - usedmem; > + numa_info[i].node_mem = ram_size - usedmem; > } > > uint64_t numa_total = 0; > for (i = 0; i < nb_numa_nodes; i++) { > - numa_total += node_mem[i]; > + numa_total += numa_info[i].node_mem; > } > if (numa_total != ram_size) { > fprintf(stderr, "qemu: numa nodes total memory size " > @@ -162,7 +163,7 @@ void set_numa_nodes(void) > } > > for (i = 0; i < nb_numa_nodes; i++) { > - if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) { > + if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) { > break; > } > } > @@ -172,7 +173,7 @@ void set_numa_nodes(void) > */ > if (i == nb_numa_nodes) { > for (i = 0; i < max_cpus; i++) { > - set_bit(i, node_cpumask[i % nb_numa_nodes]); > + set_bit(i, numa_info[i % nb_numa_nodes].node_cpu); > } > } > } > @@ -185,7 +186,7 @@ void set_numa_modes(void) > > CPU_FOREACH(cpu) { > for (i = 0; i < nb_numa_nodes; i++) { > - if (test_bit(cpu->cpu_index, node_cpumask[i])) { > + if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) { > cpu->numa_node = i; > } > } > diff --git a/vl.c b/vl.c > index 0adac0c..915f8b7 100644 > --- a/vl.c > +++ b/vl.c > @@ -196,8 +196,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order = > QTAILQ_HEAD_INITIALIZER(fw_boot_order); > > int nb_numa_nodes; > -uint64_t node_mem[MAX_NODES]; > -unsigned long *node_cpumask[MAX_NODES]; > +NodeInfo numa_info[MAX_NODES]; > > uint8_t qemu_uuid[16]; > bool qemu_uuid_set; > @@ -2787,8 +2786,8 @@ int main(int argc, char **argv, char **envp) > translation = BIOS_ATA_TRANSLATION_AUTO; > > for (i = 0; i < MAX_NODES; i++) { > - node_mem[i] = 0; > - node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS); > + numa_info[i].node_mem = 0; > + bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS); > } > > nb_numa_nodes = 0; > -- > 1.7.1
diff --git a/hw/i386/pc.c b/hw/i386/pc.c index e715a33..42182f9 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -674,14 +674,14 @@ static FWCfgState *bochs_bios_init(void) unsigned int apic_id = x86_cpu_apic_id_from_index(i); assert(apic_id < apic_id_limit); for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, node_cpumask[j])) { + if (test_bit(i, numa_info[j].node_cpu)) { numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); break; } } } for (i = 0; i < nb_numa_nodes; i++) { - numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]); + numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem); } fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, (1 + apic_id_limit + nb_numa_nodes) * @@ -1077,8 +1077,12 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, guest_info->apic_id_limit = pc_apic_id_limit(max_cpus); guest_info->apic_xrupt_override = kvm_allows_irq0_override(); guest_info->numa_nodes = nb_numa_nodes; - guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes * - sizeof *guest_info->node_mem); + guest_info->node_mem = g_malloc0(guest_info->numa_nodes * + sizeof *guest_info->node_mem); + for (i = 0; i < nb_numa_nodes; i++) { + guest_info->node_mem[i] = numa_info[i].node_mem; + } + guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit * sizeof *guest_info->node_cpu); @@ -1086,7 +1090,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, unsigned int apic_id = x86_cpu_apic_id_from_index(i); assert(apic_id < guest_info->apic_id_limit); for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, node_cpumask[j])) { + if (test_bit(i, numa_info[j].node_cpu)) { guest_info->node_cpu[apic_id] = j; break; } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 93d02c1..6fd2d95 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -531,8 +531,8 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) int i, off; /* memory node(s) */ - if (nb_numa_nodes > 1 && node_mem[0] < ram_size) { - node0_size = node_mem[0]; + if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) { + node0_size = numa_info[0].node_mem; } else { node0_size = ram_size; } @@ -570,7 +570,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) if (mem_start >= ram_size) { node_size = 0; } else { - node_size = node_mem[i]; + node_size = numa_info[i].node_mem; if (node_size > ram_size - mem_start) { node_size = ram_size - mem_start; } @@ -697,7 +697,8 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) /* Update the RMA size if necessary */ if (spapr->vrma_adjust) { - hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size; + hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : + ram_size; spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift); } } @@ -1115,7 +1116,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args) MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ram = g_new(MemoryRegion, 1); hwaddr rma_alloc_size; - hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size; + hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : ram_size; uint32_t initrd_base = 0; long kernel_size = 0, initrd_size = 0; long load_limit, rtas_limit, fw_size; diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 2509649..d873b42 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -9,6 +9,7 @@ #include "qapi-types.h" #include "qemu/notify.h" #include "qemu/main-loop.h" +#include "qemu/bitmap.h" /* vl.c */ @@ -134,8 +135,11 @@ extern QEMUClockType rtc_clock; #define MAX_NODES 64 #define MAX_CPUMASK_BITS 255 extern int nb_numa_nodes; -extern uint64_t node_mem[MAX_NODES]; -extern unsigned long *node_cpumask[MAX_NODES]; +typedef struct node_info { + uint64_t node_mem; + DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); +} NodeInfo; +extern NodeInfo numa_info[MAX_NODES]; void numa_add(const char *optarg); void set_numa_nodes(void); void set_numa_modes(void); diff --git a/monitor.c b/monitor.c index 690c152..0284735 100644 --- a/monitor.c +++ b/monitor.c @@ -2004,7 +2004,7 @@ static void do_info_numa(Monitor *mon, const QDict *qdict) } monitor_printf(mon, "\n"); monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i, - node_mem[i] >> 20); + numa_info[i].node_mem >> 20); } } diff --git a/numa.c b/numa.c index d12a4f2..c3eca78 100644 --- a/numa.c +++ b/numa.c @@ -61,7 +61,7 @@ static void numa_node_parse_cpus(int nodenr, const char *cpus) goto error; } - bitmap_set(node_cpumask[nodenr], value, endvalue-value+1); + bitmap_set(numa_info[nodenr].node_cpu, value, endvalue - value + 1); return; error: @@ -101,7 +101,7 @@ void numa_add(const char *optarg) } if (get_param_value(option, 128, "mem", optarg) == 0) { - node_mem[nodenr] = 0; + numa_info[nodenr].node_mem = 0; } else { int64_t sval; sval = strtosz(option, &endptr); @@ -109,7 +109,7 @@ void numa_add(const char *optarg) fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg); exit(1); } - node_mem[nodenr] = sval; + numa_info[nodenr].node_mem = sval; } if (get_param_value(option, 128, "cpus", optarg) != 0) { numa_node_parse_cpus(nodenr, option); @@ -134,7 +134,7 @@ void set_numa_nodes(void) * and distribute the available memory equally across all nodes */ for (i = 0; i < nb_numa_nodes; i++) { - if (node_mem[i] != 0) { + if (numa_info[i].node_mem != 0) { break; } } @@ -145,15 +145,16 @@ void set_numa_nodes(void) * the final node gets the rest. */ for (i = 0; i < nb_numa_nodes - 1; i++) { - node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1); - usedmem += node_mem[i]; + numa_info[i].node_mem = (ram_size / nb_numa_nodes) & + ~((1 << 23UL) - 1); + usedmem += numa_info[i].node_mem; } - node_mem[i] = ram_size - usedmem; + numa_info[i].node_mem = ram_size - usedmem; } uint64_t numa_total = 0; for (i = 0; i < nb_numa_nodes; i++) { - numa_total += node_mem[i]; + numa_total += numa_info[i].node_mem; } if (numa_total != ram_size) { fprintf(stderr, "qemu: numa nodes total memory size " @@ -162,7 +163,7 @@ void set_numa_nodes(void) } for (i = 0; i < nb_numa_nodes; i++) { - if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) { + if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) { break; } } @@ -172,7 +173,7 @@ void set_numa_nodes(void) */ if (i == nb_numa_nodes) { for (i = 0; i < max_cpus; i++) { - set_bit(i, node_cpumask[i % nb_numa_nodes]); + set_bit(i, numa_info[i % nb_numa_nodes].node_cpu); } } } @@ -185,7 +186,7 @@ void set_numa_modes(void) CPU_FOREACH(cpu) { for (i = 0; i < nb_numa_nodes; i++) { - if (test_bit(cpu->cpu_index, node_cpumask[i])) { + if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) { cpu->numa_node = i; } } diff --git a/vl.c b/vl.c index 0adac0c..915f8b7 100644 --- a/vl.c +++ b/vl.c @@ -196,8 +196,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order); int nb_numa_nodes; -uint64_t node_mem[MAX_NODES]; -unsigned long *node_cpumask[MAX_NODES]; +NodeInfo numa_info[MAX_NODES]; uint8_t qemu_uuid[16]; bool qemu_uuid_set; @@ -2787,8 +2786,8 @@ int main(int argc, char **argv, char **envp) translation = BIOS_ATA_TRANSLATION_AUTO; for (i = 0; i < MAX_NODES; i++) { - node_mem[i] = 0; - node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS); + numa_info[i].node_mem = 0; + bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS); } nb_numa_nodes = 0;