Patchwork [v3,03/34] NUMA: Add numa_info structure to contain numa nodes info

login
register
mail settings
Submitter Hu Tao
Date March 26, 2014, 10:36 a.m.
Message ID <dd933c30054f8d4f600f88a1f5801ae7b9741f41.1395825870.git.hutao@cn.fujitsu.com>
Download mbox | patch
Permalink /patch/333915/
State New
Headers show

Comments

Hu Tao - March 26, 2014, 10:36 a.m.
From: Wanlong Gao <gaowanlong@cn.fujitsu.com>

Add the numa_info structure to contain the numa nodes memory,
VCPUs information and the future added numa nodes host memory
policies.

Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
[Fix hw/ppc/spapr.c - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/i386/pc.c            | 12 ++++++++----
 hw/ppc/spapr.c          | 11 ++++++-----
 include/sysemu/sysemu.h |  8 ++++++--
 monitor.c               |  2 +-
 numa.c                  | 23 ++++++++++++-----------
 vl.c                    |  7 +++----
 6 files changed, 36 insertions(+), 27 deletions(-)

Patch

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 14f0d91..249e504 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -675,14 +675,14 @@  static FWCfgState *bochs_bios_init(void)
         unsigned int apic_id = x86_cpu_apic_id_from_index(i);
         assert(apic_id < apic_id_limit);
         for (j = 0; j < nb_numa_nodes; j++) {
-            if (test_bit(i, node_cpumask[j])) {
+            if (test_bit(i, numa_info[j].node_cpu)) {
                 numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
                 break;
             }
         }
     }
     for (i = 0; i < nb_numa_nodes; i++) {
-        numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]);
+        numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem);
     }
     fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
                      (1 + apic_id_limit + nb_numa_nodes) *
@@ -1093,8 +1093,12 @@  PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
     guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
     guest_info->apic_xrupt_override = kvm_allows_irq0_override();
     guest_info->numa_nodes = nb_numa_nodes;
-    guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes *
+    guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
                                     sizeof *guest_info->node_mem);
+    for (i = 0; i < nb_numa_nodes; i++) {
+        guest_info->node_mem[i] = numa_info[i].node_mem;
+    }
+
     guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
                                      sizeof *guest_info->node_cpu);
 
@@ -1102,7 +1106,7 @@  PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
         unsigned int apic_id = x86_cpu_apic_id_from_index(i);
         assert(apic_id < guest_info->apic_id_limit);
         for (j = 0; j < nb_numa_nodes; j++) {
-            if (test_bit(i, node_cpumask[j])) {
+            if (test_bit(i, numa_info[j].node_cpu)) {
                 guest_info->node_cpu[apic_id] = j;
                 break;
             }
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a11e121..89f71a8 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -538,8 +538,8 @@  static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
     int i, off;
 
     /* memory node(s) */
-    if (nb_numa_nodes > 1 && node_mem[0] < ram_size) {
-        node0_size = node_mem[0];
+    if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) {
+        node0_size = numa_info[0].node_mem;
     } else {
         node0_size = ram_size;
     }
@@ -577,7 +577,7 @@  static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
         if (mem_start >= ram_size) {
             node_size = 0;
         } else {
-            node_size = node_mem[i];
+            node_size = numa_info[i].node_mem;
             if (node_size > ram_size - mem_start) {
                 node_size = ram_size - mem_start;
             }
@@ -722,7 +722,8 @@  static void spapr_reset_htab(sPAPREnvironment *spapr)
 
     /* Update the RMA size if necessary */
     if (spapr->vrma_adjust) {
-        hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
+        hwaddr node0_size = (nb_numa_nodes > 1) ?
+            numa_info[0].node_mem : ram_size;
         spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift);
     }
 }
@@ -1155,7 +1156,7 @@  static void ppc_spapr_init(QEMUMachineInitArgs *args)
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
     hwaddr rma_alloc_size;
-    hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
+    hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : ram_size;
     uint32_t initrd_base = 0;
     long kernel_size = 0, initrd_size = 0;
     long load_limit, rtas_limit, fw_size;
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 565c8f6..3a9308b 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -9,6 +9,7 @@ 
 #include "qapi-types.h"
 #include "qemu/notify.h"
 #include "qemu/main-loop.h"
+#include "qemu/bitmap.h"
 
 /* vl.c */
 
@@ -142,8 +143,11 @@  extern QEMUClockType rtc_clock;
 #define MAX_CPUMASK_BITS 255
 
 extern int nb_numa_nodes;
-extern uint64_t node_mem[MAX_NODES];
-extern unsigned long *node_cpumask[MAX_NODES];
+typedef struct node_info {
+    uint64_t node_mem;
+    DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
+} NodeInfo;
+extern NodeInfo numa_info[MAX_NODES];
 void numa_add(const char *optarg);
 void set_numa_nodes(void);
 void set_numa_modes(void);
diff --git a/monitor.c b/monitor.c
index 342e83b..39f2518 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2031,7 +2031,7 @@  static void do_info_numa(Monitor *mon, const QDict *qdict)
         }
         monitor_printf(mon, "\n");
         monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i,
-            node_mem[i] >> 20);
+            numa_info[i].node_mem >> 20);
     }
 }
 
diff --git a/numa.c b/numa.c
index 1d2f761..48d8bcc 100644
--- a/numa.c
+++ b/numa.c
@@ -66,7 +66,7 @@  static void numa_node_parse_cpus(int nodenr, const char *cpus)
         goto error;
     }
 
-    bitmap_set(node_cpumask[nodenr], value, endvalue-value+1);
+    bitmap_set(numa_info[nodenr].node_cpu, value, endvalue-value+1);
     return;
 
 error:
@@ -106,7 +106,7 @@  void numa_add(const char *optarg)
         }
 
         if (get_param_value(option, 128, "mem", optarg) == 0) {
-            node_mem[nodenr] = 0;
+            numa_info[nodenr].node_mem = 0;
         } else {
             int64_t sval;
             sval = strtosz(option, &endptr);
@@ -114,7 +114,7 @@  void numa_add(const char *optarg)
                 fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
                 exit(1);
             }
-            node_mem[nodenr] = sval;
+            numa_info[nodenr].node_mem = sval;
         }
         if (get_param_value(option, 128, "cpus", optarg) != 0) {
             numa_node_parse_cpus(nodenr, option);
@@ -140,7 +140,7 @@  void set_numa_nodes(void)
          * and distribute the available memory equally across all nodes
          */
         for (i = 0; i < nb_numa_nodes; i++) {
-            if (node_mem[i] != 0) {
+            if (numa_info[i].node_mem != 0) {
                 break;
             }
         }
@@ -151,15 +151,16 @@  void set_numa_nodes(void)
              * the final node gets the rest.
              */
             for (i = 0; i < nb_numa_nodes - 1; i++) {
-                node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1);
-                usedmem += node_mem[i];
+                numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
+                                        ~((1 << 23UL) - 1);
+                usedmem += numa_info[i].node_mem;
             }
-            node_mem[i] = ram_size - usedmem;
+            numa_info[i].node_mem = ram_size - usedmem;
         }
 
         numa_total = 0;
         for (i = 0; i < nb_numa_nodes; i++) {
-            numa_total += node_mem[i];
+            numa_total += numa_info[i].node_mem;
         }
         if (numa_total != ram_size) {
             error_report("qemu: total memory size for NUMA nodes (%" PRIu64 ")"
@@ -169,7 +170,7 @@  void set_numa_nodes(void)
         }
 
         for (i = 0; i < nb_numa_nodes; i++) {
-            if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) {
+            if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
                 break;
             }
         }
@@ -179,7 +180,7 @@  void set_numa_nodes(void)
          */
         if (i == nb_numa_nodes) {
             for (i = 0; i < max_cpus; i++) {
-                set_bit(i, node_cpumask[i % nb_numa_nodes]);
+                set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
             }
         }
     }
@@ -192,7 +193,7 @@  void set_numa_modes(void)
 
     CPU_FOREACH(cpu) {
         for (i = 0; i < nb_numa_nodes; i++) {
-            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
+            if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
                 cpu->numa_node = i;
             }
         }
diff --git a/vl.c b/vl.c
index c05b099..0027164 100644
--- a/vl.c
+++ b/vl.c
@@ -195,8 +195,7 @@  static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
     QTAILQ_HEAD_INITIALIZER(fw_boot_order);
 
 int nb_numa_nodes;
-uint64_t node_mem[MAX_NODES];
-unsigned long *node_cpumask[MAX_NODES];
+NodeInfo numa_info[MAX_NODES];
 
 uint8_t qemu_uuid[16];
 bool qemu_uuid_set;
@@ -2921,8 +2920,8 @@  int main(int argc, char **argv, char **envp)
     translation = BIOS_ATA_TRANSLATION_AUTO;
 
     for (i = 0; i < MAX_NODES; i++) {
-        node_mem[i] = 0;
-        node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS);
+        numa_info[i].node_mem = 0;
+        bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
     }
 
     nb_numa_nodes = 0;