diff mbox

[v19,05/11] numa: add -numa node, memdev= option

Message ID decdf8ba914702484025bf0432a1554f3669a06e.1393917248.git.hutao@cn.fujitsu.com
State New
Headers show

Commit Message

Hu Tao March 4, 2014, 7:28 a.m. UTC
From: Paolo Bonzini <pbonzini@redhat.com>

This option provides the infrastructure for binding guest NUMA nodes
to host NUMA nodes.  For example:

 -object memory-ram,size=1024M,policy=membind,host-nodes=0,id=ram-node0 \
 -numa node,nodeid=0,cpus=0,memdev=ram-node0 \
 -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \
 -numa node,nodeid=1,cpus=1,memdev=ram-node1

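The option replaces "-numa node,mem=": the two cannot be combined for the
same node, and memdev= must be specified for either all nodes or none.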

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
[don't include sysemu/hostmem.h in sysemu/sysemu.h to avoid mutual
 inclusion.]
---
 include/sysemu/sysemu.h |  3 +++
 numa.c                  | 66 +++++++++++++++++++++++++++++++++++++++++++++++--
 qapi-schema.json        |  6 ++++-
 3 files changed, 72 insertions(+), 3 deletions(-)
diff mbox

Patch

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index e9da760..40f1df9 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -137,9 +137,12 @@  extern QEMUClockType rtc_clock;
 #define MAX_NODES 128
 #define MAX_CPUMASK_BITS 255
 extern int nb_numa_nodes;
+struct HostMemoryBackend;
+typedef struct HostMemoryBackend HostMemoryBackend;
 typedef struct node_info {
     uint64_t node_mem;
     DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
+    HostMemoryBackend *node_memdev;
 } NodeInfo;
 extern NodeInfo numa_info[MAX_NODES];
 void set_numa_nodes(void);
diff --git a/numa.c b/numa.c
index 6441d14..8d05078 100644
--- a/numa.c
+++ b/numa.c
@@ -24,9 +24,12 @@ 
  */
 
 #include "sysemu/sysemu.h"
+#include "sysemu/hostmem.h"
 #include "qapi-visit.h"
 #include "qapi/opts-visitor.h"
 #include "qapi/dealloc-visitor.h"
+#include "qapi/qmp/qerror.h"
+
 QemuOptsList qemu_numa_opts = {
     .name = "numa",
     .implied_opt_name = "type",
@@ -34,10 +37,13 @@  QemuOptsList qemu_numa_opts = {
     .desc = { { 0 } } /* validated with OptsVisitor */
 };
 
+static int have_memdevs = -1;
+
 static int numa_node_parse(NumaNodeOptions *node, QemuOpts *opts)
 {
     uint16_t nodenr;
     uint16List *cpus = NULL;
+    Error *local_err = NULL;
 
     if (node->has_nodeid) {
         nodenr = node->nodeid;
@@ -60,6 +66,20 @@  static int numa_node_parse(NumaNodeOptions *node, QemuOpts *opts)
         bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
     }
 
+    if (node->has_mem && node->has_memdev) {
+        fprintf(stderr, "qemu: cannot specify both mem= and memdev=\n");
+        return -1;
+    }
+    /* The first node parsed decides whether all nodes must use memdev=. */
+    if (have_memdevs == -1) {
+        have_memdevs = node->has_memdev;
+    }
+    if (node->has_memdev != have_memdevs) {
+        fprintf(stderr, "qemu: memdev option must be specified for either "
+                "all or no nodes\n");
+        return -1;
+    }
+
     if (node->has_mem) {
         uint64_t mem_size = node->mem;
         const char *mem_str = qemu_opt_get(opts, "mem");
@@ -69,7 +89,19 @@  static int numa_node_parse(NumaNodeOptions *node, QemuOpts *opts)
         }
         numa_info[nodenr].node_mem = mem_size;
     }
+    if (node->has_memdev) {
+        Object *o;
+        o = object_resolve_path_type(node->memdev, TYPE_MEMORY_BACKEND, NULL);
+        if (!o) {
+            error_setg(&local_err, "memdev=%s is ambiguous", node->memdev);
+            qerror_report_err(local_err);
+            return -1;
+        }
 
+        object_ref(o);
+        numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL);
+        numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
+    }
     return 0;
 }
 
@@ -188,12 +220,42 @@  void set_numa_modes(void)
     }
 }
 
+static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
+                                           const char *name,
+                                           QEMUMachineInitArgs *args)
+{
+    uint64_t ram_size = args->ram_size;
+
+    memory_region_init_ram(mr, owner, name, ram_size);
+    vmstate_register_ram_global(mr);
+}
+
 void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
                                           const char *name,
                                           QEMUMachineInitArgs *args)
 {
     uint64_t ram_size = args->ram_size;
+    uint64_t addr = 0;
+    int i;
 
-    memory_region_init_ram(mr, owner, name, ram_size);
-    vmstate_register_ram_global(mr);
+    if (nb_numa_nodes == 0 || !have_memdevs) {
+        allocate_system_memory_nonnuma(mr, owner, name, args);
+        return;
+    }
+
+    memory_region_init(mr, owner, name, ram_size);
+    for (i = 0; i < nb_numa_nodes; i++) {
+        Error *local_err = NULL;
+        uint64_t size = numa_info[i].node_mem;
+        HostMemoryBackend *backend = numa_info[i].node_memdev;
+        MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err);
+        if (local_err) {
+            qerror_report_err(local_err);
+            exit(1);
+        }
+
+        memory_region_add_subregion(mr, addr, seg);
+        vmstate_register_ram_global(seg);
+        addr += size;
+    }
 }
diff --git a/qapi-schema.json b/qapi-schema.json
index 951cc0a..62e0b83 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4542,6 +4542,9 @@ 
 #
 # @mem: #optional memory size of this node (equally divide total memory among
 #        nodes if omitted)
+# @memdev: #optional memory backend object.  If specified for one node,
+#          it must be specified for all nodes.
+#
 #
 # Since: 2.1
 ##
@@ -4549,4 +4552,5 @@ 
   'data': {
    '*nodeid': 'uint16',
    '*cpus':   ['uint16'],
-   '*mem':    'size' }}
+   '*mem':    'size',
+   '*memdev': 'str' }}