diff mbox

[V5,06/12] NUMA: parse guest numa nodes memory policy

Message ID 1374053373-30499-7-git-send-email-gaowanlong@cn.fujitsu.com
State New
Headers show

Commit Message

Wanlong Gao July 17, 2013, 9:29 a.m. UTC
The memory policy setting format is:
    policy={membind|interleave|preferred},host-nodes=[+|!]{all|N-N}
We are adding this setting as a suboption of "-numa mem,", so
the memory policy can then be set as follows:
    -numa node,nodeid=0,cpus=0 \
    -numa node,nodeid=1,cpus=1 \
    -numa mem,nodeid=0,size=1G,policy=membind,host-nodes=0-1 \
    -numa mem,nodeid=1,size=1G,policy=interleave,host-nodes=!1

Reviewed-by: Bandan Das <bsd@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
---
 include/sysemu/sysemu.h |  8 +++++
 numa.c                  | 83 +++++++++++++++++++++++++++++++++++++++++++++++++
 qapi-schema.json        |  8 ++++-
 vl.c                    |  2 ++
 4 files changed, 100 insertions(+), 1 deletion(-)

Comments

Eric Blake July 17, 2013, 12:31 p.m. UTC | #1
On 07/17/2013 03:29 AM, Wanlong Gao wrote:
> The memory policy setting format is like:
>     policy={membind|interleave|preferred},host-node=[+|!]{all|N-N}
> And we are adding this setting as a suboption of "-numa mem,",
> the memory policy then can be set like following:
>     -numa node,nodeid=0,cpus=0 \
>     -numa node,nodeid=1,cpus=1 \
>     -numa mem,nodeid=0,size=1G,policy=membind,host-nodes=0-1 \
>     -numa mem,nodeid=1,size=1G,policy=interleave,host-nodes=!1
> 
> Reviewed-by: Bandan Das <bsd@redhat.com>
> Signed-off-by: Andre Przywara <andre.przywara@amd.com>
> Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> ---

> +++ b/qapi-schema.json
> @@ -3717,9 +3717,15 @@
>  #
>  # @size: #optional memory size of this node
>  #
> +# @policy: #optional memory policy of this node
> +#
> +# @host-nodes: #optional host nodes for its memory policy
> +#
>  # Since 1.6
>  ##
>  { 'type': 'NumaMemOptions',
>    'data': {
>     '*nodeid':		'int',
> -   '*size':		'size' }}
> +   '*size':		'size',
> +   '*policy':		'str',

What are the valid values for 'policy'?  If it is a finite set, please
make an 'enum' type that lists the valid values, and make this
'*policy':'NumaMemPolicy' rather than a free-form 'str'.

> +   '*host-nodes':	'str' }}

Missing documentation on how this 'str' is formatted, and same concerns
as in 1/12 about whether it is the right JSON representation, or whether
you have crammed too much information into a single string that now
requires post-processing.  Why is an array not a better choice?
Wanlong Gao July 17, 2013, 1:12 p.m. UTC | #2
On 07/17/2013 08:31 PM, Eric Blake wrote:
> On 07/17/2013 03:29 AM, Wanlong Gao wrote:
>> The memory policy setting format is like:
>>     policy={membind|interleave|preferred},host-node=[+|!]{all|N-N}
>> And we are adding this setting as a suboption of "-numa mem,",
>> the memory policy then can be set like following:
>>     -numa node,nodeid=0,cpus=0 \
>>     -numa node,nodeid=1,cpus=1 \
>>     -numa mem,nodeid=0,size=1G,policy=membind,host-nodes=0-1 \
>>     -numa mem,nodeid=1,size=1G,policy=interleave,host-nodes=!1
>>
>> Reviewed-by: Bandan Das <bsd@redhat.com>
>> Signed-off-by: Andre Przywara <andre.przywara@amd.com>
>> Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
>> ---
> 
>> +++ b/qapi-schema.json
>> @@ -3717,9 +3717,15 @@
>>  #
>>  # @size: #optional memory size of this node
>>  #
>> +# @policy: #optional memory policy of this node
>> +#
>> +# @host-nodes: #optional host nodes for its memory policy
>> +#
>>  # Since 1.6
>>  ##
>>  { 'type': 'NumaMemOptions',
>>    'data': {
>>     '*nodeid':		'int',
>> -   '*size':		'size' }}
>> +   '*size':		'size',
>> +   '*policy':		'str',
> 
> What are the valid values for 'policy'?  If it is a finite set, please
> make an 'enum' type that lists the valid values, and make this
> '*policy':'NumaMemPolicy' rather than a free-form 'str'.

OK, I will follow this. Luiz suggested the same.

> 
>> +   '*host-nodes':	'str' }}
> 
> Missing documentation on how this 'str' is formatted, and same concerns
> as in 1/12 about whether it is the right JSON representation, or whether
> you have crammed too much information into a single string that now
> requires post-processing.  Why is an array not a better choice?

I will try to use an array here; thank you for your suggestion.

Thanks,
Wanlong Gao

>
diff mbox

Patch

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 28fe305..af17c02 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -130,10 +130,18 @@  extern QEMUClock *rtc_clock;
 
 #define MAX_NODES 64
 #define MAX_CPUMASK_BITS 255
+#define NODE_HOST_NONE        0x00
+#define NODE_HOST_BIND        0x01
+#define NODE_HOST_INTERLEAVE  0x02
+#define NODE_HOST_PREFERRED   0x03
+#define NODE_HOST_POLICY_MASK 0x03
+#define NODE_HOST_RELATIVE    0x04
 extern int nb_numa_nodes;
 typedef struct node_info {
     uint64_t node_mem;
     DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
+    DECLARE_BITMAP(host_mem, MAX_CPUMASK_BITS);
+    unsigned int flags;
 } NodeInfo;
 extern NodeInfo numa_info[MAX_NODES];
 extern QemuOptsList qemu_numa_opts;
diff --git a/numa.c b/numa.c
index 766e111..a2eceb1 100644
--- a/numa.c
+++ b/numa.c
@@ -96,6 +96,79 @@  static int numa_node_parse(NumaNodeOptions *opts)
     return numa_node_parse_cpus(nodenr, cpus);
 }
 
+static int numa_mem_parse_policy(int nodenr, const char *policy)
+{
+    if (!strcmp(policy, "interleave")) {
+        numa_info[nodenr].flags |= NODE_HOST_INTERLEAVE;
+    } else if (!strcmp(policy, "preferred")) {
+        numa_info[nodenr].flags |= NODE_HOST_PREFERRED;
+    } else if (!strcmp(policy, "membind")) {
+        numa_info[nodenr].flags |= NODE_HOST_BIND;
+    } else {
+        fprintf(stderr, "qemu: Invalid memory policy: %s\n", policy);
+        return -1;
+    }
+
+    return 0;
+}
+
+static int numa_mem_parse_hostnodes(int nodenr, const char *hostnodes)
+{
+    unsigned long long value, endvalue;
+    char *endptr;
+    bool clear = false;
+    unsigned long *bm = numa_info[nodenr].host_mem;
+
+    if (hostnodes[0] == '!') {
+        clear = true;
+        bitmap_fill(bm, MAX_CPUMASK_BITS);
+        hostnodes++;
+    }
+    if (hostnodes[0] == '+') {
+        numa_info[nodenr].flags |= NODE_HOST_RELATIVE;
+        hostnodes++;
+    }
+
+    if (!strcmp(hostnodes, "all")) {
+        bitmap_fill(bm, MAX_CPUMASK_BITS);
+        return 0;
+    }
+
+    if (parse_uint(hostnodes, &value, &endptr, 10) < 0)
+        goto error;
+    if (*endptr == '-') {
+        if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) {
+            goto error;
+        }
+    } else if (*endptr == '\0') {
+        endvalue = value;
+    } else {
+        goto error;
+    }
+
+    if (endvalue >= MAX_CPUMASK_BITS) {
+        endvalue = MAX_CPUMASK_BITS - 1;
+        fprintf(stderr,
+            "qemu: NUMA: A max of %d host nodes are supported\n",
+             MAX_CPUMASK_BITS);
+    }
+
+    if (endvalue < value) {
+        goto error;
+    }
+
+    if (clear)
+        bitmap_clear(bm, value, endvalue - value + 1);
+    else
+        bitmap_set(bm, value, endvalue - value + 1);
+
+    return 0;
+
+error:
+    fprintf(stderr, "qemu: Invalid host NUMA nodes range: %s\n", hostnodes);
+    return -1;
+}
+
 static int numa_mem_parse(NumaMemOptions *opts)
 {
     uint64_t nodenr, mem_size;
@@ -110,6 +183,16 @@  static int numa_mem_parse(NumaMemOptions *opts)
     mem_size = opts->size;
     numa_info[nodenr].node_mem = mem_size;
 
+    const char *policy = opts->policy;
+    if (numa_mem_parse_policy(nodenr, policy) == -1) {
+        return -1;
+    }
+
+    const char *hostnodes = opts->host_nodes;
+    if (numa_mem_parse_hostnodes(nodenr, hostnodes) == -1) {
+        return -1;
+    }
+
     return 0;
 }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index f753a35..49eac70 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -3717,9 +3717,15 @@ 
 #
 # @size: #optional memory size of this node
 #
+# @policy: #optional memory policy of this node
+#
+# @host-nodes: #optional host nodes for its memory policy
+#
 # Since 1.6
 ##
 { 'type': 'NumaMemOptions',
   'data': {
    '*nodeid':		'int',
-   '*size':		'size' }}
+   '*size':		'size',
+   '*policy':		'str',
+   '*host-nodes':	'str' }}
diff --git a/vl.c b/vl.c
index 5fdba97..dc8131c 100644
--- a/vl.c
+++ b/vl.c
@@ -2887,6 +2887,8 @@  int main(int argc, char **argv, char **envp)
     for (i = 0; i < MAX_NODES; i++) {
         numa_info[i].node_mem = 0;
         bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
+        bitmap_zero(numa_info[i].host_mem, MAX_CPUMASK_BITS);
+        numa_info[i].flags = NODE_HOST_NONE;
     }
 
     nb_numa_nodes = 0;