diff mbox

[V9,07/12] NUMA: parse guest numa nodes memory policy

Message ID 1377231003-2816-8-git-send-email-gaowanlong@cn.fujitsu.com
State New
Headers show

Commit Message

Wanlong Gao Aug. 23, 2013, 4:09 a.m. UTC
The memory policy setting format is:
    policy={default|membind|interleave|preferred}[,relative=true],host-nodes=N-N
This setting is added as a suboption of "-numa mem,", so the memory
policy can then be set as follows:
    -numa node,nodeid=0,cpus=0 \
    -numa node,nodeid=1,cpus=1 \
    -numa mem,nodeid=0,size=1G,policy=membind,host-nodes=0-1 \
    -numa mem,nodeid=1,size=1G,policy=interleave,relative=true,host-nodes=1

Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
---
 include/sysemu/sysemu.h |  3 +++
 numa.c                  | 13 +++++++++++++
 qapi-schema.json        | 31 +++++++++++++++++++++++++++++--
 vl.c                    |  3 +++
 4 files changed, 48 insertions(+), 2 deletions(-)

Comments

Andrew Jones Aug. 23, 2013, 2:11 p.m. UTC | #1
----- Original Message -----
> The memory policy setting format is like:
>     policy={default|membind|interleave|preferred}[,relative=true],host-nodes=N-N
> And we are adding this setting as a suboption of "-numa mem,",
> the memory policy then can be set like following:
>     -numa node,nodeid=0,cpus=0 \
>     -numa node,nodeid=1,cpus=1 \
>     -numa mem,nodeid=0,size=1G,policy=membind,host-nodes=0-1 \
>     -numa mem,nodeid=1,size=1G,policy=interleave,relative=true,host-nodes=1
> 
> Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> ---
>  include/sysemu/sysemu.h |  3 +++
>  numa.c                  | 13 +++++++++++++
>  qapi-schema.json        | 31 +++++++++++++++++++++++++++++--
>  vl.c                    |  3 +++
>  4 files changed, 48 insertions(+), 2 deletions(-)
> 
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index b683d08..81d16a5 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -134,6 +134,9 @@ extern int nb_numa_mem_nodes;
>  typedef struct node_info {
>      uint64_t node_mem;
>      DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
> +    DECLARE_BITMAP(host_mem, MAX_CPUMASK_BITS);
> +    NumaNodePolicy policy;
> +    bool relative;
>  } NodeInfo;
>  extern NodeInfo numa_info[MAX_NODES];
>  extern QemuOptsList qemu_numa_opts;
> diff --git a/numa.c b/numa.c
> index 3e2dfc1..4ccc6cb 100644
> --- a/numa.c
> +++ b/numa.c
> @@ -74,6 +74,7 @@ static int numa_mem_parse(NumaMemOptions *opts)
>  {
>      uint16_t nodenr;
>      uint64_t mem_size;
> +    uint16List *nodes;
>  
>      if (opts->has_nodeid) {
>          nodenr = opts->nodeid;
> @@ -91,6 +92,18 @@ static int numa_mem_parse(NumaMemOptions *opts)
>          numa_info[nodenr].node_mem = mem_size;
>      }
>  
> +    if (opts->has_policy) {
> +        numa_info[nodenr].policy = opts->policy;
> +    }
> +
> +    if (opts->has_relative) {
> +        numa_info[nodenr].relative = opts->relative;
> +    }
> +
> +    for (nodes = opts->host_nodes; nodes; nodes = nodes->next) {
> +        bitmap_set(numa_info[nodenr].host_mem, nodes->value, 1);
> +    }
> +
>      return 0;
>  }
>  
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 11851a1..650741f 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -3806,6 +3806,24 @@
>     '*mem':    'str' }}
>  
>  ##
> +# @NumaNodePolicy
> +#
> +# NUMA node policy types
> +#
> +# @default: restore default policy, remove any nondefault policy
> +#
> +# @membind: a strict policy that restricts memory allocation to the
> +#           nodes specified
> +#
> +# @interleave: the page allocations is interleaved across the set
> +#              of nodes specified
> +#
> +# @preferred: set the preferred node for allocation
> +##
> +{ 'enum': 'NumaNodePolicy',
> +  'data': [ 'default', 'membind', 'interleave', 'preferred' ] }
> +
> +##
>  # @NumaMemOptions
>  #
>  # Set memory information of guest NUMA node. (for OptsVisitor)
> @@ -3814,9 +3832,18 @@
>  #
>  # @size: #optional memory size of this node
>  #
> +# @policy: #optional memory policy of this node
> +#
> +# @relative: #optional if the nodes specified are relative
> +#
> +# @host-nodes: #optional host nodes for its memory policy
> +#
>  # Since 1.7
>  ##
>  { 'type': 'NumaMemOptions',
>    'data': {
> -   '*nodeid': 'uint16',
> -   '*size':   'size' }}
> +   '*nodeid':     'uint16',
> +   '*size':       'size',
> +   '*policy':     'NumaNodePolicy',
> +   '*relative':   'bool',
> +   '*host-nodes': ['uint16'] }}
> diff --git a/vl.c b/vl.c
> index 2377b67..91b0d76 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -2888,6 +2888,9 @@ int main(int argc, char **argv, char **envp)
>      for (i = 0; i < MAX_NODES; i++) {
>          numa_info[i].node_mem = 0;
>          bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
> +        bitmap_zero(numa_info[i].host_mem, MAX_CPUMASK_BITS);

Shouldn't the bitmap size of host_mem be MAX_NODES? If so, and you
change it, then make sure the find_last_bit() call also gets
updated in patch 8/12, and anywhere else needed.

drew

> +        numa_info[i].policy = NUMA_NODE_POLICY_DEFAULT;
> +        numa_info[i].relative = false;
>      }
>  
>      nb_numa_nodes = 0;
> --
> 1.8.4.rc4
> 
> 
>
Wanlong Gao Aug. 26, 2013, 1:07 a.m. UTC | #2
On 08/23/2013 10:11 PM, Andrew Jones wrote:
> 
> 
> ----- Original Message -----
>> The memory policy setting format is like:
>>     policy={default|membind|interleave|preferred}[,relative=true],host-nodes=N-N
>> And we are adding this setting as a suboption of "-numa mem,",
>> the memory policy then can be set like following:
>>     -numa node,nodeid=0,cpus=0 \
>>     -numa node,nodeid=1,cpus=1 \
>>     -numa mem,nodeid=0,size=1G,policy=membind,host-nodes=0-1 \
>>     -numa mem,nodeid=1,size=1G,policy=interleave,relative=true,host-nodes=1
>>
>> Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
>> ---
>>  include/sysemu/sysemu.h |  3 +++
>>  numa.c                  | 13 +++++++++++++
>>  qapi-schema.json        | 31 +++++++++++++++++++++++++++++--
>>  vl.c                    |  3 +++
>>  4 files changed, 48 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
>> index b683d08..81d16a5 100644
>> --- a/include/sysemu/sysemu.h
>> +++ b/include/sysemu/sysemu.h
>> @@ -134,6 +134,9 @@ extern int nb_numa_mem_nodes;
>>  typedef struct node_info {
>>      uint64_t node_mem;
>>      DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
>> +    DECLARE_BITMAP(host_mem, MAX_CPUMASK_BITS);
>> +    NumaNodePolicy policy;
>> +    bool relative;
>>  } NodeInfo;
>>  extern NodeInfo numa_info[MAX_NODES];
>>  extern QemuOptsList qemu_numa_opts;
>> diff --git a/numa.c b/numa.c
>> index 3e2dfc1..4ccc6cb 100644
>> --- a/numa.c
>> +++ b/numa.c
>> @@ -74,6 +74,7 @@ static int numa_mem_parse(NumaMemOptions *opts)
>>  {
>>      uint16_t nodenr;
>>      uint64_t mem_size;
>> +    uint16List *nodes;
>>  
>>      if (opts->has_nodeid) {
>>          nodenr = opts->nodeid;
>> @@ -91,6 +92,18 @@ static int numa_mem_parse(NumaMemOptions *opts)
>>          numa_info[nodenr].node_mem = mem_size;
>>      }
>>  
>> +    if (opts->has_policy) {
>> +        numa_info[nodenr].policy = opts->policy;
>> +    }
>> +
>> +    if (opts->has_relative) {
>> +        numa_info[nodenr].relative = opts->relative;
>> +    }
>> +
>> +    for (nodes = opts->host_nodes; nodes; nodes = nodes->next) {
>> +        bitmap_set(numa_info[nodenr].host_mem, nodes->value, 1);
>> +    }
>> +
>>      return 0;
>>  }
>>  
>> diff --git a/qapi-schema.json b/qapi-schema.json
>> index 11851a1..650741f 100644
>> --- a/qapi-schema.json
>> +++ b/qapi-schema.json
>> @@ -3806,6 +3806,24 @@
>>     '*mem':    'str' }}
>>  
>>  ##
>> +# @NumaNodePolicy
>> +#
>> +# NUMA node policy types
>> +#
>> +# @default: restore default policy, remove any nondefault policy
>> +#
>> +# @membind: a strict policy that restricts memory allocation to the
>> +#           nodes specified
>> +#
>> +# @interleave: the page allocations is interleaved across the set
>> +#              of nodes specified
>> +#
>> +# @preferred: set the preferred node for allocation
>> +##
>> +{ 'enum': 'NumaNodePolicy',
>> +  'data': [ 'default', 'membind', 'interleave', 'preferred' ] }
>> +
>> +##
>>  # @NumaMemOptions
>>  #
>>  # Set memory information of guest NUMA node. (for OptsVisitor)
>> @@ -3814,9 +3832,18 @@
>>  #
>>  # @size: #optional memory size of this node
>>  #
>> +# @policy: #optional memory policy of this node
>> +#
>> +# @relative: #optional if the nodes specified are relative
>> +#
>> +# @host-nodes: #optional host nodes for its memory policy
>> +#
>>  # Since 1.7
>>  ##
>>  { 'type': 'NumaMemOptions',
>>    'data': {
>> -   '*nodeid': 'uint16',
>> -   '*size':   'size' }}
>> +   '*nodeid':     'uint16',
>> +   '*size':       'size',
>> +   '*policy':     'NumaNodePolicy',
>> +   '*relative':   'bool',
>> +   '*host-nodes': ['uint16'] }}
>> diff --git a/vl.c b/vl.c
>> index 2377b67..91b0d76 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -2888,6 +2888,9 @@ int main(int argc, char **argv, char **envp)
>>      for (i = 0; i < MAX_NODES; i++) {
>>          numa_info[i].node_mem = 0;
>>          bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
>> +        bitmap_zero(numa_info[i].host_mem, MAX_CPUMASK_BITS);
> 
> Shouldn't the bitmap size of host_mem be MAX_NODES? If so, and you

MAX_NODES is for the number of guest NUMA nodes, but this bitmap is for host
NUMA nodes. AFAIK, MAX_NODES is not big enough for the number of host nodes,
since the default host kernel NODES_SHIFT is 9.

Thanks,
Wanlong Gao

> change it, then make sure the find_last_bit() call also gets
> updated in patch 8/12, and anywhere else needed.
> 
> drew
> 
>> +        numa_info[i].policy = NUMA_NODE_POLICY_DEFAULT;
>> +        numa_info[i].relative = false;
>>      }
>>  
>>      nb_numa_nodes = 0;
>> --
>> 1.8.4.rc4
>>
>>
>>
>
Andrew Jones Aug. 26, 2013, 7:12 a.m. UTC | #3
----- Original Message -----
> On 08/23/2013 10:11 PM, Andrew Jones wrote:
> > 
> > 
> > ----- Original Message -----
> >> The memory policy setting format is like:
> >>     policy={default|membind|interleave|preferred}[,relative=true],host-nodes=N-N
> >> And we are adding this setting as a suboption of "-numa mem,",
> >> the memory policy then can be set like following:
> >>     -numa node,nodeid=0,cpus=0 \
> >>     -numa node,nodeid=1,cpus=1 \
> >>     -numa mem,nodeid=0,size=1G,policy=membind,host-nodes=0-1 \
> >>     -numa
> >>     mem,nodeid=1,size=1G,policy=interleave,relative=true,host-nodes=1
> >>
> >> Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> >> ---
> >>  include/sysemu/sysemu.h |  3 +++
> >>  numa.c                  | 13 +++++++++++++
> >>  qapi-schema.json        | 31 +++++++++++++++++++++++++++++--
> >>  vl.c                    |  3 +++
> >>  4 files changed, 48 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> >> index b683d08..81d16a5 100644
> >> --- a/include/sysemu/sysemu.h
> >> +++ b/include/sysemu/sysemu.h
> >> @@ -134,6 +134,9 @@ extern int nb_numa_mem_nodes;
> >>  typedef struct node_info {
> >>      uint64_t node_mem;
> >>      DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
> >> +    DECLARE_BITMAP(host_mem, MAX_CPUMASK_BITS);
> >> +    NumaNodePolicy policy;
> >> +    bool relative;
> >>  } NodeInfo;
> >>  extern NodeInfo numa_info[MAX_NODES];
> >>  extern QemuOptsList qemu_numa_opts;
> >> diff --git a/numa.c b/numa.c
> >> index 3e2dfc1..4ccc6cb 100644
> >> --- a/numa.c
> >> +++ b/numa.c
> >> @@ -74,6 +74,7 @@ static int numa_mem_parse(NumaMemOptions *opts)
> >>  {
> >>      uint16_t nodenr;
> >>      uint64_t mem_size;
> >> +    uint16List *nodes;
> >>  
> >>      if (opts->has_nodeid) {
> >>          nodenr = opts->nodeid;
> >> @@ -91,6 +92,18 @@ static int numa_mem_parse(NumaMemOptions *opts)
> >>          numa_info[nodenr].node_mem = mem_size;
> >>      }
> >>  
> >> +    if (opts->has_policy) {
> >> +        numa_info[nodenr].policy = opts->policy;
> >> +    }
> >> +
> >> +    if (opts->has_relative) {
> >> +        numa_info[nodenr].relative = opts->relative;
> >> +    }
> >> +
> >> +    for (nodes = opts->host_nodes; nodes; nodes = nodes->next) {
> >> +        bitmap_set(numa_info[nodenr].host_mem, nodes->value, 1);
> >> +    }
> >> +
> >>      return 0;
> >>  }
> >>  
> >> diff --git a/qapi-schema.json b/qapi-schema.json
> >> index 11851a1..650741f 100644
> >> --- a/qapi-schema.json
> >> +++ b/qapi-schema.json
> >> @@ -3806,6 +3806,24 @@
> >>     '*mem':    'str' }}
> >>  
> >>  ##
> >> +# @NumaNodePolicy
> >> +#
> >> +# NUMA node policy types
> >> +#
> >> +# @default: restore default policy, remove any nondefault policy
> >> +#
> >> +# @membind: a strict policy that restricts memory allocation to the
> >> +#           nodes specified
> >> +#
> >> +# @interleave: the page allocations is interleaved across the set
> >> +#              of nodes specified
> >> +#
> >> +# @preferred: set the preferred node for allocation
> >> +##
> >> +{ 'enum': 'NumaNodePolicy',
> >> +  'data': [ 'default', 'membind', 'interleave', 'preferred' ] }
> >> +
> >> +##
> >>  # @NumaMemOptions
> >>  #
> >>  # Set memory information of guest NUMA node. (for OptsVisitor)
> >> @@ -3814,9 +3832,18 @@
> >>  #
> >>  # @size: #optional memory size of this node
> >>  #
> >> +# @policy: #optional memory policy of this node
> >> +#
> >> +# @relative: #optional if the nodes specified are relative
> >> +#
> >> +# @host-nodes: #optional host nodes for its memory policy
> >> +#
> >>  # Since 1.7
> >>  ##
> >>  { 'type': 'NumaMemOptions',
> >>    'data': {
> >> -   '*nodeid': 'uint16',
> >> -   '*size':   'size' }}
> >> +   '*nodeid':     'uint16',
> >> +   '*size':       'size',
> >> +   '*policy':     'NumaNodePolicy',
> >> +   '*relative':   'bool',
> >> +   '*host-nodes': ['uint16'] }}
> >> diff --git a/vl.c b/vl.c
> >> index 2377b67..91b0d76 100644
> >> --- a/vl.c
> >> +++ b/vl.c
> >> @@ -2888,6 +2888,9 @@ int main(int argc, char **argv, char **envp)
> >>      for (i = 0; i < MAX_NODES; i++) {
> >>          numa_info[i].node_mem = 0;
> >>          bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
> >> +        bitmap_zero(numa_info[i].host_mem, MAX_CPUMASK_BITS);
> > 
> > Shouldn't the bitmap size of host_mem be MAX_NODES? If so, and you
> 
> MAX_NODES is for guest numa nodes number, but this bitmap is for host
> numa nodes. AFAIK, this MAX_NODES is not big enough for host nodes number,
> the default host kernel NODES_SHIFT is 9.

MAX_CPUMASK_BITS == 255 is also too small for a default node shift of 9.
You have to pick something, and then manage that limit. I think MAX_NODES
== 64 will be big enough for quite some time, but libnuma chooses 128 (see
/usr/include/numa.h:NUMA_NUM_NODES). So maybe we can bump MAX_NODES up to
128? You can also add a warning for when you detect at startup that the
machine has more than MAX_NODES nodes.

drew

> 
> Thanks,
> Wanlong Gao
> 
> > change it, then make sure the find_last_bit() call also gets
> > updated in patch 8/12, and anywhere else needed.
> > 
> > drew
> > 
> >> +        numa_info[i].policy = NUMA_NODE_POLICY_DEFAULT;
> >> +        numa_info[i].relative = false;
> >>      }
> >>  
> >>      nb_numa_nodes = 0;
> >> --
> >> 1.8.4.rc4
> >>
> >>
> >>
> > 
> 
>
diff mbox

Patch

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index b683d08..81d16a5 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -134,6 +134,9 @@  extern int nb_numa_mem_nodes;
 typedef struct node_info {
     uint64_t node_mem;
     DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
+    DECLARE_BITMAP(host_mem, MAX_CPUMASK_BITS);
+    NumaNodePolicy policy;
+    bool relative;
 } NodeInfo;
 extern NodeInfo numa_info[MAX_NODES];
 extern QemuOptsList qemu_numa_opts;
diff --git a/numa.c b/numa.c
index 3e2dfc1..4ccc6cb 100644
--- a/numa.c
+++ b/numa.c
@@ -74,6 +74,7 @@  static int numa_mem_parse(NumaMemOptions *opts)
 {
     uint16_t nodenr;
     uint64_t mem_size;
+    uint16List *nodes;
 
     if (opts->has_nodeid) {
         nodenr = opts->nodeid;
@@ -91,6 +92,18 @@  static int numa_mem_parse(NumaMemOptions *opts)
         numa_info[nodenr].node_mem = mem_size;
     }
 
+    if (opts->has_policy) {
+        numa_info[nodenr].policy = opts->policy;
+    }
+
+    if (opts->has_relative) {
+        numa_info[nodenr].relative = opts->relative;
+    }
+
+    for (nodes = opts->host_nodes; nodes; nodes = nodes->next) {
+        bitmap_set(numa_info[nodenr].host_mem, nodes->value, 1);
+    }
+
     return 0;
 }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index 11851a1..650741f 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -3806,6 +3806,24 @@ 
    '*mem':    'str' }}
 
 ##
+# @NumaNodePolicy
+#
+# NUMA node policy types
+#
+# @default: restore default policy, remove any nondefault policy
+#
+# @membind: a strict policy that restricts memory allocation to the
+#           nodes specified
+#
+# @interleave: page allocations are interleaved across the set
+#              of nodes specified
+#
+# @preferred: set the preferred node for allocation
+##
+{ 'enum': 'NumaNodePolicy',
+  'data': [ 'default', 'membind', 'interleave', 'preferred' ] }
+
+##
 # @NumaMemOptions
 #
 # Set memory information of guest NUMA node. (for OptsVisitor)
@@ -3814,9 +3832,18 @@ 
 #
 # @size: #optional memory size of this node
 #
+# @policy: #optional memory policy of this node
+#
+# @relative: #optional if the nodes specified are relative
+#
+# @host-nodes: #optional host nodes for its memory policy
+#
 # Since 1.7
 ##
 { 'type': 'NumaMemOptions',
   'data': {
-   '*nodeid': 'uint16',
-   '*size':   'size' }}
+   '*nodeid':     'uint16',
+   '*size':       'size',
+   '*policy':     'NumaNodePolicy',
+   '*relative':   'bool',
+   '*host-nodes': ['uint16'] }}
diff --git a/vl.c b/vl.c
index 2377b67..91b0d76 100644
--- a/vl.c
+++ b/vl.c
@@ -2888,6 +2888,9 @@  int main(int argc, char **argv, char **envp)
     for (i = 0; i < MAX_NODES; i++) {
         numa_info[i].node_mem = 0;
         bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
+        bitmap_zero(numa_info[i].host_mem, MAX_CPUMASK_BITS);
+        numa_info[i].policy = NUMA_NODE_POLICY_DEFAULT;
+        numa_info[i].relative = false;
     }
 
     nb_numa_nodes = 0;