diff mbox

[V11,02/11] NUMA: split -numa option

Message ID 1377832250-1672-3-git-send-email-gaowanlong@cn.fujitsu.com
State New
Headers show

Commit Message

Wanlong Gao Aug. 30, 2013, 3:10 a.m. UTC
Change -numa option like following as Paolo suggested:
    -numa node,nodeid=0,cpus=0-1 \
    -numa mem,nodeid=0,size=1G

This new option will make later coming memory hotplug better.
And this new option is implemented using OptsVisitor.

And just remain "-numa node,mem=xx" as legacy.

Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
---
 Makefile.target         |   2 +-
 include/sysemu/sysemu.h |   3 +
 numa.c                  | 144 ++++++++++++++++++++++++++++++++++++++++++++++++
 qemu-options.hx         |   6 +-
 vl.c                    | 113 ++++++-------------------------------
 5 files changed, 168 insertions(+), 100 deletions(-)
 create mode 100644 numa.c

Comments

Eduardo Habkost Sept. 4, 2013, 1:49 a.m. UTC | #1
On Fri, Aug 30, 2013 at 11:10:41AM +0800, Wanlong Gao wrote:
> Change -numa option like following as Paolo suggested:
>     -numa node,nodeid=0,cpus=0-1 \
>     -numa mem,nodeid=0,size=1G
> 
> This new option will make later coming memory hotplug better.
> And this new option is implemented using OptsVisitor.
> 
> And just remain "-numa node,mem=xx" as legacy.
> 
> Reviewed-by: Laszlo Ersek <lersek@redhat.com>
> Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>

Would it be possible to first move the existing code as-is to numa.c,
then introduce qemu_numa_opts, and then introduce "-numa mem"? It would
make the patch much easier to review.

> ---
>  Makefile.target         |   2 +-
>  include/sysemu/sysemu.h |   3 +
>  numa.c                  | 144 ++++++++++++++++++++++++++++++++++++++++++++++++
>  qemu-options.hx         |   6 +-
>  vl.c                    | 113 ++++++-------------------------------
>  5 files changed, 168 insertions(+), 100 deletions(-)
>  create mode 100644 numa.c
> 
> diff --git a/Makefile.target b/Makefile.target
> index 9a49852..7e1fddf 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -113,7 +113,7 @@ endif #CONFIG_BSD_USER
>  #########################################################
>  # System emulator target
>  ifdef CONFIG_SOFTMMU
> -obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o
> +obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
>  obj-y += qtest.o
>  obj-y += hw/
>  obj-$(CONFIG_FDT) += device_tree.o
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index b1aa059..489b4b6 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -129,8 +129,11 @@ extern QEMUClockType rtc_clock;
>  #define MAX_NODES 64
>  #define MAX_CPUMASK_BITS 255
>  extern int nb_numa_nodes;
> +extern int nb_numa_mem_nodes;
>  extern uint64_t node_mem[MAX_NODES];
>  extern unsigned long *node_cpumask[MAX_NODES];
> +extern QemuOptsList qemu_numa_opts;
> +int numa_init_func(QemuOpts *opts, void *opaque);
>  
>  #define MAX_OPTION_ROMS 16
>  typedef struct QEMUOptionRom {
> diff --git a/numa.c b/numa.c
> new file mode 100644
> index 0000000..e6924f4
> --- /dev/null
> +++ b/numa.c
> @@ -0,0 +1,144 @@
> +/*
> + * QEMU System Emulator
> + *
> + * Copyright (c) 2013 Fujitsu Ltd.
> + * Author: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "sysemu/sysemu.h"
> +#include "qemu/bitmap.h"
> +#include "qapi-visit.h"
> +#include "qapi/opts-visitor.h"
> +#include "qapi/dealloc-visitor.h"
> +
> +QemuOptsList qemu_numa_opts = {
> +    .name = "numa",
> +    .implied_opt_name = "type",
> +    .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head),
> +    .desc = { { 0 } } /* validated with OptsVisitor */
> +};
> +
> +static int numa_node_parse(NumaNodeOptions *opts)
> +{
> +    uint16_t nodenr;
> +    uint16List *cpus = NULL;
> +
> +    if (opts->has_nodeid) {
> +        nodenr = opts->nodeid;
> +        if (nodenr >= MAX_NODES) {
> +            fprintf(stderr, "qemu: Max number of NUMA nodes reached: %"
> +                    PRIu16 "\n", nodenr);
> +            return -1;
> +        }
> +    } else {
> +        nodenr = nb_numa_nodes;
> +    }

If you make the (nodenr >= MAX_NODES) check unconditional, you simplify
the code and you won't need the nb_numa_nodes check at numa_init_func().

> +
> +    for (cpus = opts->cpus; cpus; cpus = cpus->next) {
> +        bitmap_set(node_cpumask[nodenr], cpus->value, 1);
> +    }

What if cpus->value > MAXCPUMASK_BITS?

> +
> +    if (opts->has_mem) {
> +        int64_t mem_size;
> +        char *endptr;
> +        mem_size = strtosz(opts->mem, &endptr);
> +        if (mem_size < 0 || *endptr) {
> +            fprintf(stderr, "qemu: invalid numa mem size: %s\n", opts->mem);
> +            return -1;
> +        }
> +        node_mem[nodenr] = mem_size;
> +    }
> +
> +    return 0;
> +}
> +
> +static int numa_mem_parse(NumaMemOptions *opts)
> +{
> +    uint16_t nodenr;
> +    uint64_t mem_size;
> +
> +    if (opts->has_nodeid) {
> +        nodenr = opts->nodeid;
> +        if (nodenr >= MAX_NODES) {
> +            fprintf(stderr, "qemu: Max number of NUMA nodes reached: %"
> +                    PRIu16 "\n", nodenr);
> +            return -1;
> +        }
> +    } else {
> +        nodenr = nb_numa_mem_nodes;

What if nb_numa_mem_nodes > MAX_NODES?

> +    }
> +
> +    if (opts->has_size) {
> +        mem_size = opts->size;
> +        node_mem[nodenr] = mem_size;
> +    }
> +
> +    return 0;
> +}
> +
> +int numa_init_func(QemuOpts *opts, void *opaque)
> +{
> +    NumaOptions *object = NULL;
> +    Error *err = NULL;
> +    int ret = 0;
> +
> +    {
> +        OptsVisitor *ov = opts_visitor_new(opts);
> +        visit_type_NumaOptions(opts_get_visitor(ov), &object, NULL, &err);
> +        opts_visitor_cleanup(ov);
> +    }
> +
> +    if (error_is_set(&err)) {
> +        fprintf(stderr, "qemu: %s\n", error_get_pretty(err));
> +        error_free(err);
> +        ret = -1;
> +        goto error;
> +    }
> +
> +    switch (object->kind) {
> +    case NUMA_OPTIONS_KIND_NODE:
> +        if (nb_numa_nodes >= MAX_NODES) {
> +            fprintf(stderr, "qemu: too many NUMA nodes\n");
> +            ret = -1;
> +            goto error;
> +        }
> +        ret = numa_node_parse(object->node);
> +        nb_numa_nodes++;
> +        break;
> +    case NUMA_OPTIONS_KIND_MEM:
> +        ret = numa_mem_parse(object->mem);
> +        nb_numa_mem_nodes++;
> +        break;
> +    default:
> +        fprintf(stderr, "qemu: Invalid NUMA options type.\n");
> +        ret = -1;
> +    }
> +
> +error:
> +    if (object) {
> +        QapiDeallocVisitor *dv = qapi_dealloc_visitor_new();
> +        visit_type_NumaOptions(qapi_dealloc_get_visitor(dv),
> +                               &object, NULL, NULL);
> +        qapi_dealloc_visitor_cleanup(dv);
> +    }
> +
> +    return ret;
> +}
> diff --git a/qemu-options.hx b/qemu-options.hx
> index d15338e..e9123b8 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -95,11 +95,13 @@ specifies the maximum number of hotpluggable CPUs.
>  ETEXI
>  
>  DEF("numa", HAS_ARG, QEMU_OPTION_numa,
> -    "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
> +    "-numa node[,nodeid=node][,cpus=cpu[-cpu]]\n"
> +    "-numa mem[,nodeid=node][,size=size]\n"
> +    , QEMU_ARCH_ALL)
>  STEXI
>  @item -numa @var{opts}
>  @findex -numa
> -Simulate a multi node NUMA system. If mem and cpus are omitted, resources
> +Simulate a multi node NUMA system. If @var{size} and @var{cpus} are omitted, resources
>  are split equally.
>  ETEXI
>  
> diff --git a/vl.c b/vl.c
> index dfbc071..0ef5c5a 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -250,6 +250,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
>      QTAILQ_HEAD_INITIALIZER(fw_boot_order);
>  
>  int nb_numa_nodes;
> +int nb_numa_mem_nodes;
>  uint64_t node_mem[MAX_NODES];
>  unsigned long *node_cpumask[MAX_NODES];
>  
> @@ -1330,102 +1331,6 @@ char *get_boot_devices_list(size_t *size)
>      return list;
>  }
>  
> -static void numa_node_parse_cpus(int nodenr, const char *cpus)
> -{
> -    char *endptr;
> -    unsigned long long value, endvalue;
> -
> -    /* Empty CPU range strings will be considered valid, they will simply
> -     * not set any bit in the CPU bitmap.
> -     */
> -    if (!*cpus) {
> -        return;
> -    }
> -
> -    if (parse_uint(cpus, &value, &endptr, 10) < 0) {
> -        goto error;
> -    }
> -    if (*endptr == '-') {
> -        if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) {
> -            goto error;
> -        }
> -    } else if (*endptr == '\0') {
> -        endvalue = value;
> -    } else {
> -        goto error;
> -    }
> -
> -    if (endvalue >= MAX_CPUMASK_BITS) {
> -        endvalue = MAX_CPUMASK_BITS - 1;
> -        fprintf(stderr,
> -            "qemu: NUMA: A max of %d VCPUs are supported\n",
> -             MAX_CPUMASK_BITS);
> -    }
> -
> -    if (endvalue < value) {
> -        goto error;
> -    }
> -
> -    bitmap_set(node_cpumask[nodenr], value, endvalue-value+1);
> -    return;
> -
> -error:
> -    fprintf(stderr, "qemu: Invalid NUMA CPU range: %s\n", cpus);
> -    exit(1);
> -}
> -
> -static void numa_add(const char *optarg)
> -{
> -    char option[128];
> -    char *endptr;
> -    unsigned long long nodenr;
> -
> -    optarg = get_opt_name(option, 128, optarg, ',');
> -    if (*optarg == ',') {
> -        optarg++;
> -    }
> -    if (!strcmp(option, "node")) {
> -
> -        if (nb_numa_nodes >= MAX_NODES) {
> -            fprintf(stderr, "qemu: too many NUMA nodes\n");
> -            exit(1);
> -        }
> -
> -        if (get_param_value(option, 128, "nodeid", optarg) == 0) {
> -            nodenr = nb_numa_nodes;
> -        } else {
> -            if (parse_uint_full(option, &nodenr, 10) < 0) {
> -                fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option);
> -                exit(1);
> -            }
> -        }
> -
> -        if (nodenr >= MAX_NODES) {
> -            fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr);
> -            exit(1);
> -        }
> -
> -        if (get_param_value(option, 128, "mem", optarg) == 0) {
> -            node_mem[nodenr] = 0;
> -        } else {
> -            int64_t sval;
> -            sval = strtosz(option, &endptr);
> -            if (sval < 0 || *endptr) {
> -                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
> -                exit(1);
> -            }
> -            node_mem[nodenr] = sval;
> -        }
> -        if (get_param_value(option, 128, "cpus", optarg) != 0) {
> -            numa_node_parse_cpus(nodenr, option);
> -        }
> -        nb_numa_nodes++;
> -    } else {
> -        fprintf(stderr, "Invalid -numa option: %s\n", option);
> -        exit(1);
> -    }
> -}
> -
>  static QemuOptsList qemu_smp_opts = {
>      .name = "smp-opts",
>      .implied_opt_name = "cpus",
> @@ -2961,6 +2866,7 @@ int main(int argc, char **argv, char **envp)
>      qemu_add_opts(&qemu_tpmdev_opts);
>      qemu_add_opts(&qemu_realtime_opts);
>      qemu_add_opts(&qemu_msg_opts);
> +    qemu_add_opts(&qemu_numa_opts);
>  
>      runstate_init();
>  
> @@ -2986,6 +2892,7 @@ int main(int argc, char **argv, char **envp)
>      }
>  
>      nb_numa_nodes = 0;
> +    nb_numa_mem_nodes = 0;
>      nb_nics = 0;
>  
>      bdrv_init_with_whitelist();
> @@ -3147,7 +3054,10 @@ int main(int argc, char **argv, char **envp)
>                  }
>                  break;
>              case QEMU_OPTION_numa:
> -                numa_add(optarg);
> +                opts = qemu_opts_parse(qemu_find_opts("numa"), optarg, 1);
> +                if (!opts) {
> +                    exit(1);
> +                }
>                  break;
>              case QEMU_OPTION_display:
>                  display_type = select_display(optarg);
> @@ -4228,6 +4138,15 @@ int main(int argc, char **argv, char **envp)
>  
>      register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
>  
> +    if (qemu_opts_foreach(qemu_find_opts("numa"), numa_init_func,
> +                          NULL, 1) != 0) {
> +        exit(1);
> +    }
> +
> +    if (nb_numa_mem_nodes > nb_numa_nodes) {
> +        nb_numa_nodes = nb_numa_mem_nodes;
> +    }
> +
>      if (nb_numa_nodes > 0) {
>          int i;
>  
> -- 
> 1.8.4
> 
>
Wanlong Gao Sept. 4, 2013, 3 a.m. UTC | #2
On 09/04/2013 09:49 AM, Eduardo Habkost wrote:
> On Fri, Aug 30, 2013 at 11:10:41AM +0800, Wanlong Gao wrote:
>> Change -numa option like following as Paolo suggested:
>>     -numa node,nodeid=0,cpus=0-1 \
>>     -numa mem,nodeid=0,size=1G
>>
>> This new option will make later coming memory hotplug better.
>> And this new option is implemented using OptsVisitor.
>>
>> And just remain "-numa node,mem=xx" as legacy.
>>
>> Reviewed-by: Laszlo Ersek <lersek@redhat.com>
>> Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> 
> Would it be possible to first move the existing code as-is to numa.c,
> then introduce qemu_numa_opts, and then introduce "-numa mem"? It would
> make the patch much easier to review.

I thought this patch is straightforward, but if you like I can split as you said. ;)

> 
>> ---
>>  Makefile.target         |   2 +-
>>  include/sysemu/sysemu.h |   3 +
>>  numa.c                  | 144 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  qemu-options.hx         |   6 +-
>>  vl.c                    | 113 ++++++-------------------------------
>>  5 files changed, 168 insertions(+), 100 deletions(-)
>>  create mode 100644 numa.c
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index 9a49852..7e1fddf 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -113,7 +113,7 @@ endif #CONFIG_BSD_USER
>>  #########################################################
>>  # System emulator target
>>  ifdef CONFIG_SOFTMMU
>> -obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o
>> +obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
>>  obj-y += qtest.o
>>  obj-y += hw/
>>  obj-$(CONFIG_FDT) += device_tree.o
>> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
>> index b1aa059..489b4b6 100644
>> --- a/include/sysemu/sysemu.h
>> +++ b/include/sysemu/sysemu.h
>> @@ -129,8 +129,11 @@ extern QEMUClockType rtc_clock;
>>  #define MAX_NODES 64
>>  #define MAX_CPUMASK_BITS 255
>>  extern int nb_numa_nodes;
>> +extern int nb_numa_mem_nodes;
>>  extern uint64_t node_mem[MAX_NODES];
>>  extern unsigned long *node_cpumask[MAX_NODES];
>> +extern QemuOptsList qemu_numa_opts;
>> +int numa_init_func(QemuOpts *opts, void *opaque);
>>  
>>  #define MAX_OPTION_ROMS 16
>>  typedef struct QEMUOptionRom {
>> diff --git a/numa.c b/numa.c
>> new file mode 100644
>> index 0000000..e6924f4
>> --- /dev/null
>> +++ b/numa.c
>> @@ -0,0 +1,144 @@
>> +/*
>> + * QEMU System Emulator
>> + *
>> + * Copyright (c) 2013 Fujitsu Ltd.
>> + * Author: Wanlong Gao <gaowanlong@cn.fujitsu.com>
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>> + * of this software and associated documentation files (the "Software"), to deal
>> + * in the Software without restriction, including without limitation the rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>> + * THE SOFTWARE.
>> + */
>> +
>> +#include "sysemu/sysemu.h"
>> +#include "qemu/bitmap.h"
>> +#include "qapi-visit.h"
>> +#include "qapi/opts-visitor.h"
>> +#include "qapi/dealloc-visitor.h"
>> +
>> +QemuOptsList qemu_numa_opts = {
>> +    .name = "numa",
>> +    .implied_opt_name = "type",
>> +    .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head),
>> +    .desc = { { 0 } } /* validated with OptsVisitor */
>> +};
>> +
>> +static int numa_node_parse(NumaNodeOptions *opts)
>> +{
>> +    uint16_t nodenr;
>> +    uint16List *cpus = NULL;
>> +
>> +    if (opts->has_nodeid) {
>> +        nodenr = opts->nodeid;
>> +        if (nodenr >= MAX_NODES) {
>> +            fprintf(stderr, "qemu: Max number of NUMA nodes reached: %"
>> +                    PRIu16 "\n", nodenr);
>> +            return -1;
>> +        }
>> +    } else {
>> +        nodenr = nb_numa_nodes;
>> +    }
> 
> If you make the (nodenr >= MAX_NODES) check unconditional, you simplify
> the code and you won't need the nb_numa_nodes check at numa_init_func().

Yeah, thank you.

> 
>> +
>> +    for (cpus = opts->cpus; cpus; cpus = cpus->next) {
>> +        bitmap_set(node_cpumask[nodenr], cpus->value, 1);
>> +    }
> 
> What if cpus->value > MAXCPUMASK_BITS?

Will check it.

> 
>> +
>> +    if (opts->has_mem) {
>> +        int64_t mem_size;
>> +        char *endptr;
>> +        mem_size = strtosz(opts->mem, &endptr);
>> +        if (mem_size < 0 || *endptr) {
>> +            fprintf(stderr, "qemu: invalid numa mem size: %s\n", opts->mem);
>> +            return -1;
>> +        }
>> +        node_mem[nodenr] = mem_size;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int numa_mem_parse(NumaMemOptions *opts)
>> +{
>> +    uint16_t nodenr;
>> +    uint64_t mem_size;
>> +
>> +    if (opts->has_nodeid) {
>> +        nodenr = opts->nodeid;
>> +        if (nodenr >= MAX_NODES) {
>> +            fprintf(stderr, "qemu: Max number of NUMA nodes reached: %"
>> +                    PRIu16 "\n", nodenr);
>> +            return -1;
>> +        }
>> +    } else {
>> +        nodenr = nb_numa_mem_nodes;
> 
> What if nb_numa_mem_nodes > MAX_NODES?

Yeah, need check unconditional, thank you.

Regards,
Wanlong Gao
diff mbox

Patch

diff --git a/Makefile.target b/Makefile.target
index 9a49852..7e1fddf 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -113,7 +113,7 @@  endif #CONFIG_BSD_USER
 #########################################################
 # System emulator target
 ifdef CONFIG_SOFTMMU
-obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o
+obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
 obj-y += qtest.o
 obj-y += hw/
 obj-$(CONFIG_FDT) += device_tree.o
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index b1aa059..489b4b6 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -129,8 +129,11 @@  extern QEMUClockType rtc_clock;
 #define MAX_NODES 64
 #define MAX_CPUMASK_BITS 255
 extern int nb_numa_nodes;
+extern int nb_numa_mem_nodes;
 extern uint64_t node_mem[MAX_NODES];
 extern unsigned long *node_cpumask[MAX_NODES];
+extern QemuOptsList qemu_numa_opts;
+int numa_init_func(QemuOpts *opts, void *opaque);
 
 #define MAX_OPTION_ROMS 16
 typedef struct QEMUOptionRom {
diff --git a/numa.c b/numa.c
new file mode 100644
index 0000000..e6924f4
--- /dev/null
+++ b/numa.c
@@ -0,0 +1,144 @@ 
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2013 Fujitsu Ltd.
+ * Author: Wanlong Gao <gaowanlong@cn.fujitsu.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "sysemu/sysemu.h"
+#include "qemu/bitmap.h"
+#include "qapi-visit.h"
+#include "qapi/opts-visitor.h"
+#include "qapi/dealloc-visitor.h"
+
+QemuOptsList qemu_numa_opts = {
+    .name = "numa",
+    .implied_opt_name = "type",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head),
+    .desc = { { 0 } } /* validated with OptsVisitor */
+};
+
+static int numa_node_parse(NumaNodeOptions *opts)
+{
+    uint16_t nodenr;
+    uint16List *cpus = NULL;
+
+    if (opts->has_nodeid) {
+        nodenr = opts->nodeid;
+        if (nodenr >= MAX_NODES) {
+            fprintf(stderr, "qemu: Max number of NUMA nodes reached: %"
+                    PRIu16 "\n", nodenr);
+            return -1;
+        }
+    } else {
+        nodenr = nb_numa_nodes;
+    }
+
+    for (cpus = opts->cpus; cpus; cpus = cpus->next) {
+        bitmap_set(node_cpumask[nodenr], cpus->value, 1);
+    }
+
+    if (opts->has_mem) {
+        int64_t mem_size;
+        char *endptr;
+        mem_size = strtosz(opts->mem, &endptr);
+        if (mem_size < 0 || *endptr) {
+            fprintf(stderr, "qemu: invalid numa mem size: %s\n", opts->mem);
+            return -1;
+        }
+        node_mem[nodenr] = mem_size;
+    }
+
+    return 0;
+}
+
+static int numa_mem_parse(NumaMemOptions *opts)
+{
+    uint16_t nodenr;
+    uint64_t mem_size;
+
+    if (opts->has_nodeid) {
+        nodenr = opts->nodeid;
+        if (nodenr >= MAX_NODES) {
+            fprintf(stderr, "qemu: Max number of NUMA nodes reached: %"
+                    PRIu16 "\n", nodenr);
+            return -1;
+        }
+    } else {
+        nodenr = nb_numa_mem_nodes;
+    }
+
+    if (opts->has_size) {
+        mem_size = opts->size;
+        node_mem[nodenr] = mem_size;
+    }
+
+    return 0;
+}
+
+int numa_init_func(QemuOpts *opts, void *opaque)
+{
+    NumaOptions *object = NULL;
+    Error *err = NULL;
+    int ret = 0;
+
+    {
+        OptsVisitor *ov = opts_visitor_new(opts);
+        visit_type_NumaOptions(opts_get_visitor(ov), &object, NULL, &err);
+        opts_visitor_cleanup(ov);
+    }
+
+    if (error_is_set(&err)) {
+        fprintf(stderr, "qemu: %s\n", error_get_pretty(err));
+        error_free(err);
+        ret = -1;
+        goto error;
+    }
+
+    switch (object->kind) {
+    case NUMA_OPTIONS_KIND_NODE:
+        if (nb_numa_nodes >= MAX_NODES) {
+            fprintf(stderr, "qemu: too many NUMA nodes\n");
+            ret = -1;
+            goto error;
+        }
+        ret = numa_node_parse(object->node);
+        nb_numa_nodes++;
+        break;
+    case NUMA_OPTIONS_KIND_MEM:
+        ret = numa_mem_parse(object->mem);
+        nb_numa_mem_nodes++;
+        break;
+    default:
+        fprintf(stderr, "qemu: Invalid NUMA options type.\n");
+        ret = -1;
+    }
+
+error:
+    if (object) {
+        QapiDeallocVisitor *dv = qapi_dealloc_visitor_new();
+        visit_type_NumaOptions(qapi_dealloc_get_visitor(dv),
+                               &object, NULL, NULL);
+        qapi_dealloc_visitor_cleanup(dv);
+    }
+
+    return ret;
+}
diff --git a/qemu-options.hx b/qemu-options.hx
index d15338e..e9123b8 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -95,11 +95,13 @@  specifies the maximum number of hotpluggable CPUs.
 ETEXI
 
 DEF("numa", HAS_ARG, QEMU_OPTION_numa,
-    "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
+    "-numa node[,nodeid=node][,cpus=cpu[-cpu]]\n"
+    "-numa mem[,nodeid=node][,size=size]\n"
+    , QEMU_ARCH_ALL)
 STEXI
 @item -numa @var{opts}
 @findex -numa
-Simulate a multi node NUMA system. If mem and cpus are omitted, resources
+Simulate a multi node NUMA system. If @var{size} and @var{cpus} are omitted, resources
 are split equally.
 ETEXI
 
diff --git a/vl.c b/vl.c
index dfbc071..0ef5c5a 100644
--- a/vl.c
+++ b/vl.c
@@ -250,6 +250,7 @@  static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
     QTAILQ_HEAD_INITIALIZER(fw_boot_order);
 
 int nb_numa_nodes;
+int nb_numa_mem_nodes;
 uint64_t node_mem[MAX_NODES];
 unsigned long *node_cpumask[MAX_NODES];
 
@@ -1330,102 +1331,6 @@  char *get_boot_devices_list(size_t *size)
     return list;
 }
 
-static void numa_node_parse_cpus(int nodenr, const char *cpus)
-{
-    char *endptr;
-    unsigned long long value, endvalue;
-
-    /* Empty CPU range strings will be considered valid, they will simply
-     * not set any bit in the CPU bitmap.
-     */
-    if (!*cpus) {
-        return;
-    }
-
-    if (parse_uint(cpus, &value, &endptr, 10) < 0) {
-        goto error;
-    }
-    if (*endptr == '-') {
-        if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) {
-            goto error;
-        }
-    } else if (*endptr == '\0') {
-        endvalue = value;
-    } else {
-        goto error;
-    }
-
-    if (endvalue >= MAX_CPUMASK_BITS) {
-        endvalue = MAX_CPUMASK_BITS - 1;
-        fprintf(stderr,
-            "qemu: NUMA: A max of %d VCPUs are supported\n",
-             MAX_CPUMASK_BITS);
-    }
-
-    if (endvalue < value) {
-        goto error;
-    }
-
-    bitmap_set(node_cpumask[nodenr], value, endvalue-value+1);
-    return;
-
-error:
-    fprintf(stderr, "qemu: Invalid NUMA CPU range: %s\n", cpus);
-    exit(1);
-}
-
-static void numa_add(const char *optarg)
-{
-    char option[128];
-    char *endptr;
-    unsigned long long nodenr;
-
-    optarg = get_opt_name(option, 128, optarg, ',');
-    if (*optarg == ',') {
-        optarg++;
-    }
-    if (!strcmp(option, "node")) {
-
-        if (nb_numa_nodes >= MAX_NODES) {
-            fprintf(stderr, "qemu: too many NUMA nodes\n");
-            exit(1);
-        }
-
-        if (get_param_value(option, 128, "nodeid", optarg) == 0) {
-            nodenr = nb_numa_nodes;
-        } else {
-            if (parse_uint_full(option, &nodenr, 10) < 0) {
-                fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option);
-                exit(1);
-            }
-        }
-
-        if (nodenr >= MAX_NODES) {
-            fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr);
-            exit(1);
-        }
-
-        if (get_param_value(option, 128, "mem", optarg) == 0) {
-            node_mem[nodenr] = 0;
-        } else {
-            int64_t sval;
-            sval = strtosz(option, &endptr);
-            if (sval < 0 || *endptr) {
-                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
-                exit(1);
-            }
-            node_mem[nodenr] = sval;
-        }
-        if (get_param_value(option, 128, "cpus", optarg) != 0) {
-            numa_node_parse_cpus(nodenr, option);
-        }
-        nb_numa_nodes++;
-    } else {
-        fprintf(stderr, "Invalid -numa option: %s\n", option);
-        exit(1);
-    }
-}
-
 static QemuOptsList qemu_smp_opts = {
     .name = "smp-opts",
     .implied_opt_name = "cpus",
@@ -2961,6 +2866,7 @@  int main(int argc, char **argv, char **envp)
     qemu_add_opts(&qemu_tpmdev_opts);
     qemu_add_opts(&qemu_realtime_opts);
     qemu_add_opts(&qemu_msg_opts);
+    qemu_add_opts(&qemu_numa_opts);
 
     runstate_init();
 
@@ -2986,6 +2892,7 @@  int main(int argc, char **argv, char **envp)
     }
 
     nb_numa_nodes = 0;
+    nb_numa_mem_nodes = 0;
     nb_nics = 0;
 
     bdrv_init_with_whitelist();
@@ -3147,7 +3054,10 @@  int main(int argc, char **argv, char **envp)
                 }
                 break;
             case QEMU_OPTION_numa:
-                numa_add(optarg);
+                opts = qemu_opts_parse(qemu_find_opts("numa"), optarg, 1);
+                if (!opts) {
+                    exit(1);
+                }
                 break;
             case QEMU_OPTION_display:
                 display_type = select_display(optarg);
@@ -4228,6 +4138,15 @@  int main(int argc, char **argv, char **envp)
 
     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
 
+    if (qemu_opts_foreach(qemu_find_opts("numa"), numa_init_func,
+                          NULL, 1) != 0) {
+        exit(1);
+    }
+
+    if (nb_numa_mem_nodes > nb_numa_nodes) {
+        nb_numa_nodes = nb_numa_mem_nodes;
+    }
+
     if (nb_numa_nodes > 0) {
         int i;