diff mbox

[v3] vl.c: Support multiple CPU ranges on -numa option

Message ID jpgd2rjfaeq.fsf@redhat.com
State New
Headers show

Commit Message

Bandan Das June 18, 2013, 8:09 p.m. UTC
This allows us to use the cpu property multiple times
to specify multiple cpu (ranges) to the -numa option :

-numa node,cpu=1,cpu=2,cpu=4
or
-numa node,cpu=1-3,cpu=5

Signed-off-by: Bandan Das <bsd@redhat.com>
---
 v3: Convert to using QemuOpts
 Use -cpu rather than -cpus, which probably makes it more 
meaningful for non-range arguments

Sorry for reviving this :)

This is a follow up to earlier proposals sent by Eduardo.

References :
1. http://lists.gnu.org/archive/html/qemu-devel/2013-02/msg03832.html
2. https://lists.gnu.org/archive/html/qemu-devel/2013-02/msg03857.html

So, basically the format seemed easier to work with if we are thinking 
of using QemuOpts for -numa. Using -cpu rather than cpus probably
makes it less ambiguous as well IMO. However, it's probably not a good idea
if the current syntax is well established ?

---
 qemu-options.hx |   5 +--
 vl.c            | 108 ++++++++++++++++++++++++++++++++++----------------------
 2 files changed, 68 insertions(+), 45 deletions(-)

Comments

Igor Mammedov June 19, 2013, 11:42 a.m. UTC | #1
On Tue, 18 Jun 2013 16:09:49 -0400
Bandan Das <bsd@redhat.com> wrote:

> 
> This allows us to use the cpu property multiple times
> to specify multiple cpu (ranges) to the -numa option :
> 
> -numa node,cpu=1,cpu=2,cpu=4
> or
> -numa node,cpu=1-3,cpu=5
> 
> Signed-off-by: Bandan Das <bsd@redhat.com>
> ---
>  v3: Convert to using QemuOpts
>  Use -cpu rather than -cpus which probably probably makes it more 
> meaningful for non-range arguments
> 
> Sorry for reviving this up :)
> 
> This is a follow up to earlier proposals sent by Eduardo.
> 
> References :
> 1. http://lists.gnu.org/archive/html/qemu-devel/2013-02/msg03832.html
> 2. https://lists.gnu.org/archive/html/qemu-devel/2013-02/msg03857.html
> 
> So, basically the format seemed easier to work with if we are thinking 
> of using QemuOpts for -numa. Using -cpu rather than cpus probably
> makes it less ambiguous as well IMO. However, it's probably not a good idea
> if the current syntax is well established ?
In the context of x86, allowing CPU threads to be specified by cpu_index isn't correct,
since the node calculated from the APIC ID and the node it gets from the ACPI table could differ.

It would be better for the CLI to accept a socket number and always build a
correct NUMA mapping internally using the APIC IDs from the CPUs, as is done on real
hardware.

In addition, it would allow us to deprecate the use of cpu_index in the CLI, and
simplify the subsequent refactoring to use socket/[core/]thread as the means to address/
specify particular CPUs there, and later in the monitor/QMP interface as well.

> 
> ---
>  qemu-options.hx |   5 +--
>  vl.c            | 108 ++++++++++++++++++++++++++++++++++----------------------
>  2 files changed, 68 insertions(+), 45 deletions(-)
> 
> diff --git a/qemu-options.hx b/qemu-options.hx
> index bf94862..0e46f5e 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -95,12 +95,13 @@ specifies the maximum number of hotpluggable CPUs.
>  ETEXI
>  
>  DEF("numa", HAS_ARG, QEMU_OPTION_numa,
> -    "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
> +    "-numa node[,mem=size][,cpu=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
>  STEXI
>  @item -numa @var{opts}
>  @findex -numa
>  Simulate a multi node NUMA system. If mem and cpus are omitted, resources
> -are split equally.
> +are split equally. The "-cpu" property may be specified multiple times
> +to denote multiple cpus or cpu ranges.
>  ETEXI
>  
>  DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,
> diff --git a/vl.c b/vl.c
> index f94ec9c..519ca4c 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -516,6 +516,32 @@ static QemuOptsList qemu_realtime_opts = {
>      },
>  };
>  
> +static QemuOptsList qemu_numa_opts = {
> +    .name = "numa",
> +    .implied_opt_name = "type",
> +    .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head),
> +    .desc = {
> +        {
> +            .name = "type",
> +            .type = QEMU_OPT_STRING,
> +            .help = "node type"
> +        },{
> +            .name = "nodeid",
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "node ID"
> +        },{
> +            .name = "mem",
> +            .type = QEMU_OPT_SIZE,
> +            .help = "memory size"
> +        },{
> +            .name = "cpu",
> +            .type = QEMU_OPT_STRING,
> +            .help = "cpu number or range"
> +        },
> +        { /* end of list */ }
> +    },
> +};
> +
>  const char *qemu_get_vm_name(void)
>  {
>      return qemu_name;
> @@ -1349,56 +1375,37 @@ error:
>      exit(1);
>  }
>  
> -static void numa_add(const char *optarg)
> +
> +static int numa_add_cpus(const char *name, const char *value, void *opaque)
>  {
> -    char option[128];
> -    char *endptr;
> -    unsigned long long nodenr;
> +    int *nodenr = opaque;
>  
> -    optarg = get_opt_name(option, 128, optarg, ',');
> -    if (*optarg == ',') {
> -        optarg++;
> +    if (!strcmp(name, "cpu")) {
> +        numa_node_parse_cpus(*nodenr, value);
>      }
> -    if (!strcmp(option, "node")) {
> -
> -        if (nb_numa_nodes >= MAX_NODES) {
> -            fprintf(stderr, "qemu: too many NUMA nodes\n");
> -            exit(1);
> -        }
> +    return 0;
> +}
>  
> -        if (get_param_value(option, 128, "nodeid", optarg) == 0) {
> -            nodenr = nb_numa_nodes;
> -        } else {
> -            if (parse_uint_full(option, &nodenr, 10) < 0) {
> -                fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option);
> -                exit(1);
> -            }
> -        }
> +static int numa_init_func(QemuOpts *opts, void *opaque)
> +{
> +    uint64_t nodenr, mem_size;
>  
> -        if (nodenr >= MAX_NODES) {
> -            fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr);
> -            exit(1);
> -        }
> +    nodenr = qemu_opt_get_number(opts, "nodeid", nb_numa_nodes++);
>  
> -        if (get_param_value(option, 128, "mem", optarg) == 0) {
> -            node_mem[nodenr] = 0;
> -        } else {
> -            int64_t sval;
> -            sval = strtosz(option, &endptr);
> -            if (sval < 0 || *endptr) {
> -                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
> -                exit(1);
> -            }
> -            node_mem[nodenr] = sval;
> -        }
> -        if (get_param_value(option, 128, "cpus", optarg) != 0) {
> -            numa_node_parse_cpus(nodenr, option);
> -        }
> -        nb_numa_nodes++;
> -    } else {
> -        fprintf(stderr, "Invalid -numa option: %s\n", option);
> +    if (nodenr >= MAX_NODES) {
> +        fprintf(stderr, "qemu: Max number of NUMA nodes reached : %d\n",
> +                (int)nodenr);
>          exit(1);
>      }
> +
> +    mem_size = qemu_opt_get_size(opts, "mem", 0);
> +    node_mem[nodenr] = mem_size;
> +
> +    if (qemu_opt_foreach(opts, numa_add_cpus, &nodenr, 1) < 0) {
> +        return -1;
> +    }
> +
> +    return 0;
>  }
>  
>  static void smp_parse(const char *optarg)
> @@ -2901,6 +2908,7 @@ int main(int argc, char **argv, char **envp)
>      qemu_add_opts(&qemu_object_opts);
>      qemu_add_opts(&qemu_tpmdev_opts);
>      qemu_add_opts(&qemu_realtime_opts);
> +    qemu_add_opts(&qemu_numa_opts);
>  
>      runstate_init();
>  
> @@ -3087,7 +3095,16 @@ int main(int argc, char **argv, char **envp)
>                  }
>                  break;
>              case QEMU_OPTION_numa:
> -                numa_add(optarg);
> +                olist = qemu_find_opts("numa");
> +                opts = qemu_opts_parse(olist, optarg, 1);
> +                if (!opts) {
> +                    exit(1);
> +                }
> +                optarg = qemu_opt_get(opts, "type");
> +                if (!optarg || strcmp(optarg, "node")) {
> +                    fprintf(stderr, "qemu: Incorrect format for numa option\n");
> +                    exit(1);
> +                }
>                  break;
>              case QEMU_OPTION_display:
>                  display_type = select_display(optarg);
> @@ -4217,6 +4234,11 @@ int main(int argc, char **argv, char **envp)
>  
>      register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
>  
> +    if (qemu_opts_foreach(qemu_find_opts("numa"), numa_init_func,
> +                          NULL, 1) != 0) {
> +        exit(1);
> +    }
> +
>      if (nb_numa_nodes > 0) {
>          int i;
>
Eduardo Habkost June 19, 2013, 1:26 p.m. UTC | #2
On Wed, Jun 19, 2013 at 01:42:52PM +0200, Igor Mammedov wrote:
> On Tue, 18 Jun 2013 16:09:49 -0400
> Bandan Das <bsd@redhat.com> wrote:
> 
> > 
> > This allows us to use the cpu property multiple times
> > to specify multiple cpu (ranges) to the -numa option :
> > 
> > -numa node,cpu=1,cpu=2,cpu=4
> > or
> > -numa node,cpu=1-3,cpu=5
> > 
> > Signed-off-by: Bandan Das <bsd@redhat.com>
> > ---
> >  v3: Convert to using QemuOpts
> >  Use -cpu rather than -cpus which probably probably makes it more 
> > meaningful for non-range arguments
> > 
> > Sorry for reviving this up :)
> > 
> > This is a follow up to earlier proposals sent by Eduardo.
> > 
> > References :
> > 1. http://lists.gnu.org/archive/html/qemu-devel/2013-02/msg03832.html
> > 2. https://lists.gnu.org/archive/html/qemu-devel/2013-02/msg03857.html
> > 
> > So, basically the format seemed easier to work with if we are thinking 
> > of using QemuOpts for -numa. Using -cpu rather than cpus probably
> > makes it less ambiguous as well IMO. However, it's probably not a good idea
> > if the current syntax is well established ?

libvirt uses the "cpus" option already, so we have to keep it working.

> In context of x86, allowing to specify CPU threads using cpu_index isn't correct,
> since node calculated from APIC ID and node it gets from ACPI table could differ.
> 
> It could be better for CLI interface to accept socket number and build always
> correct NUMA mapping internally using APIC IDs from CPUs, as it's done in real
> hardware.
> 
> In addition it would allow to deprecate use of cpu_index on CLI interface, and
> simplify following re-factoring to use socket/[core/]thread as means to address/
> specify specific CPUs there and later in monitor/qmp interface as well.

What about simply accepting a QOM object path? Today we could only
accept CPU thread objects (because there are no socket/core objects
yet), but the day we introduce CPU socket objects, we can change the
code to accept them without changing the syntax.
Igor Mammedov June 20, 2013, 9:30 a.m. UTC | #3
On Wed, 19 Jun 2013 10:26:42 -0300
Eduardo Habkost <ehabkost@redhat.com> wrote:

> On Wed, Jun 19, 2013 at 01:42:52PM +0200, Igor Mammedov wrote:
> > On Tue, 18 Jun 2013 16:09:49 -0400
> > Bandan Das <bsd@redhat.com> wrote:
> > 
> > > 
> > > This allows us to use the cpu property multiple times
> > > to specify multiple cpu (ranges) to the -numa option :
> > > 
> > > -numa node,cpu=1,cpu=2,cpu=4
> > > or
> > > -numa node,cpu=1-3,cpu=5
> > > 
> > > Signed-off-by: Bandan Das <bsd@redhat.com>
> > > ---
> > >  v3: Convert to using QemuOpts
> > >  Use -cpu rather than -cpus which probably probably makes it more 
> > > meaningful for non-range arguments
> > > 
> > > Sorry for reviving this up :)
> > > 
> > > This is a follow up to earlier proposals sent by Eduardo.
> > > 
> > > References :
> > > 1. http://lists.gnu.org/archive/html/qemu-devel/2013-02/msg03832.html
> > > 2. https://lists.gnu.org/archive/html/qemu-devel/2013-02/msg03857.html
> > > 
> > > So, basically the format seemed easier to work with if we are thinking 
> > > of using QemuOpts for -numa. Using -cpu rather than cpus probably
> > > makes it less ambiguous as well IMO. However, it's probably not a good idea
> > > if the current syntax is well established ?
> 
> libvirt uses the "cpus" option already, so we have to keep it working.
Sure, we can leave it as it is now for some time while a new interface is
introduced/adopted, and then later deprecate "cpus".

> 
> > In context of x86, allowing to specify CPU threads using cpu_index isn't correct,
> > since node calculated from APIC ID and node it gets from ACPI table could differ.
> > 
> > It could be better for CLI interface to accept socket number and build always
> > correct NUMA mapping internally using APIC IDs from CPUs, as it's done in real
> > hardware.
> > 
> > In addition it would allow to deprecate use of cpu_index on CLI interface, and
> > simplify following re-factoring to use socket/[core/]thread as means to address/
> > specify specific CPUs there and later in monitor/qmp interface as well.
> 
> What about simply accepting a QOM object path? Today we could only
> accept CPU thread objects (because there are no socket/core objects
> yet), but the day we introduce CPU socket objects, we can change the
> code to accept them without changing the syntax.
It doesn't matter whether it's socket=N or a QOM path; the idea is not to allow
individual CPU threads there, to avoid misconfiguration, but to use socket entities
in some form in the interface. Sockets could be dummy containers in the initial
implementation, so as not to delay sanitizing the NUMA code.
Paolo Bonzini June 20, 2013, 9:52 a.m. UTC | #4
Il 20/06/2013 11:30, Igor Mammedov ha scritto:
>>>> > > > So, basically the format seemed easier to work with if we are thinking 
>>>> > > > of using QemuOpts for -numa. Using -cpu rather than cpus probably
>>>> > > > makes it less ambiguous as well IMO. However, it's probably not a good idea
>>>> > > > if the current syntax is well established ?
>> > 
>> > libvirt uses the "cpus" option already, so we have to keep it working.
> Sure, we can leave it as it's now for some time while a new interface is
> introduced/adopted. And than later deprecate "cpus".

So, you used a new name because the new behavior of "-numa
node,cpus=1-2,cpus=3-4" would be incompatible with the old.

Personally I don't think that's a problem, but I remember a long
discussion in the past.  Igor/Eduardo, do you remember the conclusions?

Paolo
Michael Tokarev June 20, 2013, 11:34 a.m. UTC | #5
20.06.2013 13:52, Paolo Bonzini wrote:
> Il 20/06/2013 11:30, Igor Mammedov ha scritto:
>>>> libvirt uses the "cpus" option already, so we have to keep it working.
>> Sure, we can leave it as it's now for some time while a new interface is
>> introduced/adopted. And than later deprecate "cpus".
> 
> So, you used a new name because the new behavior of "-numa
> node,cpus=1-2,cpus=3-4" would be incompatible with the old.

BTW, as I tried to touch exactly the same place yesterday (trying
to convert it to QemuOpts) -- what does this "node" mean?

For example, with

  -device [type=]devicetype,foo=bar,xzy=abc

this creates a new device for each "invocation" of option.  But
what does this `-numa node' mean?  Can there be anything else
besides node?  What is it needed/used for?

This -numa option is the last one which uses the old option
parsing mechanism (there's also some smbios-related thing
but it's simple to convert, I almost got it ready yesterday),
but it is rather non-standard.

Thanks,

/mjt
Paolo Bonzini June 20, 2013, 1:02 p.m. UTC | #6
Il 20/06/2013 13:34, Michael Tokarev ha scritto:
> 20.06.2013 13:52, Paolo Bonzini wrote:
>> Il 20/06/2013 11:30, Igor Mammedov ha scritto:
>>>>> libvirt uses the "cpus" option already, so we have to keep it working.
>>> Sure, we can leave it as it's now for some time while a new interface is
>>> introduced/adopted. And than later deprecate "cpus".
>>
>> So, you used a new name because the new behavior of "-numa
>> node,cpus=1-2,cpus=3-4" would be incompatible with the old.
> 
> BTW, as I tried to touch exactly the same place yesterday (trying
> to convert it to QemuOpts) -- what does this "node" mean?
> 
> For example, with
> 
>   -device [type=]devicetype,foo=bar,xzy=abc
> 
> this creates a new device for each "invocation" of option.  But
> what does this `-numa node' mean?  Can there be anything else
> besides node?  Why it is needed/used for?

Nothing, I think it's just that somebody took inspiration from "-device". :)

> This -numa option is the last one which uses the old option
> parsing mechanism (there's also some smbios-related thing
> but it's simple to convert, I almost got it ready yesterday),

That would be awesome.

Paolo
Eduardo Habkost June 20, 2013, 1:26 p.m. UTC | #7
On Thu, Jun 20, 2013 at 11:52:42AM +0200, Paolo Bonzini wrote:
> Il 20/06/2013 11:30, Igor Mammedov ha scritto:
> >>>> > > > So, basically the format seemed easier to work with if we are thinking 
> >>>> > > > of using QemuOpts for -numa. Using -cpu rather than cpus probably
> >>>> > > > makes it less ambiguous as well IMO. However, it's probably not a good idea
> >>>> > > > if the current syntax is well established ?
> >> > 
> >> > libvirt uses the "cpus" option already, so we have to keep it working.
> > Sure, we can leave it as it's now for some time while a new interface is
> > introduced/adopted. And than later deprecate "cpus".
> 
> So, you used a new name because the new behavior of "-numa
> node,cpus=1-2,cpus=3-4" would be incompatible with the old.

I don't think anybody uses "cpus=1-2,cpus=3-4" today, so I believe we
can change its behavior. The problem was to get agreement on the syntax
to represent multiple CPU ranges.

> 
> Personally I don't think that's a problem, but I remember a long
> discussion in the past.  Igor/Eduardo, do you remember the conclusions?

I don't remember seeing the discussion reach any conclusion,
unfortunately.
Paolo Bonzini June 20, 2013, 1:30 p.m. UTC | #8
Il 20/06/2013 15:26, Eduardo Habkost ha scritto:
> On Thu, Jun 20, 2013 at 11:52:42AM +0200, Paolo Bonzini wrote:
>> Il 20/06/2013 11:30, Igor Mammedov ha scritto:
>>>>>>>>> So, basically the format seemed easier to work with if we are thinking 
>>>>>>>>> of using QemuOpts for -numa. Using -cpu rather than cpus probably
>>>>>>>>> makes it less ambiguous as well IMO. However, it's probably not a good idea
>>>>>>>>> if the current syntax is well established ?
>>>>>
>>>>> libvirt uses the "cpus" option already, so we have to keep it working.
>>> Sure, we can leave it as it's now for some time while a new interface is
>>> introduced/adopted. And than later deprecate "cpus".
>>
>> So, you used a new name because the new behavior of "-numa
>> node,cpus=1-2,cpus=3-4" would be incompatible with the old.
> 
> I don't think anybody uses "cpus=1-2,cpus=3-4" today, so I believe we
> can change its behavior. The problem was to get agreement on the syntax
> to represent multiple CPU ranges.

Ok.  I think almost everyone agreed on "cpus=1-2,cpus=3-4", which is
basically what Bandan's patch does minus s/cpu/cpus/.  It matches what
already happens with other options (SLIRP), so it's hardly surprising.

Let's go on with that.

Paolo

>> Personally I don't think that's a problem, but I remember a long
>> discussion in the past.  Igor/Eduardo, do you remember the conclusions?
> 
> I don't remember seeing the discussion reach any conclusion,
> unfortunately.
>
Bandan Das June 20, 2013, 4:02 p.m. UTC | #9
Paolo Bonzini <pbonzini@redhat.com> writes:

> Il 20/06/2013 15:26, Eduardo Habkost ha scritto:
>> On Thu, Jun 20, 2013 at 11:52:42AM +0200, Paolo Bonzini wrote:
>>> Il 20/06/2013 11:30, Igor Mammedov ha scritto:
>>>>>>>>>> So, basically the format seemed easier to work with if we are thinking 
>>>>>>>>>> of using QemuOpts for -numa. Using -cpu rather than cpus probably
>>>>>>>>>> makes it less ambiguous as well IMO. However, it's probably not a good idea
>>>>>>>>>> if the current syntax is well established ?
>>>>>>
>>>>>> libvirt uses the "cpus" option already, so we have to keep it working.
>>>> Sure, we can leave it as it's now for some time while a new interface is
>>>> introduced/adopted. And than later deprecate "cpus".
>>>
>>> So, you used a new name because the new behavior of "-numa
>>> node,cpus=1-2,cpus=3-4" would be incompatible with the old.
>> 
>> I don't think anybody uses "cpus=1-2,cpus=3-4" today, so I believe we
>> can change its behavior. The problem was to get agreement on the syntax
>> to represent multiple CPU ranges.
>
> Ok.  I think almost everyone agreed on "cpus=1-2,cpus=3-4", which is
> basically what Bandan's patch does minus s/cpu/cpus/.  It matches what
> already happens with other options (SLIRP), so it's hardly surprising.

Good, so should I spin a new version with "cpus" ?

Also note that this patch actually doesn't add any extra code to support 
multiple cpus arguments. It all happens automatically as part of conversion to
QemuOpts. So, if we need to revisit the syntax later, we can always do that.

Bandan
> Let's go on with that.
>
> Paolo
>
>>> Personally I don't think that's a problem, but I remember a long
>>> discussion in the past.  Igor/Eduardo, do you remember the conclusions?
>> 
>> I don't remember seeing the discussion reach any conclusion,
>> unfortunately.
>>
Wanlong Gao June 21, 2013, 6:53 a.m. UTC | #10
On 06/21/2013 12:02 AM, Bandan Das wrote:
> Paolo Bonzini <pbonzini@redhat.com> writes:
> 
>> Il 20/06/2013 15:26, Eduardo Habkost ha scritto:
>>> On Thu, Jun 20, 2013 at 11:52:42AM +0200, Paolo Bonzini wrote:
>>>> Il 20/06/2013 11:30, Igor Mammedov ha scritto:
>>>>>>>>>>> So, basically the format seemed easier to work with if we are thinking 
>>>>>>>>>>> of using QemuOpts for -numa. Using -cpu rather than cpus probably
>>>>>>>>>>> makes it less ambiguous as well IMO. However, it's probably not a good idea
>>>>>>>>>>> if the current syntax is well established ?
>>>>>>>
>>>>>>> libvirt uses the "cpus" option already, so we have to keep it working.
>>>>> Sure, we can leave it as it's now for some time while a new interface is
>>>>> introduced/adopted. And than later deprecate "cpus".
>>>>
>>>> So, you used a new name because the new behavior of "-numa
>>>> node,cpus=1-2,cpus=3-4" would be incompatible with the old.
>>>
>>> I don't think anybody uses "cpus=1-2,cpus=3-4" today, so I believe we
>>> can change its behavior. The problem was to get agreement on the syntax
>>> to represent multiple CPU ranges.
>>
>> Ok.  I think almost everyone agreed on "cpus=1-2,cpus=3-4", which is
>> basically what Bandan's patch does minus s/cpu/cpus/.  It matches what
>> already happens with other options (SLIRP), so it's hardly surprising.
> 
> Good, so should I spin a new version with "cpus" ?

I already merged your patch to my patch set "Add support for binding guest numa nodes to host numa nodes"
since I should base on that.

Thanks,
Wanlong Gao

> 
> Also note that this patch actually doesn't add any extra code to support 
> multiple cpus arguments. It all happens automatically as part of conversion to
> QemuOpts. So, if we need to revisit the syntax later, we can always do that.
> 
> Bandan
>> Let's go on with that.
>>
>> Paolo
>>
>>>> Personally I don't think that's a problem, but I remember a long
>>>> discussion in the past.  Igor/Eduardo, do you remember the conclusions?
>>>
>>> I don't remember seeing the discussion reach any conclusion,
>>> unfortunately.
>>>
> 
>
Bandan Das June 21, 2013, 2:51 p.m. UTC | #11
Wanlong Gao <gaowanlong@cn.fujitsu.com> writes:

> On 06/21/2013 12:02 AM, Bandan Das wrote:
>> Paolo Bonzini <pbonzini@redhat.com> writes:
>> 
>>> Il 20/06/2013 15:26, Eduardo Habkost ha scritto:
>>>> On Thu, Jun 20, 2013 at 11:52:42AM +0200, Paolo Bonzini wrote:
>>>>> Il 20/06/2013 11:30, Igor Mammedov ha scritto:
>>>>>>>>>>>> So, basically the format seemed easier to work with if we are thinking 
>>>>>>>>>>>> of using QemuOpts for -numa. Using -cpu rather than cpus probably
>>>>>>>>>>>> makes it less ambiguous as well IMO. However, it's probably not a good idea
>>>>>>>>>>>> if the current syntax is well established ?
>>>>>>>>
>>>>>>>> libvirt uses the "cpus" option already, so we have to keep it working.
>>>>>> Sure, we can leave it as it's now for some time while a new interface is
>>>>>> introduced/adopted. And than later deprecate "cpus".
>>>>>
>>>>> So, you used a new name because the new behavior of "-numa
>>>>> node,cpus=1-2,cpus=3-4" would be incompatible with the old.
>>>>
>>>> I don't think anybody uses "cpus=1-2,cpus=3-4" today, so I believe we
>>>> can change its behavior. The problem was to get agreement on the syntax
>>>> to represent multiple CPU ranges.
>>>
>>> Ok.  I think almost everyone agreed on "cpus=1-2,cpus=3-4", which is
>>> basically what Bandan's patch does minus s/cpu/cpus/.  It matches what
>>> already happens with other options (SLIRP), so it's hardly surprising.
>> 
>> Good, so should I spin a new version with "cpus" ?
>
> I already merged your patch to my patch set "Add support for binding guest numa nodes to host numa nodes"
> since I should base on that.
>
> Thanks,
> Wanlong Gao

Oh, great! Thank you for taking care of the "cpus" change.


>> 
>> Also note that this patch actually doesn't add any extra code to support 
>> multiple cpus arguments. It all happens automatically as part of conversion to
>> QemuOpts. So, if we need to revisit the syntax later, we can always do that.
>> 
>> Bandan
>>> Let's go on with that.
>>>
>>> Paolo
>>>
>>>>> Personally I don't think that's a problem, but I remember a long
>>>>> discussion in the past.  Igor/Eduardo, do you remember the conclusions?
>>>>
>>>> I don't remember seeing the discussion reach any conclusion,
>>>> unfortunately.
>>>>
>> 
>>
diff mbox

Patch

diff --git a/qemu-options.hx b/qemu-options.hx
index bf94862..0e46f5e 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -95,12 +95,13 @@  specifies the maximum number of hotpluggable CPUs.
 ETEXI
 
 DEF("numa", HAS_ARG, QEMU_OPTION_numa,
-    "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
+    "-numa node[,mem=size][,cpu=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
 STEXI
 @item -numa @var{opts}
 @findex -numa
 Simulate a multi node NUMA system. If mem and cpus are omitted, resources
-are split equally.
+are split equally. The "-cpu" property may be specified multiple times
+to denote multiple cpus or cpu ranges.
 ETEXI
 
 DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,
diff --git a/vl.c b/vl.c
index f94ec9c..519ca4c 100644
--- a/vl.c
+++ b/vl.c
@@ -516,6 +516,32 @@  static QemuOptsList qemu_realtime_opts = {
     },
 };
 
+static QemuOptsList qemu_numa_opts = {
+    .name = "numa",
+    .implied_opt_name = "type",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head),
+    .desc = {
+        {
+            .name = "type",
+            .type = QEMU_OPT_STRING,
+            .help = "node type"
+        },{
+            .name = "nodeid",
+            .type = QEMU_OPT_NUMBER,
+            .help = "node ID"
+        },{
+            .name = "mem",
+            .type = QEMU_OPT_SIZE,
+            .help = "memory size"
+        },{
+            .name = "cpu",
+            .type = QEMU_OPT_STRING,
+            .help = "cpu number or range"
+        },
+        { /* end of list */ }
+    },
+};
+
 const char *qemu_get_vm_name(void)
 {
     return qemu_name;
@@ -1349,56 +1375,37 @@  error:
     exit(1);
 }
 
-static void numa_add(const char *optarg)
+
+static int numa_add_cpus(const char *name, const char *value, void *opaque)
 {
-    char option[128];
-    char *endptr;
-    unsigned long long nodenr;
+    int *nodenr = opaque;
 
-    optarg = get_opt_name(option, 128, optarg, ',');
-    if (*optarg == ',') {
-        optarg++;
+    if (!strcmp(name, "cpu")) {
+        numa_node_parse_cpus(*nodenr, value);
     }
-    if (!strcmp(option, "node")) {
-
-        if (nb_numa_nodes >= MAX_NODES) {
-            fprintf(stderr, "qemu: too many NUMA nodes\n");
-            exit(1);
-        }
+    return 0;
+}
 
-        if (get_param_value(option, 128, "nodeid", optarg) == 0) {
-            nodenr = nb_numa_nodes;
-        } else {
-            if (parse_uint_full(option, &nodenr, 10) < 0) {
-                fprintf(stderr, "qemu: Invalid NUMA nodeid: %s\n", option);
-                exit(1);
-            }
-        }
+static int numa_init_func(QemuOpts *opts, void *opaque)
+{
+    uint64_t nodenr, mem_size;
 
-        if (nodenr >= MAX_NODES) {
-            fprintf(stderr, "qemu: invalid NUMA nodeid: %llu\n", nodenr);
-            exit(1);
-        }
+    nodenr = qemu_opt_get_number(opts, "nodeid", nb_numa_nodes++);
 
-        if (get_param_value(option, 128, "mem", optarg) == 0) {
-            node_mem[nodenr] = 0;
-        } else {
-            int64_t sval;
-            sval = strtosz(option, &endptr);
-            if (sval < 0 || *endptr) {
-                fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
-                exit(1);
-            }
-            node_mem[nodenr] = sval;
-        }
-        if (get_param_value(option, 128, "cpus", optarg) != 0) {
-            numa_node_parse_cpus(nodenr, option);
-        }
-        nb_numa_nodes++;
-    } else {
-        fprintf(stderr, "Invalid -numa option: %s\n", option);
+    if (nodenr >= MAX_NODES) {
+        fprintf(stderr, "qemu: Max number of NUMA nodes reached : %d\n",
+                (int)nodenr);
         exit(1);
     }
+
+    mem_size = qemu_opt_get_size(opts, "mem", 0);
+    node_mem[nodenr] = mem_size;
+
+    if (qemu_opt_foreach(opts, numa_add_cpus, &nodenr, 1) < 0) {
+        return -1;
+    }
+
+    return 0;
 }
 
 static void smp_parse(const char *optarg)
@@ -2901,6 +2908,7 @@  int main(int argc, char **argv, char **envp)
     qemu_add_opts(&qemu_object_opts);
     qemu_add_opts(&qemu_tpmdev_opts);
     qemu_add_opts(&qemu_realtime_opts);
+    qemu_add_opts(&qemu_numa_opts);
 
     runstate_init();
 
@@ -3087,7 +3095,16 @@  int main(int argc, char **argv, char **envp)
                 }
                 break;
             case QEMU_OPTION_numa:
-                numa_add(optarg);
+                olist = qemu_find_opts("numa");
+                opts = qemu_opts_parse(olist, optarg, 1);
+                if (!opts) {
+                    exit(1);
+                }
+                optarg = qemu_opt_get(opts, "type");
+                if (!optarg || strcmp(optarg, "node")) {
+                    fprintf(stderr, "qemu: Incorrect format for numa option\n");
+                    exit(1);
+                }
                 break;
             case QEMU_OPTION_display:
                 display_type = select_display(optarg);
@@ -4217,6 +4234,11 @@  int main(int argc, char **argv, char **envp)
 
     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
 
+    if (qemu_opts_foreach(qemu_find_opts("numa"), numa_init_func,
+                          NULL, 1) != 0) {
+        exit(1);
+    }
+
     if (nb_numa_nodes > 0) {
         int i;