diff mbox series

[02/10] numa: introduce MachineClass::forbid_asymmetrical_numa

Message ID 20200814205424.543857-3-danielhb413@gmail.com
State New
Headers show
Series pseries NUMA distance rework | expand

Commit Message

Daniel Henrique Barboza Aug. 14, 2020, 8:54 p.m. UTC
The pSeries machine does not support asymmetrical NUMA configurations.

CC: Eduardo Habkost <ehabkost@redhat.com>
CC: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 hw/core/numa.c      | 7 +++++++
 hw/ppc/spapr.c      | 1 +
 include/hw/boards.h | 1 +
 3 files changed, 9 insertions(+)

Comments

David Gibson Aug. 20, 2020, 1:17 a.m. UTC | #1
On Fri, Aug 14, 2020 at 05:54:16PM -0300, Daniel Henrique Barboza wrote:
> The pSeries machine does not support asymmetrical NUMA
> configurations.

This seems a bit oddly specific to have as a global machine class
property.

Would it make more sense for machines with specific NUMA constraints
to just verify those during their initialization?
> 
> CC: Eduardo Habkost <ehabkost@redhat.com>
> CC: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> ---
>  hw/core/numa.c      | 7 +++++++
>  hw/ppc/spapr.c      | 1 +
>  include/hw/boards.h | 1 +
>  3 files changed, 9 insertions(+)
> 
> diff --git a/hw/core/numa.c b/hw/core/numa.c
> index d1a94a14f8..1e81233c1d 100644
> --- a/hw/core/numa.c
> +++ b/hw/core/numa.c
> @@ -547,6 +547,7 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
>   */
>  static void validate_numa_distance(MachineState *ms)
>  {
> +    MachineClass *mc = MACHINE_GET_CLASS(ms);
>      int src, dst;
>      bool is_asymmetrical = false;
>      int nb_numa_nodes = ms->numa_state->num_nodes;
> @@ -575,6 +576,12 @@ static void validate_numa_distance(MachineState *ms)
>      }
>  
>      if (is_asymmetrical) {
> +        if (mc->forbid_asymmetrical_numa) {
> +            error_report("This machine type does not support "
> +                         "asymmetrical numa distances.");
> +            exit(EXIT_FAILURE);
> +        }
> +
>          for (src = 0; src < nb_numa_nodes; src++) {
>              for (dst = 0; dst < nb_numa_nodes; dst++) {
>                  if (src != dst && numa_info[src].distance[dst] == 0) {
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index dd2fa4826b..3b16edaf4c 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -4512,6 +4512,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>       */
>      mc->numa_mem_align_shift = 28;
>      mc->auto_enable_numa = true;
> +    mc->forbid_asymmetrical_numa = true;
>  
>      smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
>      smc->default_caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_ON;
> diff --git a/include/hw/boards.h b/include/hw/boards.h
> index bc5b82ad20..dc6cdd1c53 100644
> --- a/include/hw/boards.h
> +++ b/include/hw/boards.h
> @@ -215,6 +215,7 @@ struct MachineClass {
>      bool nvdimm_supported;
>      bool numa_mem_supported;
>      bool auto_enable_numa;
> +    bool forbid_asymmetrical_numa;
>      const char *default_ram_id;
>  
>      HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
Eduardo Habkost Aug. 20, 2020, 2:11 a.m. UTC | #2
On Thu, Aug 20, 2020 at 11:17:26AM +1000, David Gibson wrote:
> On Fri, Aug 14, 2020 at 05:54:16PM -0300, Daniel Henrique Barboza wrote:
> > The pSeries machine does not support asymmetrical NUMA
> > configurations.
> 
> This seems a bit oddly specific to have as a global machine class
> property.
> 
> Would it make more sense for machines with specific NUMA constraints
> to just verify those during their initialization?

This would be much simpler.  However, I like the idea of
representing machine-specific configuration validation rules as
data that can eventually be exported to management software.

(CCing John Snow, who had spent some time thinking about
configuration validation recently.)


> > 
> > CC: Eduardo Habkost <ehabkost@redhat.com>
> > CC: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
> > Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> > ---
> >  hw/core/numa.c      | 7 +++++++
> >  hw/ppc/spapr.c      | 1 +
> >  include/hw/boards.h | 1 +
> >  3 files changed, 9 insertions(+)
> > 
> > diff --git a/hw/core/numa.c b/hw/core/numa.c
> > index d1a94a14f8..1e81233c1d 100644
> > --- a/hw/core/numa.c
> > +++ b/hw/core/numa.c
> > @@ -547,6 +547,7 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
> >   */
> >  static void validate_numa_distance(MachineState *ms)
> >  {
> > +    MachineClass *mc = MACHINE_GET_CLASS(ms);
> >      int src, dst;
> >      bool is_asymmetrical = false;
> >      int nb_numa_nodes = ms->numa_state->num_nodes;
> > @@ -575,6 +576,12 @@ static void validate_numa_distance(MachineState *ms)
> >      }
> >  
> >      if (is_asymmetrical) {
> > +        if (mc->forbid_asymmetrical_numa) {
> > +            error_report("This machine type does not support "
> > +                         "asymmetrical numa distances.");
> > +            exit(EXIT_FAILURE);
> > +        }
> > +
> >          for (src = 0; src < nb_numa_nodes; src++) {
> >              for (dst = 0; dst < nb_numa_nodes; dst++) {
> >                  if (src != dst && numa_info[src].distance[dst] == 0) {
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index dd2fa4826b..3b16edaf4c 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -4512,6 +4512,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >       */
> >      mc->numa_mem_align_shift = 28;
> >      mc->auto_enable_numa = true;
> > +    mc->forbid_asymmetrical_numa = true;
> >  
> >      smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
> >      smc->default_caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_ON;
> > diff --git a/include/hw/boards.h b/include/hw/boards.h
> > index bc5b82ad20..dc6cdd1c53 100644
> > --- a/include/hw/boards.h
> > +++ b/include/hw/boards.h
> > @@ -215,6 +215,7 @@ struct MachineClass {
> >      bool nvdimm_supported;
> >      bool numa_mem_supported;
> >      bool auto_enable_numa;
> > +    bool forbid_asymmetrical_numa;
> >      const char *default_ram_id;
> >  
> >      HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
> 
> -- 
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson
David Gibson Aug. 20, 2020, 4:15 a.m. UTC | #3
On Wed, Aug 19, 2020 at 10:11:28PM -0400, Eduardo Habkost wrote:
> On Thu, Aug 20, 2020 at 11:17:26AM +1000, David Gibson wrote:
> > On Fri, Aug 14, 2020 at 05:54:16PM -0300, Daniel Henrique Barboza wrote:
> > > The pSeries machine does not support asymmetrical NUMA
> > > configurations.
> > 
> > This seems a bit oddly specific to have as a global machine class
> > property.
> > 
> > Would it make more sense for machines with specific NUMA constraints
> > to just verify those during their initialization?
> 
> This would be much simpler.  However, I like the idea of
> representing machine-specific configuration validation rules as
> data that can eventually be exported to management software.

Ah, ok, so basically the usual tradeoff between flexibility and
advertisability.

So, in that case, I guess the question is whether we envisage "no
assymmetry" as a constraint common enough that it's worth creating an
advertisable rule or not.  If we only ever have one user, then we
haven't really done any better than hard coding the constraint in the
manageent software.

Of course to complicate matters, in the longer term we're looking at
removing that constraint from pseries - but doing so will be dependent
on the guest kernel understanding a new format for the NUMA
information in the device tree.  So qemu alone won't have enough
information to tell if such a configuration is possible or not.

> (CCing John Snow, who had spent some time thinking about
> configuration validation recently.)
> 
> 
> > > 
> > > CC: Eduardo Habkost <ehabkost@redhat.com>
> > > CC: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
> > > Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> > > ---
> > >  hw/core/numa.c      | 7 +++++++
> > >  hw/ppc/spapr.c      | 1 +
> > >  include/hw/boards.h | 1 +
> > >  3 files changed, 9 insertions(+)
> > > 
> > > diff --git a/hw/core/numa.c b/hw/core/numa.c
> > > index d1a94a14f8..1e81233c1d 100644
> > > --- a/hw/core/numa.c
> > > +++ b/hw/core/numa.c
> > > @@ -547,6 +547,7 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
> > >   */
> > >  static void validate_numa_distance(MachineState *ms)
> > >  {
> > > +    MachineClass *mc = MACHINE_GET_CLASS(ms);
> > >      int src, dst;
> > >      bool is_asymmetrical = false;
> > >      int nb_numa_nodes = ms->numa_state->num_nodes;
> > > @@ -575,6 +576,12 @@ static void validate_numa_distance(MachineState *ms)
> > >      }
> > >  
> > >      if (is_asymmetrical) {
> > > +        if (mc->forbid_asymmetrical_numa) {
> > > +            error_report("This machine type does not support "
> > > +                         "asymmetrical numa distances.");
> > > +            exit(EXIT_FAILURE);
> > > +        }
> > > +
> > >          for (src = 0; src < nb_numa_nodes; src++) {
> > >              for (dst = 0; dst < nb_numa_nodes; dst++) {
> > >                  if (src != dst && numa_info[src].distance[dst] == 0) {
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index dd2fa4826b..3b16edaf4c 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -4512,6 +4512,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> > >       */
> > >      mc->numa_mem_align_shift = 28;
> > >      mc->auto_enable_numa = true;
> > > +    mc->forbid_asymmetrical_numa = true;
> > >  
> > >      smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
> > >      smc->default_caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_ON;
> > > diff --git a/include/hw/boards.h b/include/hw/boards.h
> > > index bc5b82ad20..dc6cdd1c53 100644
> > > --- a/include/hw/boards.h
> > > +++ b/include/hw/boards.h
> > > @@ -215,6 +215,7 @@ struct MachineClass {
> > >      bool nvdimm_supported;
> > >      bool numa_mem_supported;
> > >      bool auto_enable_numa;
> > > +    bool forbid_asymmetrical_numa;
> > >      const char *default_ram_id;
> > >  
> > >      HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
> > 
> 
> 
>
Daniel Henrique Barboza Aug. 20, 2020, 10:33 a.m. UTC | #4
On 8/20/20 1:15 AM, David Gibson wrote:
> On Wed, Aug 19, 2020 at 10:11:28PM -0400, Eduardo Habkost wrote:
>> On Thu, Aug 20, 2020 at 11:17:26AM +1000, David Gibson wrote:
>>> On Fri, Aug 14, 2020 at 05:54:16PM -0300, Daniel Henrique Barboza wrote:
>>>> The pSeries machine does not support asymmetrical NUMA
>>>> configurations.
>>>
>>> This seems a bit oddly specific to have as a global machine class
>>> property.
>>>
>>> Would it make more sense for machines with specific NUMA constraints
>>> to just verify those during their initialization?
>>
>> This would be much simpler.  However, I like the idea of
>> representing machine-specific configuration validation rules as
>> data that can eventually be exported to management software.
> 
> Ah, ok, so basically the usual tradeoff between flexibility and
> advertisability.



To provide context, what I did here was inspired by this commit:

commit 0533ef5f2089f4f12a0ec5c8035e5e15ba0b5556
Author: Tao Xu <tao3.xu@intel.com>
Date:   Thu Sep 5 16:32:38 2019 +0800

     numa: Introduce MachineClass::auto_enable_numa for implicit NUMA node


In this commit, exclusive NUMA code from spapr.c was taken and put it
into numa.c, with a flag being set in spapr machine_init.


Thanks,


DHB


> 
> So, in that case, I guess the question is whether we envisage "no
> assymmetry" as a constraint common enough that it's worth creating an
> advertisable rule or not.  If we only ever have one user, then we
> haven't really done any better than hard coding the constraint in the
> manageent software.
> 
> Of course to complicate matters, in the longer term we're looking at
> removing that constraint from pseries - but doing so will be dependent
> on the guest kernel understanding a new format for the NUMA
> information in the device tree.  So qemu alone won't have enough
> information to tell if such a configuration is possible or not.
> 
>> (CCing John Snow, who had spent some time thinking about
>> configuration validation recently.)
>>
>>
>>>>
>>>> CC: Eduardo Habkost <ehabkost@redhat.com>
>>>> CC: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
>>>> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
>>>> ---
>>>>   hw/core/numa.c      | 7 +++++++
>>>>   hw/ppc/spapr.c      | 1 +
>>>>   include/hw/boards.h | 1 +
>>>>   3 files changed, 9 insertions(+)
>>>>
>>>> diff --git a/hw/core/numa.c b/hw/core/numa.c
>>>> index d1a94a14f8..1e81233c1d 100644
>>>> --- a/hw/core/numa.c
>>>> +++ b/hw/core/numa.c
>>>> @@ -547,6 +547,7 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
>>>>    */
>>>>   static void validate_numa_distance(MachineState *ms)
>>>>   {
>>>> +    MachineClass *mc = MACHINE_GET_CLASS(ms);
>>>>       int src, dst;
>>>>       bool is_asymmetrical = false;
>>>>       int nb_numa_nodes = ms->numa_state->num_nodes;
>>>> @@ -575,6 +576,12 @@ static void validate_numa_distance(MachineState *ms)
>>>>       }
>>>>   
>>>>       if (is_asymmetrical) {
>>>> +        if (mc->forbid_asymmetrical_numa) {
>>>> +            error_report("This machine type does not support "
>>>> +                         "asymmetrical numa distances.");
>>>> +            exit(EXIT_FAILURE);
>>>> +        }
>>>> +
>>>>           for (src = 0; src < nb_numa_nodes; src++) {
>>>>               for (dst = 0; dst < nb_numa_nodes; dst++) {
>>>>                   if (src != dst && numa_info[src].distance[dst] == 0) {
>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>> index dd2fa4826b..3b16edaf4c 100644
>>>> --- a/hw/ppc/spapr.c
>>>> +++ b/hw/ppc/spapr.c
>>>> @@ -4512,6 +4512,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>>>        */
>>>>       mc->numa_mem_align_shift = 28;
>>>>       mc->auto_enable_numa = true;
>>>> +    mc->forbid_asymmetrical_numa = true;
>>>>   
>>>>       smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
>>>>       smc->default_caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_ON;
>>>> diff --git a/include/hw/boards.h b/include/hw/boards.h
>>>> index bc5b82ad20..dc6cdd1c53 100644
>>>> --- a/include/hw/boards.h
>>>> +++ b/include/hw/boards.h
>>>> @@ -215,6 +215,7 @@ struct MachineClass {
>>>>       bool nvdimm_supported;
>>>>       bool numa_mem_supported;
>>>>       bool auto_enable_numa;
>>>> +    bool forbid_asymmetrical_numa;
>>>>       const char *default_ram_id;
>>>>   
>>>>       HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
>>>
>>
>>
>>
>
Igor Mammedov Aug. 20, 2020, 2:29 p.m. UTC | #5
On Thu, 20 Aug 2020 07:33:00 -0300
Daniel Henrique Barboza <danielhb413@gmail.com> wrote:

> On 8/20/20 1:15 AM, David Gibson wrote:
> > On Wed, Aug 19, 2020 at 10:11:28PM -0400, Eduardo Habkost wrote:  
> >> On Thu, Aug 20, 2020 at 11:17:26AM +1000, David Gibson wrote:  
> >>> On Fri, Aug 14, 2020 at 05:54:16PM -0300, Daniel Henrique Barboza wrote:  
> >>>> The pSeries machine does not support asymmetrical NUMA
> >>>> configurations.  
> >>>
> >>> This seems a bit oddly specific to have as a global machine class
> >>> property.
> >>>
> >>> Would it make more sense for machines with specific NUMA constraints
> >>> to just verify those during their initialization?  
> >>
> >> This would be much simpler.  However, I like the idea of
> >> representing machine-specific configuration validation rules as
> >> data that can eventually be exported to management software.  
> > 
> > Ah, ok, so basically the usual tradeoff between flexibility and
> > advertisability.  
> 
> 
> 
> To provide context, what I did here was inspired by this commit:
> 
> commit 0533ef5f2089f4f12a0ec5c8035e5e15ba0b5556
> Author: Tao Xu <tao3.xu@intel.com>
> Date:   Thu Sep 5 16:32:38 2019 +0800
> 
>      numa: Introduce MachineClass::auto_enable_numa for implicit NUMA node
> 
> 
> In this commit, exclusive NUMA code from spapr.c was taken and put it
> into numa.c, with a flag being set in spapr machine_init.

We have too many auto_enable_numa*, we probably should merge them into
just one auto_enable_numa if it's possible.

> 
> Thanks,
> 
> 
> DHB
> 
> 
> > 
> > So, in that case, I guess the question is whether we envisage "no
> > assymmetry" as a constraint common enough that it's worth creating an
> > advertisable rule or not.  If we only ever have one user, then we
> > haven't really done any better than hard coding the constraint in the
> > manageent software.
> > 
> > Of course to complicate matters, in the longer term we're looking at
> > removing that constraint from pseries - but doing so will be dependent
> > on the guest kernel understanding a new format for the NUMA
> > information in the device tree.  So qemu alone won't have enough
> > information to tell if such a configuration is possible or not.

If it's guest limitiation, then it doesn't sound like QEMU material to me.
It would be more appropriate to validate at mgmt level which knows what
guest is being used.

> >   
> >> (CCing John Snow, who had spent some time thinking about
> >> configuration validation recently.)
> >>
> >>  
> >>>>
> >>>> CC: Eduardo Habkost <ehabkost@redhat.com>
> >>>> CC: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
> >>>> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> >>>> ---
> >>>>   hw/core/numa.c      | 7 +++++++
> >>>>   hw/ppc/spapr.c      | 1 +
> >>>>   include/hw/boards.h | 1 +
> >>>>   3 files changed, 9 insertions(+)
> >>>>
> >>>> diff --git a/hw/core/numa.c b/hw/core/numa.c
> >>>> index d1a94a14f8..1e81233c1d 100644
> >>>> --- a/hw/core/numa.c
> >>>> +++ b/hw/core/numa.c
> >>>> @@ -547,6 +547,7 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
> >>>>    */
> >>>>   static void validate_numa_distance(MachineState *ms)
> >>>>   {
> >>>> +    MachineClass *mc = MACHINE_GET_CLASS(ms);
> >>>>       int src, dst;
> >>>>       bool is_asymmetrical = false;
> >>>>       int nb_numa_nodes = ms->numa_state->num_nodes;
> >>>> @@ -575,6 +576,12 @@ static void validate_numa_distance(MachineState *ms)
> >>>>       }
> >>>>   
> >>>>       if (is_asymmetrical) {
> >>>> +        if (mc->forbid_asymmetrical_numa) {
> >>>> +            error_report("This machine type does not support "
> >>>> +                         "asymmetrical numa distances.");
> >>>> +            exit(EXIT_FAILURE);
> >>>> +        }
> >>>> +
> >>>>           for (src = 0; src < nb_numa_nodes; src++) {
> >>>>               for (dst = 0; dst < nb_numa_nodes; dst++) {
> >>>>                   if (src != dst && numa_info[src].distance[dst] == 0) {
> >>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>>> index dd2fa4826b..3b16edaf4c 100644
> >>>> --- a/hw/ppc/spapr.c
> >>>> +++ b/hw/ppc/spapr.c
> >>>> @@ -4512,6 +4512,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>>>        */
> >>>>       mc->numa_mem_align_shift = 28;
> >>>>       mc->auto_enable_numa = true;
> >>>> +    mc->forbid_asymmetrical_numa = true;
> >>>>   
> >>>>       smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
> >>>>       smc->default_caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_ON;
> >>>> diff --git a/include/hw/boards.h b/include/hw/boards.h
> >>>> index bc5b82ad20..dc6cdd1c53 100644
> >>>> --- a/include/hw/boards.h
> >>>> +++ b/include/hw/boards.h
> >>>> @@ -215,6 +215,7 @@ struct MachineClass {
> >>>>       bool nvdimm_supported;
> >>>>       bool numa_mem_supported;
> >>>>       bool auto_enable_numa;
> >>>> +    bool forbid_asymmetrical_numa;
> >>>>       const char *default_ram_id;
> >>>>   
> >>>>       HotplugHandler *(*get_hotplug_handler)(MachineState *machine,  
> >>>  
> >>
> >>
> >>  
> >   
>
Eduardo Habkost Aug. 20, 2020, 4:51 p.m. UTC | #6
On Thu, Aug 20, 2020 at 02:15:04PM +1000, David Gibson wrote:
> On Wed, Aug 19, 2020 at 10:11:28PM -0400, Eduardo Habkost wrote:
> > On Thu, Aug 20, 2020 at 11:17:26AM +1000, David Gibson wrote:
> > > On Fri, Aug 14, 2020 at 05:54:16PM -0300, Daniel Henrique Barboza wrote:
> > > > The pSeries machine does not support asymmetrical NUMA
> > > > configurations.
> > > 
> > > This seems a bit oddly specific to have as a global machine class
> > > property.
> > > 
> > > Would it make more sense for machines with specific NUMA constraints
> > > to just verify those during their initialization?
> > 
> > This would be much simpler.  However, I like the idea of
> > representing machine-specific configuration validation rules as
> > data that can eventually be exported to management software.
> 
> Ah, ok, so basically the usual tradeoff between flexibility and
> advertisability.
> 
> So, in that case, I guess the question is whether we envisage "no
> assymmetry" as a constraint common enough that it's worth creating an
> advertisable rule or not.  If we only ever have one user, then we
> haven't really done any better than hard coding the constraint in the
> manageent software.
> 
> Of course to complicate matters, in the longer term we're looking at
> removing that constraint from pseries - but doing so will be dependent
> on the guest kernel understanding a new format for the NUMA
> information in the device tree.  So qemu alone won't have enough
> information to tell if such a configuration is possible or not.

Requiring both QEMU (and possibly management software) to be
patched again after the guest kernel is fixed sounds undesirable.
Perhaps a warning would be better in this case?

In either case, it sounds like this won't be a common constraint
and I now agree with your original suggestion of doing this in
machine initialization code.
Igor Mammedov Aug. 21, 2020, 8:55 a.m. UTC | #7
On Thu, 20 Aug 2020 12:51:03 -0400
Eduardo Habkost <ehabkost@redhat.com> wrote:

> On Thu, Aug 20, 2020 at 02:15:04PM +1000, David Gibson wrote:
> > On Wed, Aug 19, 2020 at 10:11:28PM -0400, Eduardo Habkost wrote:  
> > > On Thu, Aug 20, 2020 at 11:17:26AM +1000, David Gibson wrote:  
> > > > On Fri, Aug 14, 2020 at 05:54:16PM -0300, Daniel Henrique Barboza wrote:  
> > > > > The pSeries machine does not support asymmetrical NUMA
> > > > > configurations.  
> > > > 
> > > > This seems a bit oddly specific to have as a global machine class
> > > > property.
> > > > 
> > > > Would it make more sense for machines with specific NUMA constraints
> > > > to just verify those during their initialization?  
> > > 
> > > This would be much simpler.  However, I like the idea of
> > > representing machine-specific configuration validation rules as
> > > data that can eventually be exported to management software.  
> > 
> > Ah, ok, so basically the usual tradeoff between flexibility and
> > advertisability.
> > 
> > So, in that case, I guess the question is whether we envisage "no
> > assymmetry" as a constraint common enough that it's worth creating an
> > advertisable rule or not.  If we only ever have one user, then we
> > haven't really done any better than hard coding the constraint in the
> > manageent software.
> > 
> > Of course to complicate matters, in the longer term we're looking at
> > removing that constraint from pseries - but doing so will be dependent
> > on the guest kernel understanding a new format for the NUMA
> > information in the device tree.  So qemu alone won't have enough
> > information to tell if such a configuration is possible or not.  
> 
> Requiring both QEMU (and possibly management software) to be
> patched again after the guest kernel is fixed sounds undesirable.
If we drop this restriction, then we don't need to touch QEMU when
guest kernel is ready.

Btw, what spapr spec says about the matter?

> Perhaps a warning would be better in this case?
> 
> In either case, it sounds like this won't be a common constraint
> and I now agree with your original suggestion of doing this in
> machine initialization code.
Agreed, if it goes to spapr specific machine code I will not object much.
(it will burden just spapr maintainers, so it's about convincing
David in the end)
Daniel Henrique Barboza Aug. 21, 2020, 12:47 p.m. UTC | #8
On 8/21/20 5:55 AM, Igor Mammedov wrote:
> On Thu, 20 Aug 2020 12:51:03 -0400
> Eduardo Habkost <ehabkost@redhat.com> wrote:
> 
>> On Thu, Aug 20, 2020 at 02:15:04PM +1000, David Gibson wrote:
>>> On Wed, Aug 19, 2020 at 10:11:28PM -0400, Eduardo Habkost wrote:
>>>> On Thu, Aug 20, 2020 at 11:17:26AM +1000, David Gibson wrote:
>>>>> On Fri, Aug 14, 2020 at 05:54:16PM -0300, Daniel Henrique Barboza wrote:
>>>>>> The pSeries machine does not support asymmetrical NUMA
>>>>>> configurations.
>>>>>
>>>>> This seems a bit oddly specific to have as a global machine class
>>>>> property.
>>>>>
>>>>> Would it make more sense for machines with specific NUMA constraints
>>>>> to just verify those during their initialization?
>>>>
>>>> This would be much simpler.  However, I like the idea of
>>>> representing machine-specific configuration validation rules as
>>>> data that can eventually be exported to management software.
>>>
>>> Ah, ok, so basically the usual tradeoff between flexibility and
>>> advertisability.
>>>
>>> So, in that case, I guess the question is whether we envisage "no
>>> assymmetry" as a constraint common enough that it's worth creating an
>>> advertisable rule or not.  If we only ever have one user, then we
>>> haven't really done any better than hard coding the constraint in the
>>> manageent software.
>>>
>>> Of course to complicate matters, in the longer term we're looking at
>>> removing that constraint from pseries - but doing so will be dependent
>>> on the guest kernel understanding a new format for the NUMA
>>> information in the device tree.  So qemu alone won't have enough
>>> information to tell if such a configuration is possible or not.
>>
>> Requiring both QEMU (and possibly management software) to be
>> patched again after the guest kernel is fixed sounds undesirable.
> If we drop this restriction, then we don't need to touch QEMU when
> guest kernel is ready.
> 
> Btw, what spapr spec says about the matter?


LOPAPR support a somewhat asymmetrical NUMA setup in its current form, but
the Linux kernel doesn't support it. The effort to implement it in the current
spapr machine code, given that Linux wouldn't mind it, is not worth it. This
is why I chose to invalidate it for pseries.


> 
>> Perhaps a warning would be better in this case?
>>
>> In either case, it sounds like this won't be a common constraint
>> and I now agree with your original suggestion of doing this in
>> machine initialization code.
> Agreed, if it goes to spapr specific machine code I will not object much.
> (it will burden just spapr maintainers, so it's about convincing
> David in the end)

I believe he's ok with it given that he suggested it in his first reply.

I'll move this verification to spapr machine_init in the next version.



Thanks,

DHB


>
David Gibson Aug. 24, 2020, 6:08 a.m. UTC | #9
On Fri, Aug 21, 2020 at 09:47:47AM -0300, Daniel Henrique Barboza wrote:
> 
> 
> On 8/21/20 5:55 AM, Igor Mammedov wrote:
> > On Thu, 20 Aug 2020 12:51:03 -0400
> > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > 
> > > On Thu, Aug 20, 2020 at 02:15:04PM +1000, David Gibson wrote:
> > > > On Wed, Aug 19, 2020 at 10:11:28PM -0400, Eduardo Habkost wrote:
> > > > > On Thu, Aug 20, 2020 at 11:17:26AM +1000, David Gibson wrote:
> > > > > > On Fri, Aug 14, 2020 at 05:54:16PM -0300, Daniel Henrique Barboza wrote:
> > > > > > > The pSeries machine does not support asymmetrical NUMA
> > > > > > > configurations.
> > > > > > 
> > > > > > This seems a bit oddly specific to have as a global machine class
> > > > > > property.
> > > > > > 
> > > > > > Would it make more sense for machines with specific NUMA constraints
> > > > > > to just verify those during their initialization?
> > > > > 
> > > > > This would be much simpler.  However, I like the idea of
> > > > > representing machine-specific configuration validation rules as
> > > > > data that can eventually be exported to management software.
> > > > 
> > > > Ah, ok, so basically the usual tradeoff between flexibility and
> > > > advertisability.
> > > > 
> > > > So, in that case, I guess the question is whether we envisage "no
> > > > assymmetry" as a constraint common enough that it's worth creating an
> > > > advertisable rule or not.  If we only ever have one user, then we
> > > > haven't really done any better than hard coding the constraint in the
> > > > manageent software.
> > > > 
> > > > Of course to complicate matters, in the longer term we're looking at
> > > > removing that constraint from pseries - but doing so will be dependent
> > > > on the guest kernel understanding a new format for the NUMA
> > > > information in the device tree.  So qemu alone won't have enough
> > > > information to tell if such a configuration is possible or not.
> > > 
> > > Requiring both QEMU (and possibly management software) to be
> > > patched again after the guest kernel is fixed sounds undesirable.
> > If we drop this restriction, then we don't need to touch QEMU when
> > guest kernel is ready.
> > 
> > Btw, what spapr spec says about the matter?
> 
> LOPAPR support a somewhat asymmetrical NUMA setup in its current
> form,

Huh, I didn't even realize that.  What's the mechanism?

> but
> the Linux kernel doesn't support it. The effort to implement it in the current
> spapr machine code, given that Linux wouldn't mind it, is not worth it. This
> is why I chose to invalidate it for pseries.

Igor,

It's kind of difficult to answer that question - PAPR doesn't
specifically describe limitations, it's just that the representation
it uses is inherently limited.  Instead of the obvious, simple and
pretty much universal method (used in the generic kernel and qemu) of
having a matrix of distance between all the nodes, it instead
describes the hierarchy of components that give rise to the different
distances.

So, for each NUMA relevant object (cpu, memory block, host bridge,
etc.) there is a vector of IDs.  Each number in the vector gives one
level of the objects location in the heirarchy.

So, for example the first number might be the physical chip/socket.
the second one which group of cores & memory interfaces sharing an Ln
cache, the third one the specific core number.  So to work out how far
objects are from each other you essentially look at how long a prefix
of their vector they share, which tells you how far above in the
hierarchy you have to go to reach it.

There's a bunch of complicating details, but that's the gist of it.

> > > Perhaps a warning would be better in this case?
> > > 
> > > In either case, it sounds like this won't be a common constraint
> > > and I now agree with your original suggestion of doing this in
> > > machine initialization code.
> > Agreed, if it goes to spapr specific machine code I will not object much.
> > (it will burden just spapr maintainers, so it's about convincing
> > David in the end)
> 
> I believe he's ok with it given that he suggested it in his first reply.
> 
> I'll move this verification to spapr machine_init in the next version.
> 
> 
> 
> Thanks,
> 
> DHB
> 
> 
> > 
>
Daniel Henrique Barboza Aug. 24, 2020, 11:45 a.m. UTC | #10
On 8/24/20 3:08 AM, David Gibson wrote:
> On Fri, Aug 21, 2020 at 09:47:47AM -0300, Daniel Henrique Barboza wrote:
>>
>>
>> On 8/21/20 5:55 AM, Igor Mammedov wrote:
>>> On Thu, 20 Aug 2020 12:51:03 -0400
>>> Eduardo Habkost <ehabkost@redhat.com> wrote:
>>>
>>>> On Thu, Aug 20, 2020 at 02:15:04PM +1000, David Gibson wrote:
>>>>> On Wed, Aug 19, 2020 at 10:11:28PM -0400, Eduardo Habkost wrote:
>>>>>> On Thu, Aug 20, 2020 at 11:17:26AM +1000, David Gibson wrote:
>>>>>>> On Fri, Aug 14, 2020 at 05:54:16PM -0300, Daniel Henrique Barboza wrote:
>>>>>>>> The pSeries machine does not support asymmetrical NUMA
>>>>>>>> configurations.
>>>>>>>
>>>>>>> This seems a bit oddly specific to have as a global machine class
>>>>>>> property.
>>>>>>>
>>>>>>> Would it make more sense for machines with specific NUMA constraints
>>>>>>> to just verify those during their initialization?
>>>>>>
>>>>>> This would be much simpler.  However, I like the idea of
>>>>>> representing machine-specific configuration validation rules as
>>>>>> data that can eventually be exported to management software.
>>>>>
>>>>> Ah, ok, so basically the usual tradeoff between flexibility and
>>>>> advertisability.
>>>>>
>>>>> So, in that case, I guess the question is whether we envisage "no
>>>>> assymmetry" as a constraint common enough that it's worth creating an
>>>>> advertisable rule or not.  If we only ever have one user, then we
>>>>> haven't really done any better than hard coding the constraint in the
>>>>> manageent software.
>>>>>
>>>>> Of course to complicate matters, in the longer term we're looking at
>>>>> removing that constraint from pseries - but doing so will be dependent
>>>>> on the guest kernel understanding a new format for the NUMA
>>>>> information in the device tree.  So qemu alone won't have enough
>>>>> information to tell if such a configuration is possible or not.
>>>>
>>>> Requiring both QEMU (and possibly management software) to be
>>>> patched again after the guest kernel is fixed sounds undesirable.
>>> If we drop this restriction, then we don't need to touch QEMU when
>>> guest kernel is ready.
>>>
>>> Btw, what spapr spec says about the matter?
>>
>> LOPAPR support a somewhat asymmetrical NUMA setup in its current
>> form,
> 
> Huh, I didn't even realize that.  What's the mechanism?

LOPAPR mentions that a single resource/node can have multiple associativity
arrays. The idea is to contemplate the situations where the node has
more than one connection with the board.

I say "somewhat" because, right after mentioning that, the spec also says that
the OS should consider that the distance between two nodes must always be
the shortest one of all available arrays. I'll copy/paste the except here
(end of section 15.2, "Numa Resource Associativity":

-----

The reason that the “ibm,associativity” property may contain multiple associativity
lists is that a resource may be multiply connected into the platform. This resource
then has a different associativity characteristics relative to its multiple connections.
To determine the associativity between any two resources, the OS scans down the two
resources associativity lists in all pair wise combinations counting how many domains
are the same until the first domain where the two list do not agree. The highest such
count is the associativity between the two resources.

----


DHB


> 
>> but
>> the Linux kernel doesn't support it. The effort to implement it in the current
>> spapr machine code, given that Linux wouldn't mind it, is not worth it. This
>> is why I chose to invalidate it for pseries.
> 
> Igor,
> 
> It's kind of difficult to answer that question - PAPR doesn't
> specifically describe limitations, it's just that the representation
> it uses is inherently limited.  Instead of the obvious, simple and
> pretty much universal method (used in the generic kernel and qemu) of
> having a matrix of distance between all the nodes, it instead
> describes the hierarchy of components that give rise to the different
> distances.
> 
> So, for each NUMA relevant object (cpu, memory block, host bridge,
> etc.) there is a vector of IDs.  Each number in the vector gives one
> level of the objects location in the heirarchy.
> 
> So, for example the first number might be the physical chip/socket.
> the second one which group of cores & memory interfaces sharing an Ln
> cache, the third one the specific core number.  So to work out how far
> objects are from each other you essentially look at how long a prefix
> of their vector they share, which tells you how far above in the
> hierarchy you have to go to reach it.
> 
> There's a bunch of complicating details, but that's the gist of it.
> 
>>>> Perhaps a warning would be better in this case?
>>>>
>>>> In either case, it sounds like this won't be a common constraint
>>>> and I now agree with your original suggestion of doing this in
>>>> machine initialization code.
>>> Agreed, if it goes to spapr specific machine code I will not object much.
>>> (it will burden just spapr maintainers, so it's about convincing
>>> David in the end)
>>
>> I believe he's ok with it given that he suggested it in his first reply.
>>
>> I'll move this verification to spapr machine_init in the next version.
>>
>>
>>
>> Thanks,
>>
>> DHB
>>
>>
>>>
>>
>
David Gibson Aug. 24, 2020, 11:49 p.m. UTC | #11
On Mon, Aug 24, 2020 at 08:45:12AM -0300, Daniel Henrique Barboza wrote:
> 
> 
> On 8/24/20 3:08 AM, David Gibson wrote:
> > On Fri, Aug 21, 2020 at 09:47:47AM -0300, Daniel Henrique Barboza wrote:
> > > 
> > > 
> > > On 8/21/20 5:55 AM, Igor Mammedov wrote:
> > > > On Thu, 20 Aug 2020 12:51:03 -0400
> > > > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > > > 
> > > > > On Thu, Aug 20, 2020 at 02:15:04PM +1000, David Gibson wrote:
> > > > > > On Wed, Aug 19, 2020 at 10:11:28PM -0400, Eduardo Habkost wrote:
> > > > > > > On Thu, Aug 20, 2020 at 11:17:26AM +1000, David Gibson wrote:
> > > > > > > > On Fri, Aug 14, 2020 at 05:54:16PM -0300, Daniel Henrique Barboza wrote:
> > > > > > > > > The pSeries machine does not support asymmetrical NUMA
> > > > > > > > > configurations.
> > > > > > > > 
> > > > > > > > This seems a bit oddly specific to have as a global machine class
> > > > > > > > property.
> > > > > > > > 
> > > > > > > > Would it make more sense for machines with specific NUMA constraints
> > > > > > > > to just verify those during their initialization?
> > > > > > > 
> > > > > > > This would be much simpler.  However, I like the idea of
> > > > > > > representing machine-specific configuration validation rules as
> > > > > > > data that can eventually be exported to management software.
> > > > > > 
> > > > > > Ah, ok, so basically the usual tradeoff between flexibility and
> > > > > > advertisability.
> > > > > > 
> > > > > > So, in that case, I guess the question is whether we envisage "no
> > > > > > assymmetry" as a constraint common enough that it's worth creating an
> > > > > > advertisable rule or not.  If we only ever have one user, then we
> > > > > > haven't really done any better than hard coding the constraint in the
> > > > > > manageent software.
> > > > > > 
> > > > > > Of course to complicate matters, in the longer term we're looking at
> > > > > > removing that constraint from pseries - but doing so will be dependent
> > > > > > on the guest kernel understanding a new format for the NUMA
> > > > > > information in the device tree.  So qemu alone won't have enough
> > > > > > information to tell if such a configuration is possible or not.
> > > > > 
> > > > > Requiring both QEMU (and possibly management software) to be
> > > > > patched again after the guest kernel is fixed sounds undesirable.
> > > > If we drop this restriction, then we don't need to touch QEMU when
> > > > guest kernel is ready.
> > > > 
> > > > Btw, what spapr spec says about the matter?
> > > 
> > > LOPAPR support a somewhat asymmetrical NUMA setup in its current
> > > form,
> > 
> > Huh, I didn't even realize that.  What's the mechanism?
> 
> LOPAPR mentions that a single resource/node can have multiple associativity
> arrays. The idea is to contemplate the situations where the node has
> more than one connection with the board.
> 
> I say "somewhat" because, right after mentioning that, the spec also says that
> the OS should consider that the distance between two nodes must always be
> the shortest one of all available arrays. I'll copy/paste the except here
> (end of section 15.2, "Numa Resource Associativity":

Ah.  I didn't think that's what "asymmetric NUMA" meant... but come to
think of it, I'm not very sure about that.

> -----
> 
> The reason that the “ibm,associativity” property may contain multiple associativity
> lists is that a resource may be multiply connected into the platform. This resource
> then has a different associativity characteristics relative to its multiple connections.
> To determine the associativity between any two resources, the OS scans down the two
> resources associativity lists in all pair wise combinations counting how many domains
> are the same until the first domain where the two list do not agree. The highest such
> count is the associativity between the two resources.
> 
> ----
> 
> 
> DHB
> 
> 
> > 
> > > but
> > > the Linux kernel doesn't support it. The effort to implement it in the current
> > > spapr machine code, given that Linux wouldn't mind it, is not worth it. This
> > > is why I chose to invalidate it for pseries.
> > 
> > Igor,
> > 
> > It's kind of difficult to answer that question - PAPR doesn't
> > specifically describe limitations, it's just that the representation
> > it uses is inherently limited.  Instead of the obvious, simple and
> > pretty much universal method (used in the generic kernel and qemu) of
> > having a matrix of distance between all the nodes, it instead
> > describes the hierarchy of components that give rise to the different
> > distances.
> > 
> > So, for each NUMA relevant object (cpu, memory block, host bridge,
> > etc.) there is a vector of IDs.  Each number in the vector gives one
> > level of the objects location in the heirarchy.
> > 
> > So, for example the first number might be the physical chip/socket.
> > the second one which group of cores & memory interfaces sharing an Ln
> > cache, the third one the specific core number.  So to work out how far
> > objects are from each other you essentially look at how long a prefix
> > of their vector they share, which tells you how far above in the
> > hierarchy you have to go to reach it.
> > 
> > There's a bunch of complicating details, but that's the gist of it.
> > 
> > > > > Perhaps a warning would be better in this case?
> > > > > 
> > > > > In either case, it sounds like this won't be a common constraint
> > > > > and I now agree with your original suggestion of doing this in
> > > > > machine initialization code.
> > > > Agreed, if it goes to spapr specific machine code I will not object much.
> > > > (it will burden just spapr maintainers, so it's about convincing
> > > > David in the end)
> > > 
> > > I believe he's ok with it given that he suggested it in his first reply.
> > > 
> > > I'll move this verification to spapr machine_init in the next version.
> > > 
> > > 
> > > 
> > > Thanks,
> > > 
> > > DHB
> > > 
> > > 
> > > > 
> > > 
> > 
>
Daniel Henrique Barboza Aug. 25, 2020, 9:56 a.m. UTC | #12
On 8/24/20 8:49 PM, David Gibson wrote:
> On Mon, Aug 24, 2020 at 08:45:12AM -0300, Daniel Henrique Barboza wrote:
>>
>>

[...]

>>>> LOPAPR support a somewhat asymmetrical NUMA setup in its current
>>>> form,
>>>
>>> Huh, I didn't even realize that.  What's the mechanism?
>>
>> LOPAPR mentions that a single resource/node can have multiple associativity
>> arrays. The idea is to contemplate the situations where the node has
>> more than one connection with the board.
>>
>> I say "somewhat" because, right after mentioning that, the spec also says that
>> the OS should consider that the distance between two nodes must always be
>> the shortest one of all available arrays. I'll copy/paste the except here
>> (end of section 15.2, "Numa Resource Associativity":
> 
> Ah.  I didn't think that's what "asymmetric NUMA" meant... but come to
> think of it, I'm not very sure about that.


This was a poor attempt of my part to cut PAPR some slack.

TBH, even if current PAPR allows for some form of NUMA asymmetry, I don't think
it's worth implementing at all. It'll be more complexity on top of what I
already added here, and the best case scenario will be the kernel ignoring it
(worst case - kernel blowing it up because we're adding more associativity
arrays in each CPU and so on).



Thanks,


DHB

> 
>> -----
>>
>> The reason that the “ibm,associativity” property may contain multiple associativity
>> lists is that a resource may be multiply connected into the platform. This resource
>> then has a different associativity characteristics relative to its multiple connections.
>> To determine the associativity between any two resources, the OS scans down the two
>> resources associativity lists in all pair wise combinations counting how many domains
>> are the same until the first domain where the two list do not agree. The highest such
>> count is the associativity between the two resources.
>>
>> ----
>>
>>
>> DHB
>>
>>
>>>
>>>> but
>>>> the Linux kernel doesn't support it. The effort to implement it in the current
>>>> spapr machine code, given that Linux wouldn't mind it, is not worth it. This
>>>> is why I chose to invalidate it for pseries.
>>>
>>> Igor,
>>>
>>> It's kind of difficult to answer that question - PAPR doesn't
>>> specifically describe limitations, it's just that the representation
>>> it uses is inherently limited.  Instead of the obvious, simple and
>>> pretty much universal method (used in the generic kernel and qemu) of
>>> having a matrix of distance between all the nodes, it instead
>>> describes the hierarchy of components that give rise to the different
>>> distances.
>>>
>>> So, for each NUMA relevant object (cpu, memory block, host bridge,
>>> etc.) there is a vector of IDs.  Each number in the vector gives one
>>> level of the objects location in the heirarchy.
>>>
>>> So, for example the first number might be the physical chip/socket.
>>> the second one which group of cores & memory interfaces sharing an Ln
>>> cache, the third one the specific core number.  So to work out how far
>>> objects are from each other you essentially look at how long a prefix
>>> of their vector they share, which tells you how far above in the
>>> hierarchy you have to go to reach it.
>>>
>>> There's a bunch of complicating details, but that's the gist of it.
>>>
>>>>>> Perhaps a warning would be better in this case?
>>>>>>
>>>>>> In either case, it sounds like this won't be a common constraint
>>>>>> and I now agree with your original suggestion of doing this in
>>>>>> machine initialization code.
>>>>> Agreed, if it goes to spapr specific machine code I will not object much.
>>>>> (it will burden just spapr maintainers, so it's about convincing
>>>>> David in the end)
>>>>
>>>> I believe he's ok with it given that he suggested it in his first reply.
>>>>
>>>> I'll move this verification to spapr machine_init in the next version.
>>>>
>>>>
>>>>
>>>> Thanks,
>>>>
>>>> DHB
>>>>
>>>>
>>>>>
>>>>
>>>
>>
>
David Gibson Aug. 25, 2020, 11:12 a.m. UTC | #13
On Tue, Aug 25, 2020 at 06:56:46AM -0300, Daniel Henrique Barboza wrote:
> 
> 
> On 8/24/20 8:49 PM, David Gibson wrote:
> > On Mon, Aug 24, 2020 at 08:45:12AM -0300, Daniel Henrique Barboza wrote:
> > > 
> > > 
> 
> [...]
> 
> > > > > LOPAPR support a somewhat asymmetrical NUMA setup in its current
> > > > > form,
> > > > 
> > > > Huh, I didn't even realize that.  What's the mechanism?
> > > 
> > > LOPAPR mentions that a single resource/node can have multiple associativity
> > > arrays. The idea is to contemplate the situations where the node has
> > > more than one connection with the board.
> > > 
> > > I say "somewhat" because, right after mentioning that, the spec also says that
> > > the OS should consider that the distance between two nodes must always be
> > > the shortest one of all available arrays. I'll copy/paste the except here
> > > (end of section 15.2, "Numa Resource Associativity":
> > 
> > Ah.  I didn't think that's what "asymmetric NUMA" meant... but come to
> > think of it, I'm not very sure about that.
> 
> 
> This was a poor attempt of my part to cut PAPR some slack.
> 
> TBH, even if current PAPR allows for some form of NUMA asymmetry, I don't think
> it's worth implementing at all. It'll be more complexity on top of what I
> already added here, and the best case scenario will be the kernel ignoring it
> (worst case - kernel blowing it up because we're adding more associativity
> arrays in each CPU and so on).

Yes, I agree.
John Snow Sept. 23, 2020, 3:21 p.m. UTC | #14
On 8/20/20 12:51 PM, Eduardo Habkost wrote:
> 
> In either case, it sounds like this won't be a common constraint
> and I now agree with your original suggestion of doing this in
> machine initialization code.

I'm late to this chat, but not every constraint needs to be modeled as 
data -- it's okay to do validation in the machine initialization code.

(It can always be changed later!)

I have been more concerned with schema-based validation of the shape of 
data, but which boards support NUMA or not falls perfectly into the 
realm of semantic validation that is outside the realm of static data 
validation, I think.

--js
diff mbox series

Patch

diff --git a/hw/core/numa.c b/hw/core/numa.c
index d1a94a14f8..1e81233c1d 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -547,6 +547,7 @@  static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
  */
 static void validate_numa_distance(MachineState *ms)
 {
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
     int src, dst;
     bool is_asymmetrical = false;
     int nb_numa_nodes = ms->numa_state->num_nodes;
@@ -575,6 +576,12 @@  static void validate_numa_distance(MachineState *ms)
     }
 
     if (is_asymmetrical) {
+        if (mc->forbid_asymmetrical_numa) {
+            error_report("This machine type does not support "
+                         "asymmetrical numa distances.");
+            exit(EXIT_FAILURE);
+        }
+
         for (src = 0; src < nb_numa_nodes; src++) {
             for (dst = 0; dst < nb_numa_nodes; dst++) {
                 if (src != dst && numa_info[src].distance[dst] == 0) {
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index dd2fa4826b..3b16edaf4c 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4512,6 +4512,7 @@  static void spapr_machine_class_init(ObjectClass *oc, void *data)
      */
     mc->numa_mem_align_shift = 28;
     mc->auto_enable_numa = true;
+    mc->forbid_asymmetrical_numa = true;
 
     smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
     smc->default_caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_ON;
diff --git a/include/hw/boards.h b/include/hw/boards.h
index bc5b82ad20..dc6cdd1c53 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -215,6 +215,7 @@  struct MachineClass {
     bool nvdimm_supported;
     bool numa_mem_supported;
     bool auto_enable_numa;
+    bool forbid_asymmetrical_numa;
     const char *default_ram_id;
 
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,