Message ID | 20210519001008.23831-3-dwilder@us.ibm.com |
---|---|
State | Changes Requested |
Headers | show |
Series | Support for non-contiguous numa nodes and core ids. | expand |
-----Original Message----- From: dev <ovs-dev-bounces@openvswitch.org> On Behalf Of David Wilder Sent: Wednesday 19 May 2021 01:10 To: ovs-dev@openvswitch.org Cc: daniele.di.proietto@gmail.com; wilder@us.ibm.com; i.maximets@ovn.org Subject: [ovs-dev] [PATCH v4 2/2] ovs-numa: Dpdk options with non-contiguous nodes If not supplied by the user --socket-mem and --socket-limit EAL options are set to a default value based system topology. The assumption that numa nodes must be numbered contiguously is removed by this change. These options can be seen in the ovs-vswitchd.log. For example: a system containing only numa nodes 0 and 8 will generate the following: EAL ARGS: ovs-vswitchd --socket-mem 1024,0,0,0,0,0,0,0,1024 \ --socket-limit 1024,0,0,0,0,0,0,0,1024 -l 0 Signed-off-by: David Wilder <dwilder@us.ibm.com> Reviewed-by: David Christensen <drc@linux.vnet.ibm.com> --- lib/dpdk.c | 26 ++++++++++++++++++-------- lib/ovs-numa.c | 1 - lib/ovs-numa.h | 2 ++ 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/lib/dpdk.c b/lib/dpdk.c index 3195403..918bc80 100644 --- a/lib/dpdk.c +++ b/lib/dpdk.c @@ -133,18 +133,28 @@ static char * construct_dpdk_socket_mem(void) { const char *def_value = "1024"; - int numa, numa_nodes = ovs_numa_get_n_numas(); + struct ovs_numa_dump *dump; + int last_node = 0; + + /* Build a list of all numa nodes with at least one core */ + dump = ovs_numa_dump_n_cores_per_numa(1); struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER; + ds_put_cstr(&dpdk_socket_mem, def_value); - if (numa_nodes == 0 || numa_nodes == OVS_NUMA_UNSPEC) { - numa_nodes = 1; - } + const struct ovs_numa_info_numa *node; - ds_put_cstr(&dpdk_socket_mem, def_value); - for (numa = 1; numa < numa_nodes; ++numa) { - ds_put_format(&dpdk_socket_mem, ",%s", def_value); + FOR_EACH_NUMA_ON_DUMP(node, dump) { + while (node->numa_id > last_node+1 && + node->numa_id != OVS_NUMA_UNSPEC && + node->numa_id <= MAX_NUMA_NODES){ + ds_put_format(&dpdk_socket_mem, ",%s", "0"); + ++last_node; + } + if (node->numa_id != 0) { + ds_put_format(&dpdk_socket_mem, ",%s", def_value); + } } - + ovs_numa_dump_destroy(dump); return ds_cstr(&dpdk_socket_mem); } diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index d2e7cc6..de1d037 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -58,7 +58,6 @@ VLOG_DEFINE_THIS_MODULE(ovs_numa); * TODO: Fix ovs-numa when cpu hotplug is used. */ -#define MAX_NUMA_NODES 128 /* numa node. */ struct numa_node { diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h index 8f2ea34..ecc251a 100644 --- a/lib/ovs-numa.h +++ b/lib/ovs-numa.h @@ -26,6 +26,8 @@ #define OVS_CORE_UNSPEC INT_MAX #define OVS_NUMA_UNSPEC INT_MAX +#define MAX_NUMA_NODES 128 + /* Dump of a list of 'struct ovs_numa_info'. */ struct ovs_numa_dump { struct hmap cores; -- 1.8.3.1 Tested-by: Emma Finn <emma.finn@intel.com> Thanks, Emma
On 19/05/2021 01:10, David Wilder wrote: > If not supplied by the user --socket-mem and --socket-limit EAL options > are set to a default value based system topology. The assumption that > numa nodes must be numbered contiguously is removed by this change. > > These options can be seen in the ovs-vswitchd.log. For example: > a system containing only numa nodes 0 and 8 will generate the following: > > EAL ARGS: ovs-vswitchd --socket-mem 1024,0,0,0,0,0,0,0,1024 \ > --socket-limit 1024,0,0,0,0,0,0,0,1024 -l 0 > > Signed-off-by: David Wilder <dwilder@us.ibm.com> > Reviewed-by: David Christensen <drc@linux.vnet.ibm.com> > --- > lib/dpdk.c | 26 ++++++++++++++++++-------- > lib/ovs-numa.c | 1 - > lib/ovs-numa.h | 2 ++ > 3 files changed, 20 insertions(+), 9 deletions(-) > > diff --git a/lib/dpdk.c b/lib/dpdk.c > index 3195403..918bc80 100644 > --- a/lib/dpdk.c > +++ b/lib/dpdk.c > @@ -133,18 +133,28 @@ static char * > construct_dpdk_socket_mem(void) > { > const char *def_value = "1024"; > - int numa, numa_nodes = ovs_numa_get_n_numas(); > + struct ovs_numa_dump *dump; > + int last_node = 0; > + > + /* Build a list of all numa nodes with at least one core */ > + dump = ovs_numa_dump_n_cores_per_numa(1); > struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER; > + ds_put_cstr(&dpdk_socket_mem, def_value); This is assuming that there is always a numa 0, is it valid? the other numa nodes are discovered, should it be done for numa 0 too? > > - if (numa_nodes == 0 || numa_nodes == OVS_NUMA_UNSPEC) { > - numa_nodes = 1; > - } > + const struct ovs_numa_info_numa *node; > > - ds_put_cstr(&dpdk_socket_mem, def_value); > - for (numa = 1; numa < numa_nodes; ++numa) { > - ds_put_format(&dpdk_socket_mem, ",%s", def_value); > + FOR_EACH_NUMA_ON_DUMP(node, dump) { > + while (node->numa_id > last_node+1 && > + node->numa_id != OVS_NUMA_UNSPEC && > + node->numa_id <= MAX_NUMA_NODES){ > + ds_put_format(&dpdk_socket_mem, ",%s", "0"); minor: indent is off a bit here > + ++last_node; If I had 4 contiguous numa nodes it looks like it would get '1024,1024,0,1024,0,1024' which is incorrect. You need to update last_node every time you go through the for loop and remove the +1 in case there is a gap. What about something like below (not tested). Maybe you can simplify. diff --git a/lib/dpdk.c b/lib/dpdk.c index 918bc80c5..ecf563472 100644 --- a/lib/dpdk.c +++ b/lib/dpdk.c @@ -140,18 +140,24 @@ construct_dpdk_socket_mem(void) dump = ovs_numa_dump_n_cores_per_numa(1); struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER; - ds_put_cstr(&dpdk_socket_mem, def_value); const struct ovs_numa_info_numa *node; FOR_EACH_NUMA_ON_DUMP(node, dump) { - while (node->numa_id > last_node+1 && + while (node->numa_id > last_node && node->numa_id != OVS_NUMA_UNSPEC && node->numa_id <= MAX_NUMA_NODES){ + if (node->numa_id == 0) { + ds_put_format(&dpdk_socket_mem, "%s", "0"); + } else { ds_put_format(&dpdk_socket_mem, ",%s", "0"); - ++last_node; + } + last_node++; } - if (node->numa_id != 0) { - ds_put_format(&dpdk_socket_mem, ",%s", def_value); + if (node->numa_id == 0) { + ds_put_format(&dpdk_socket_mem, "%s", def_value); + } else { + ds_put_format(&dpdk_socket_mem, ",%s", def_value); } + last_node++; } > + } > + if (node->numa_id != 0) { > + ds_put_format(&dpdk_socket_mem, ",%s", def_value); > + } > } > - > + ovs_numa_dump_destroy(dump); > return ds_cstr(&dpdk_socket_mem); > } > > diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c > index d2e7cc6..de1d037 100644 > --- a/lib/ovs-numa.c > +++ b/lib/ovs-numa.c > @@ -58,7 +58,6 @@ VLOG_DEFINE_THIS_MODULE(ovs_numa); > * TODO: Fix ovs-numa when cpu hotplug is used. > */ > > -#define MAX_NUMA_NODES 128 > > /* numa node. */ > struct numa_node { > diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h > index 8f2ea34..ecc251a 100644 > --- a/lib/ovs-numa.h > +++ b/lib/ovs-numa.h > @@ -26,6 +26,8 @@ > #define OVS_CORE_UNSPEC INT_MAX > #define OVS_NUMA_UNSPEC INT_MAX > > +#define MAX_NUMA_NODES 128 > + > /* Dump of a list of 'struct ovs_numa_info'. */ > struct ovs_numa_dump { > struct hmap cores; >
diff --git a/lib/dpdk.c b/lib/dpdk.c index 3195403..918bc80 100644 --- a/lib/dpdk.c +++ b/lib/dpdk.c @@ -133,18 +133,28 @@ static char * construct_dpdk_socket_mem(void) { const char *def_value = "1024"; - int numa, numa_nodes = ovs_numa_get_n_numas(); + struct ovs_numa_dump *dump; + int last_node = 0; + + /* Build a list of all numa nodes with at least one core */ + dump = ovs_numa_dump_n_cores_per_numa(1); struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER; + ds_put_cstr(&dpdk_socket_mem, def_value); - if (numa_nodes == 0 || numa_nodes == OVS_NUMA_UNSPEC) { - numa_nodes = 1; - } + const struct ovs_numa_info_numa *node; - ds_put_cstr(&dpdk_socket_mem, def_value); - for (numa = 1; numa < numa_nodes; ++numa) { - ds_put_format(&dpdk_socket_mem, ",%s", def_value); + FOR_EACH_NUMA_ON_DUMP(node, dump) { + while (node->numa_id > last_node+1 && + node->numa_id != OVS_NUMA_UNSPEC && + node->numa_id <= MAX_NUMA_NODES){ + ds_put_format(&dpdk_socket_mem, ",%s", "0"); + ++last_node; + } + if (node->numa_id != 0) { + ds_put_format(&dpdk_socket_mem, ",%s", def_value); + } } - + ovs_numa_dump_destroy(dump); return ds_cstr(&dpdk_socket_mem); } diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index d2e7cc6..de1d037 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -58,7 +58,6 @@ VLOG_DEFINE_THIS_MODULE(ovs_numa); * TODO: Fix ovs-numa when cpu hotplug is used. */ -#define MAX_NUMA_NODES 128 /* numa node. */ struct numa_node { diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h index 8f2ea34..ecc251a 100644 --- a/lib/ovs-numa.h +++ b/lib/ovs-numa.h @@ -26,6 +26,8 @@ #define OVS_CORE_UNSPEC INT_MAX #define OVS_NUMA_UNSPEC INT_MAX +#define MAX_NUMA_NODES 128 + /* Dump of a list of 'struct ovs_numa_info'. */ struct ovs_numa_dump { struct hmap cores;