diff mbox series

[ovs-dev,v4,2/2] ovs-numa: Dpdk options with non-contiguous nodes

Message ID 20210519001008.23831-3-dwilder@us.ibm.com
State Changes Requested
Headers show
Series Support for non-contiguous numa nodes and core ids. | expand

Commit Message

David Wilder May 19, 2021, 12:10 a.m. UTC
If not supplied by the user --socket-mem and --socket-limit EAL options
are set to a default value based system topology. The assumption that
numa nodes must be numbered contiguously is removed by this change.

These options can be seen in the ovs-vswitchd.log.  For example:
a system containing only numa nodes 0 and 8 will generate the following:

EAL ARGS: ovs-vswitchd --socket-mem 1024,0,0,0,0,0,0,0,1024 \
                         --socket-limit 1024,0,0,0,0,0,0,0,1024 -l 0

Signed-off-by: David Wilder <dwilder@us.ibm.com>
Reviewed-by: David Christensen <drc@linux.vnet.ibm.com>
---
 lib/dpdk.c     | 26 ++++++++++++++++++--------
 lib/ovs-numa.c |  1 -
 lib/ovs-numa.h |  2 ++
 3 files changed, 20 insertions(+), 9 deletions(-)

Comments

Emma Finn May 20, 2021, 9:17 a.m. UTC | #1
-----Original Message-----
From: dev <ovs-dev-bounces@openvswitch.org> On Behalf Of David Wilder
Sent: Wednesday 19 May 2021 01:10
To: ovs-dev@openvswitch.org
Cc: daniele.di.proietto@gmail.com; wilder@us.ibm.com; i.maximets@ovn.org
Subject: [ovs-dev] [PATCH v4 2/2] ovs-numa: Dpdk options with non-contiguous nodes

If not supplied by the user --socket-mem and --socket-limit EAL options are set to a default value based system topology. The assumption that numa nodes must be numbered contiguously is removed by this change.

These options can be seen in the ovs-vswitchd.log.  For example:
a system containing only numa nodes 0 and 8 will generate the following:

EAL ARGS: ovs-vswitchd --socket-mem 1024,0,0,0,0,0,0,0,1024 \
                         --socket-limit 1024,0,0,0,0,0,0,0,1024 -l 0

Signed-off-by: David Wilder <dwilder@us.ibm.com>
Reviewed-by: David Christensen <drc@linux.vnet.ibm.com>
---
 lib/dpdk.c     | 26 ++++++++++++++++++--------
 lib/ovs-numa.c |  1 -
 lib/ovs-numa.h |  2 ++
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/lib/dpdk.c b/lib/dpdk.c
index 3195403..918bc80 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -133,18 +133,28 @@ static char *
 construct_dpdk_socket_mem(void)
 {
     const char *def_value = "1024";
-    int numa, numa_nodes = ovs_numa_get_n_numas();
+    struct ovs_numa_dump *dump;
+    int last_node = 0;
+
+    /* Build a list of all numa nodes with at least one core */
+    dump = ovs_numa_dump_n_cores_per_numa(1);
     struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER;
+    ds_put_cstr(&dpdk_socket_mem, def_value);
 
-    if (numa_nodes == 0 || numa_nodes == OVS_NUMA_UNSPEC) {
-        numa_nodes = 1;
-    }
+    const struct ovs_numa_info_numa *node;
 
-    ds_put_cstr(&dpdk_socket_mem, def_value);
-    for (numa = 1; numa < numa_nodes; ++numa) {
-        ds_put_format(&dpdk_socket_mem, ",%s", def_value);
+    FOR_EACH_NUMA_ON_DUMP(node, dump) {
+        while (node->numa_id > last_node+1 &&
+               node->numa_id != OVS_NUMA_UNSPEC &&
+               node->numa_id <= MAX_NUMA_NODES){
+                ds_put_format(&dpdk_socket_mem, ",%s", "0");
+                ++last_node;
+        }
+        if (node->numa_id != 0) {
+                ds_put_format(&dpdk_socket_mem, ",%s", def_value);
+        }
     }
-
+    ovs_numa_dump_destroy(dump);
     return ds_cstr(&dpdk_socket_mem);
 }
 
diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index d2e7cc6..de1d037 100644
--- a/lib/ovs-numa.c
+++ b/lib/ovs-numa.c
@@ -58,7 +58,6 @@ VLOG_DEFINE_THIS_MODULE(ovs_numa);
  * TODO: Fix ovs-numa when cpu hotplug is used.
  */
 
-#define MAX_NUMA_NODES 128
 
 /* numa node. */
 struct numa_node {
diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h index 8f2ea34..ecc251a 100644
--- a/lib/ovs-numa.h
+++ b/lib/ovs-numa.h
@@ -26,6 +26,8 @@
 #define OVS_CORE_UNSPEC INT_MAX
 #define OVS_NUMA_UNSPEC INT_MAX
 
+#define MAX_NUMA_NODES 128
+
 /* Dump of a list of 'struct ovs_numa_info'. */  struct ovs_numa_dump {
     struct hmap cores;
--
1.8.3.1

Tested-by: Emma Finn <emma.finn@intel.com>

Thanks, 
Emma
Kevin Traynor May 21, 2021, 5:16 p.m. UTC | #2
On 19/05/2021 01:10, David Wilder wrote:
> If not supplied by the user --socket-mem and --socket-limit EAL options
> are set to a default value based system topology. The assumption that
> numa nodes must be numbered contiguously is removed by this change.
> 
> These options can be seen in the ovs-vswitchd.log.  For example:
> a system containing only numa nodes 0 and 8 will generate the following:
> 
> EAL ARGS: ovs-vswitchd --socket-mem 1024,0,0,0,0,0,0,0,1024 \
>                          --socket-limit 1024,0,0,0,0,0,0,0,1024 -l 0
> 
> Signed-off-by: David Wilder <dwilder@us.ibm.com>
> Reviewed-by: David Christensen <drc@linux.vnet.ibm.com>
> ---
>  lib/dpdk.c     | 26 ++++++++++++++++++--------
>  lib/ovs-numa.c |  1 -
>  lib/ovs-numa.h |  2 ++
>  3 files changed, 20 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/dpdk.c b/lib/dpdk.c
> index 3195403..918bc80 100644
> --- a/lib/dpdk.c
> +++ b/lib/dpdk.c
> @@ -133,18 +133,28 @@ static char *
>  construct_dpdk_socket_mem(void)
>  {
>      const char *def_value = "1024";
> -    int numa, numa_nodes = ovs_numa_get_n_numas();
> +    struct ovs_numa_dump *dump;
> +    int last_node = 0;
> +
> +    /* Build a list of all numa nodes with at least one core */
> +    dump = ovs_numa_dump_n_cores_per_numa(1);
>      struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER;
> +    ds_put_cstr(&dpdk_socket_mem, def_value);

This is assuming that there is always a numa 0, is it valid? the other
numa nodes are discovered, should it be done for numa 0 too?

>  
> -    if (numa_nodes == 0 || numa_nodes == OVS_NUMA_UNSPEC) {
> -        numa_nodes = 1;
> -    }
> +    const struct ovs_numa_info_numa *node;
>  
> -    ds_put_cstr(&dpdk_socket_mem, def_value);
> -    for (numa = 1; numa < numa_nodes; ++numa) {
> -        ds_put_format(&dpdk_socket_mem, ",%s", def_value);
> +    FOR_EACH_NUMA_ON_DUMP(node, dump) {
> +        while (node->numa_id > last_node+1 &&
> +               node->numa_id != OVS_NUMA_UNSPEC &&
> +               node->numa_id <= MAX_NUMA_NODES){
> +                ds_put_format(&dpdk_socket_mem, ",%s", "0");

minor: indent is off a bit here

> +                ++last_node;

If I had 4 contiguous numa nodes it looks like it would get
'1024,1024,0,1024,0,1024' which is incorrect. You need to update
last_node every time you go through the for loop and remove the +1 in
case there is a gap.

What about something like below (not tested). Maybe you can simplify.

diff --git a/lib/dpdk.c b/lib/dpdk.c
index 918bc80c5..ecf563472 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -140,18 +140,24 @@ construct_dpdk_socket_mem(void)
     dump = ovs_numa_dump_n_cores_per_numa(1);
     struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER;
-    ds_put_cstr(&dpdk_socket_mem, def_value);

     const struct ovs_numa_info_numa *node;

     FOR_EACH_NUMA_ON_DUMP(node, dump) {
-        while (node->numa_id > last_node+1 &&
+        while (node->numa_id > last_node &&
                node->numa_id != OVS_NUMA_UNSPEC &&
                node->numa_id <= MAX_NUMA_NODES){
+            if (node->numa_id == 0) {
+                ds_put_format(&dpdk_socket_mem, "%s", "0");
+            } else {
                 ds_put_format(&dpdk_socket_mem, ",%s", "0");
-                ++last_node;
+            }
+            last_node++;
         }
-        if (node->numa_id != 0) {
-                ds_put_format(&dpdk_socket_mem, ",%s", def_value);
+        if (node->numa_id == 0) {
+            ds_put_format(&dpdk_socket_mem, "%s", def_value);
+        } else {
+            ds_put_format(&dpdk_socket_mem, ",%s", def_value);
         }
+        last_node++;
     }

> +        }
> +        if (node->numa_id != 0) {
> +                ds_put_format(&dpdk_socket_mem, ",%s", def_value);
> +        }
>      }
> -
> +    ovs_numa_dump_destroy(dump);
>      return ds_cstr(&dpdk_socket_mem);
>  }
>  
> diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
> index d2e7cc6..de1d037 100644
> --- a/lib/ovs-numa.c
> +++ b/lib/ovs-numa.c
> @@ -58,7 +58,6 @@ VLOG_DEFINE_THIS_MODULE(ovs_numa);
>   * TODO: Fix ovs-numa when cpu hotplug is used.
>   */
>  
> -#define MAX_NUMA_NODES 128
>  
>  /* numa node. */
>  struct numa_node {
> diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h
> index 8f2ea34..ecc251a 100644
> --- a/lib/ovs-numa.h
> +++ b/lib/ovs-numa.h
> @@ -26,6 +26,8 @@
>  #define OVS_CORE_UNSPEC INT_MAX
>  #define OVS_NUMA_UNSPEC INT_MAX
>  
> +#define MAX_NUMA_NODES 128
> +
>  /* Dump of a list of 'struct ovs_numa_info'. */
>  struct ovs_numa_dump {
>      struct hmap cores;
>
diff mbox series

Patch

diff --git a/lib/dpdk.c b/lib/dpdk.c
index 3195403..918bc80 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -133,18 +133,28 @@  static char *
 construct_dpdk_socket_mem(void)
 {
     const char *def_value = "1024";
-    int numa, numa_nodes = ovs_numa_get_n_numas();
+    struct ovs_numa_dump *dump;
+    int last_node = 0;
+
+    /* Build a list of all numa nodes with at least one core */
+    dump = ovs_numa_dump_n_cores_per_numa(1);
     struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER;
+    ds_put_cstr(&dpdk_socket_mem, def_value);
 
-    if (numa_nodes == 0 || numa_nodes == OVS_NUMA_UNSPEC) {
-        numa_nodes = 1;
-    }
+    const struct ovs_numa_info_numa *node;
 
-    ds_put_cstr(&dpdk_socket_mem, def_value);
-    for (numa = 1; numa < numa_nodes; ++numa) {
-        ds_put_format(&dpdk_socket_mem, ",%s", def_value);
+    FOR_EACH_NUMA_ON_DUMP(node, dump) {
+        while (node->numa_id > last_node+1 &&
+               node->numa_id != OVS_NUMA_UNSPEC &&
+               node->numa_id <= MAX_NUMA_NODES){
+                ds_put_format(&dpdk_socket_mem, ",%s", "0");
+                ++last_node;
+        }
+        if (node->numa_id != 0) {
+                ds_put_format(&dpdk_socket_mem, ",%s", def_value);
+        }
     }
-
+    ovs_numa_dump_destroy(dump);
     return ds_cstr(&dpdk_socket_mem);
 }
 
diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
index d2e7cc6..de1d037 100644
--- a/lib/ovs-numa.c
+++ b/lib/ovs-numa.c
@@ -58,7 +58,6 @@  VLOG_DEFINE_THIS_MODULE(ovs_numa);
  * TODO: Fix ovs-numa when cpu hotplug is used.
  */
 
-#define MAX_NUMA_NODES 128
 
 /* numa node. */
 struct numa_node {
diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h
index 8f2ea34..ecc251a 100644
--- a/lib/ovs-numa.h
+++ b/lib/ovs-numa.h
@@ -26,6 +26,8 @@ 
 #define OVS_CORE_UNSPEC INT_MAX
 #define OVS_NUMA_UNSPEC INT_MAX
 
+#define MAX_NUMA_NODES 128
+
 /* Dump of a list of 'struct ovs_numa_info'. */
 struct ovs_numa_dump {
     struct hmap cores;