diff mbox series

[ovs-dev,v5,2/2] ovs-numa: Dpdk options with non-contiguous nodes

Message ID 20210525192724.20021-3-dwilder@us.ibm.com
State Changes Requested
Headers show
Series Support for non-contiguous numa nodes and core ids. | expand

Commit Message

David Wilder May 25, 2021, 7:27 p.m. UTC
If not supplied by the user --socket-mem and --socket-limit EAL options
are set to a default value based system topology. The assumption that
numa nodes must be numbered contiguously is removed by this change.

These options can be seen in the ovs-vswitchd.log.  For example:
a system containing only numa nodes 0 and 8 will generate the following:

EAL ARGS: ovs-vswitchd --socket-mem 1024,0,0,0,0,0,0,0,1024 \
                         --socket-limit 1024,0,0,0,0,0,0,0,1024 -l 0

Signed-off-by: David Wilder <dwilder@us.ibm.com>
Reviewed-by: David Christensen <drc@linux.vnet.ibm.com>
Tested-by: Emma Finn <emma.finn@intel.com>
---
 lib/dpdk.c     | 33 ++++++++++++++++++++++++---------
 lib/ovs-numa.c |  1 -
 lib/ovs-numa.h |  2 ++
 3 files changed, 26 insertions(+), 10 deletions(-)

Comments

Kevin Traynor June 4, 2021, 9:34 a.m. UTC | #1
On 25/05/2021 20:27, David Wilder wrote:
> If not supplied by the user --socket-mem and --socket-limit EAL options
> are set to a default value based system topology. The assumption that
> numa nodes must be numbered contiguously is removed by this change.
> 
> These options can be seen in the ovs-vswitchd.log.  For example:
> a system containing only numa nodes 0 and 8 will generate the following:
> 
> EAL ARGS: ovs-vswitchd --socket-mem 1024,0,0,0,0,0,0,0,1024 \
>                          --socket-limit 1024,0,0,0,0,0,0,0,1024 -l 0
> 

I think it needs to be squashed with the 1/2, (at least when applied)
otherwise with only 1/2 OVS will support non-contig numa but won't
generate the right default socket-mem option for DPDK.

> Signed-off-by: David Wilder <dwilder@us.ibm.com>
> Reviewed-by: David Christensen <drc@linux.vnet.ibm.com>
> Tested-by: Emma Finn <emma.finn@intel.com>

I have a doubt about keeping the tags from older versions when the logic
has changed but it's up to reviewer/tester to comment.

> ---
>  lib/dpdk.c     | 33 ++++++++++++++++++++++++---------
>  lib/ovs-numa.c |  1 -
>  lib/ovs-numa.h |  2 ++
>  3 files changed, 26 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/dpdk.c b/lib/dpdk.c
> index 3195403..6e4a2e3 100644
> --- a/lib/dpdk.c
> +++ b/lib/dpdk.c
> @@ -133,18 +133,33 @@ static char *
>  construct_dpdk_socket_mem(void)
>  {
>      const char *def_value = "1024";
> -    int numa, numa_nodes = ovs_numa_get_n_numas();
> -    struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER;
> +    struct ovs_numa_dump *dump;
> +    int last_node = 0;
> +    const struct ovs_numa_info_numa *node;
>  
> -    if (numa_nodes == 0 || numa_nodes == OVS_NUMA_UNSPEC) {
> -        numa_nodes = 1;
> -    }
> +    /* Build a list of all numa nodes with at least one core */
> +    dump = ovs_numa_dump_n_cores_per_numa(1);


> +    struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER;
>  
> -    ds_put_cstr(&dpdk_socket_mem, def_value);
> -    for (numa = 1; numa < numa_nodes; ++numa) {
> -        ds_put_format(&dpdk_socket_mem, ",%s", def_value);
> +    FOR_EACH_NUMA_ON_DUMP(node, dump) {

afaiu, the hmap will not guarantee any order and I can't see that it is
sorted before it is used below. The logic relies on the numas being
sorted in numa_id order, so i think you will have to sort them. There is
a good example of this to reference in the sorted_poll_thread_list().

Logic below lgtm, once the order is sorted.

> +        while (node->numa_id > last_node &&
> +               node->numa_id != OVS_NUMA_UNSPEC &&
> +               node->numa_id <= MAX_NUMA_NODES){
> +            if (last_node == 0) {
> +                ds_put_format(&dpdk_socket_mem, "%s", "0");
> +            } else {
> +                ds_put_format(&dpdk_socket_mem, ",%s", "0");
> +            }
> +            last_node++;
> +        }
> +        if (node->numa_id == 0) {
> +                ds_put_format(&dpdk_socket_mem, "%s", def_value);
> +        } else {
> +                ds_put_format(&dpdk_socket_mem, ",%s", def_value);
> +        }
> +        last_node++;
>      }
> -
> +    ovs_numa_dump_destroy(dump);
>      return ds_cstr(&dpdk_socket_mem);
>  }
>  
> diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
> index 8c7d25b..b825ecb 100644
> --- a/lib/ovs-numa.c
> +++ b/lib/ovs-numa.c
> @@ -58,7 +58,6 @@ VLOG_DEFINE_THIS_MODULE(ovs_numa);
>   * TODO: Fix ovs-numa when cpu hotplug is used.
>   */
>  
> -#define MAX_NUMA_NODES 128
>  
>  /* numa node. */
>  struct numa_node {
> diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h
> index 8f2ea34..ecc251a 100644
> --- a/lib/ovs-numa.h
> +++ b/lib/ovs-numa.h
> @@ -26,6 +26,8 @@
>  #define OVS_CORE_UNSPEC INT_MAX
>  #define OVS_NUMA_UNSPEC INT_MAX
>  
> +#define MAX_NUMA_NODES 128
> +
>  /* Dump of a list of 'struct ovs_numa_info'. */
>  struct ovs_numa_dump {
>      struct hmap cores;
>
diff mbox series

Patch

diff --git a/lib/dpdk.c b/lib/dpdk.c
index 3195403..6e4a2e3 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -133,18 +133,33 @@  static char *
 construct_dpdk_socket_mem(void)
 {
     const char *def_value = "1024";
-    int numa, numa_nodes = ovs_numa_get_n_numas();
-    struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER;
+    struct ovs_numa_dump *dump;
+    int last_node = 0;
+    const struct ovs_numa_info_numa *node;
 
-    if (numa_nodes == 0 || numa_nodes == OVS_NUMA_UNSPEC) {
-        numa_nodes = 1;
-    }
+    /* Build a list of all numa nodes with at least one core */
+    dump = ovs_numa_dump_n_cores_per_numa(1);
+    struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER;
 
-    ds_put_cstr(&dpdk_socket_mem, def_value);
-    for (numa = 1; numa < numa_nodes; ++numa) {
-        ds_put_format(&dpdk_socket_mem, ",%s", def_value);
+    FOR_EACH_NUMA_ON_DUMP(node, dump) {
+        while (node->numa_id > last_node &&
+               node->numa_id != OVS_NUMA_UNSPEC &&
+               node->numa_id <= MAX_NUMA_NODES){
+            if (last_node == 0) {
+                ds_put_format(&dpdk_socket_mem, "%s", "0");
+            } else {
+                ds_put_format(&dpdk_socket_mem, ",%s", "0");
+            }
+            last_node++;
+        }
+        if (node->numa_id == 0) {
+                ds_put_format(&dpdk_socket_mem, "%s", def_value);
+        } else {
+                ds_put_format(&dpdk_socket_mem, ",%s", def_value);
+        }
+        last_node++;
     }
-
+    ovs_numa_dump_destroy(dump);
     return ds_cstr(&dpdk_socket_mem);
 }
 
diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
index 8c7d25b..b825ecb 100644
--- a/lib/ovs-numa.c
+++ b/lib/ovs-numa.c
@@ -58,7 +58,6 @@  VLOG_DEFINE_THIS_MODULE(ovs_numa);
  * TODO: Fix ovs-numa when cpu hotplug is used.
  */
 
-#define MAX_NUMA_NODES 128
 
 /* numa node. */
 struct numa_node {
diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h
index 8f2ea34..ecc251a 100644
--- a/lib/ovs-numa.h
+++ b/lib/ovs-numa.h
@@ -26,6 +26,8 @@ 
 #define OVS_CORE_UNSPEC INT_MAX
 #define OVS_NUMA_UNSPEC INT_MAX
 
+#define MAX_NUMA_NODES 128
+
 /* Dump of a list of 'struct ovs_numa_info'. */
 struct ovs_numa_dump {
     struct hmap cores;