diff mbox series

[ovs-dev,PATCHv3] netdev-linux: Detect numa node id.

Message ID 1570048017-94384-1-git-send-email-u9012063@gmail.com
State Superseded
Headers show
Series [ovs-dev,PATCHv3] netdev-linux: Detect numa node id. | expand

Commit Message

William Tu Oct. 2, 2019, 8:26 p.m. UTC
The patch detects the numa node id from the name of the netdev,
by reading the '/sys/class/net/<devname>/device/numa_node'.
If not available, ex: virtual device, or any error happens,
return numa id 0.  Currently only the afxdp netdev type uses it,
other linux netdev types are disabled due to no use case.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/592728452
Signed-off-by: William Tu <u9012063@gmail.com>
---
v3:
  Feedbacks from Ilya and Eelco
  - update doc, afxdp.rst
  - fix coding style
  - fix limit of numa node max, by using ovs_numa_numa_id_is_valid
  - move the function to netdev-linux
  - cache the result of numa_id
  - add security check for netdev name
  - use fscanf instead of read and convert to int
  - revise some error message content

v2:
  address feedback from Eelco
    fix memory leak of xaspintf
    log using INFO instead of WARN
---
 Documentation/intro/install/afxdp.rst |  1 -
 lib/netdev-afxdp.c                    | 11 -------
 lib/netdev-linux-private.h            |  2 ++
 lib/netdev-linux.c                    | 57 ++++++++++++++++++++++++++++++++++-
 4 files changed, 58 insertions(+), 13 deletions(-)

Comments

0-day Robot Oct. 2, 2019, 8:58 p.m. UTC | #1
Bleep bloop.  Greetings William Tu, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line lacks whitespace around operator
#106 FILE: lib/netdev-linux.c:1416:
    numa_node_path = xasprintf("/sys/class/net/%s/device/numa_node", name);

Lines checked: 153, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
Eelco Chaudron Oct. 15, 2019, 11:45 a.m. UTC | #2
See some comment below…

On 2 Oct 2019, at 22:26, William Tu wrote:

> The patch detects the numa node id from the name of the netdev,
> by reading the '/sys/class/net/<devname>/device/numa_node'.
> If not available, ex: virtual device, or any error happens,
> return numa id 0.  Currently only the afxdp netdev type uses it,
> other linux netdev types are disabled due to no use case.
>
> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/592728452
> Signed-off-by: William Tu <u9012063@gmail.com>
> ---
> v3:
>   Feedbacks from Ilya and Eelco
>   - update doc, afxdp.rst
>   - fix coding style
>   - fix limit of numa node max, by using ovs_numa_numa_id_is_valid
>   - move the function to netdev-linux
>   - cache the result of numa_id
>   - add security check for netdev name
>   - use fscanf instead of read and convert to int
>   - revise some error message content
>
> v2:
>   address feedback from Eelco
>     fix memory leak of xaspintf
>     log using INFO instead of WARN
> ---
>  Documentation/intro/install/afxdp.rst |  1 -
>  lib/netdev-afxdp.c                    | 11 -------
>  lib/netdev-linux-private.h            |  2 ++
>  lib/netdev-linux.c                    | 57 
> ++++++++++++++++++++++++++++++++++-
>  4 files changed, 58 insertions(+), 13 deletions(-)
>
> diff --git a/Documentation/intro/install/afxdp.rst 
> b/Documentation/intro/install/afxdp.rst
> index 820e9d993d8f..6c00c4ad1356 100644
> --- a/Documentation/intro/install/afxdp.rst
> +++ b/Documentation/intro/install/afxdp.rst
> @@ -309,7 +309,6 @@ Below is a script using namespaces and veth peer::
>
>  Limitations/Known Issues
>  ------------------------
> -#. Device's numa ID is always 0, need a way to find numa id from a 
> netdev.
>  #. No QoS support because AF_XDP netdev by-pass the Linux TC layer. A 
> possible
>     work-around is to use OpenFlow meter action.
>  #. Most of the tests are done using i40e single port. Multiple ports 
> and
> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> index 6e01803272aa..95bf6dcd5b2b 100644
> --- a/lib/netdev-afxdp.c
> +++ b/lib/netdev-afxdp.c
> @@ -549,17 +549,6 @@ out:
>      return err;
>  }
>
> -int
> -netdev_afxdp_get_numa_id(const struct netdev *netdev)
> -{
> -    /* FIXME: Get netdev's PCIe device ID, then find
> -     * its NUMA node id.
> -     */
> -    VLOG_INFO("FIXME: Device %s always use numa id 0.",
> -              netdev_get_name(netdev));
> -    return 0;
> -}
> -
>  static void
>  xsk_remove_xdp_program(uint32_t ifindex, int xdpmode)
>  {
> diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
> index a350be151147..c8f2be47b10b 100644
> --- a/lib/netdev-linux-private.h
> +++ b/lib/netdev-linux-private.h
> @@ -96,6 +96,8 @@ struct netdev_linux {
>      /* LAG information. */
>      bool is_lag_master;         /* True if the netdev is a LAG 
> master. */
>
> +    int numa_id;                /* NUMA node id. */
> +
>  #ifdef HAVE_AF_XDP
>      /* AF_XDP information. */
>      struct xsk_socket_info **xsks;
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index f4819237370a..bdf077ed5dd0 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -236,6 +236,7 @@ enum {
>      VALID_VPORT_STAT_ERROR  = 1 << 5,
>      VALID_DRVINFO           = 1 << 6,
>      VALID_FEATURES          = 1 << 7,
> +    VALID_NUMA_ID           = 1 << 8,
>  };
>  
>  struct linux_lag_slave {
> @@ -1391,6 +1392,60 @@ netdev_linux_tap_batch_send(struct netdev 
> *netdev_,
>      return 0;
>  }
>
> +static int OVS_UNUSED
> +netdev_linux_get_numa_id(const struct netdev *netdev_)
> +{
> +    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> +    char *numa_node_path;
> +    const char *name;
> +    int node_id;
> +    FILE *stream;
> +
> +    if (netdev->cache_valid & VALID_NUMA_ID) {
> +        return netdev->numa_id;
> +    }
> +
> +    name = netdev_get_name(netdev_);
> +    if (strchr(name, '/') || strchr(name, '\\')) {

maybe use strpbrk() here to avoid walking the name sting twice?

> +        VLOG_ERR_RL(&rl, "\"%s\" is not a valid name for a port. "
> +                    "A valid name must not include '/' or '\\'."
> +                    "Using numa_id 0", name);
> +        return 0;
> +    }
> +
> +    numa_node_path = xasprintf("/sys/class/net/%s/device/numa_node", 
> name);
> +
> +    stream = fopen(numa_node_path, "r");
> +    if (!stream) {
> +        /* Virtual device does not have this info. */
> +        VLOG_INFO_RL(&rl, "%s: Can't open '%s': %s, using numa_id 0",
> +                     name, numa_node_path, ovs_strerror(errno));
> +        free(numa_node_path);
I think for clarity we should do “netdev->numa_id = 0”
> +        netdev->cache_valid |= VALID_NUMA_ID;
> +        return 0;
> +    }
> +
> +    if (fscanf(stream, "%d", &node_id) != 1) {
> +        goto error;
> +    };
> +
> +    if (!ovs_numa_numa_id_is_valid(node_id)) {
> +        goto error;
> +    }
> +
> +    netdev->numa_id = node_id;
> +    netdev->cache_valid |= VALID_NUMA_ID;
> +    fclose(stream);
> +    free(numa_node_path);
> +    return node_id;
> +
> +error:
> +    VLOG_WARN_RL(&rl, "%s: Can't detect NUMA node, using numa_id 0", 
> name);
> +    free(numa_node_path);
Should we no do:

netdev->numa_id = 0”
netdev->cache_valid |= VALID_NUMA_ID;


> +    fclose(stream);
> +    return 0;
> +}
> +
>  /* Sends 'batch' on 'netdev'.  Returns 0 if successful, otherwise a 
> positive
>   * errno value.  Returns EAGAIN without blocking if the packet cannot 
> be queued
>   * immediately.  Returns EMSGSIZE if a partial packet was transmitted 
> or if
> @@ -3298,7 +3353,7 @@ const struct netdev_class netdev_afxdp_class = {
>      .set_config = netdev_afxdp_set_config,
>      .get_config = netdev_afxdp_get_config,
>      .reconfigure = netdev_afxdp_reconfigure,
> -    .get_numa_id = netdev_afxdp_get_numa_id,
> +    .get_numa_id = netdev_linux_get_numa_id,
>      .send = netdev_afxdp_batch_send,
>      .rxq_construct = netdev_afxdp_rxq_construct,
>      .rxq_destruct = netdev_afxdp_rxq_destruct,
> -- 
> 2.7.4
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
William Tu Oct. 17, 2019, 7:54 p.m. UTC | #3
On Tue, Oct 15, 2019 at 01:45:19PM +0200, Eelco Chaudron wrote:
> See some comment below…

Thank you. Sorry for my late response.

> 
> On 2 Oct 2019, at 22:26, William Tu wrote:
> 
> >The patch detects the numa node id from the name of the netdev,
> >by reading the '/sys/class/net/<devname>/device/numa_node'.
> >If not available, ex: virtual device, or any error happens,
> >return numa id 0.  Currently only the afxdp netdev type uses it,
> >other linux netdev types are disabled due to no use case.
> >
> >Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/592728452
> >Signed-off-by: William Tu <u9012063@gmail.com>
> >---
> >v3:
> >  Feedbacks from Ilya and Eelco
> >  - update doc, afxdp.rst
> >  - fix coding style
> >  - fix limit of numa node max, by using ovs_numa_numa_id_is_valid
> >  - move the function to netdev-linux
> >  - cache the result of numa_id
> >  - add security check for netdev name
> >  - use fscanf instead of read and convert to int
> >  - revise some error message content
> >
> >v2:
> >  address feedback from Eelco
> >    fix memory leak of xaspintf
> >    log using INFO instead of WARN
> >---
> > Documentation/intro/install/afxdp.rst |  1 -
> > lib/netdev-afxdp.c                    | 11 -------
> > lib/netdev-linux-private.h            |  2 ++
> > lib/netdev-linux.c                    | 57
> >++++++++++++++++++++++++++++++++++-
> > 4 files changed, 58 insertions(+), 13 deletions(-)
> >
> >diff --git a/Documentation/intro/install/afxdp.rst
> >b/Documentation/intro/install/afxdp.rst
> >index 820e9d993d8f..6c00c4ad1356 100644
> >--- a/Documentation/intro/install/afxdp.rst
> >+++ b/Documentation/intro/install/afxdp.rst
> >@@ -309,7 +309,6 @@ Below is a script using namespaces and veth peer::
> >
> > Limitations/Known Issues
> > ------------------------
> >-#. Device's numa ID is always 0, need a way to find numa id from a
> >netdev.
> > #. No QoS support because AF_XDP netdev by-pass the Linux TC layer. A
> >possible
> >    work-around is to use OpenFlow meter action.
> > #. Most of the tests are done using i40e single port. Multiple ports and
> >diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> >index 6e01803272aa..95bf6dcd5b2b 100644
> >--- a/lib/netdev-afxdp.c
> >+++ b/lib/netdev-afxdp.c
> >@@ -549,17 +549,6 @@ out:
> >     return err;
> > }
> >
> >-int
> >-netdev_afxdp_get_numa_id(const struct netdev *netdev)
> >-{
> >-    /* FIXME: Get netdev's PCIe device ID, then find
> >-     * its NUMA node id.
> >-     */
> >-    VLOG_INFO("FIXME: Device %s always use numa id 0.",
> >-              netdev_get_name(netdev));
> >-    return 0;
> >-}
> >-
> > static void
> > xsk_remove_xdp_program(uint32_t ifindex, int xdpmode)
> > {
> >diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
> >index a350be151147..c8f2be47b10b 100644
> >--- a/lib/netdev-linux-private.h
> >+++ b/lib/netdev-linux-private.h
> >@@ -96,6 +96,8 @@ struct netdev_linux {
> >     /* LAG information. */
> >     bool is_lag_master;         /* True if the netdev is a LAG master. */
> >
> >+    int numa_id;                /* NUMA node id. */
> >+
> > #ifdef HAVE_AF_XDP
> >     /* AF_XDP information. */
> >     struct xsk_socket_info **xsks;
> >diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> >index f4819237370a..bdf077ed5dd0 100644
> >--- a/lib/netdev-linux.c
> >+++ b/lib/netdev-linux.c
> >@@ -236,6 +236,7 @@ enum {
> >     VALID_VPORT_STAT_ERROR  = 1 << 5,
> >     VALID_DRVINFO           = 1 << 6,
> >     VALID_FEATURES          = 1 << 7,
> >+    VALID_NUMA_ID           = 1 << 8,
> > };
> > 
> > struct linux_lag_slave {
> >@@ -1391,6 +1392,60 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
> >     return 0;
> > }
> >
> >+static int OVS_UNUSED
> >+netdev_linux_get_numa_id(const struct netdev *netdev_)
> >+{
> >+    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> >+    char *numa_node_path;
> >+    const char *name;
> >+    int node_id;
> >+    FILE *stream;
> >+
> >+    if (netdev->cache_valid & VALID_NUMA_ID) {
> >+        return netdev->numa_id;
> >+    }
> >+
> >+    name = netdev_get_name(netdev_);
> >+    if (strchr(name, '/') || strchr(name, '\\')) {
> 
> maybe use strpbrk() here to avoid walking the name sting twice?

OK, make sense.
> 
> >+        VLOG_ERR_RL(&rl, "\"%s\" is not a valid name for a port. "
> >+                    "A valid name must not include '/' or '\\'."
> >+                    "Using numa_id 0", name);
> >+        return 0;
> >+    }
> >+
> >+    numa_node_path = xasprintf("/sys/class/net/%s/device/numa_node",
> >name);
> >+
> >+    stream = fopen(numa_node_path, "r");
> >+    if (!stream) {
> >+        /* Virtual device does not have this info. */
> >+        VLOG_INFO_RL(&rl, "%s: Can't open '%s': %s, using numa_id 0",
> >+                     name, numa_node_path, ovs_strerror(errno));
> >+        free(numa_node_path);
> I think for clarity we should do “netdev->numa_id = 0”

Yes

> >+        netdev->cache_valid |= VALID_NUMA_ID;
> >+        return 0;
> >+    }
> >+
> >+    if (fscanf(stream, "%d", &node_id) != 1) {
> >+        goto error;
> >+    };
> >+
> >+    if (!ovs_numa_numa_id_is_valid(node_id)) {
> >+        goto error;
> >+    }
> >+
> >+    netdev->numa_id = node_id;
> >+    netdev->cache_valid |= VALID_NUMA_ID;
> >+    fclose(stream);
> >+    free(numa_node_path);
> >+    return node_id;
> >+
> >+error:
> >+    VLOG_WARN_RL(&rl, "%s: Can't detect NUMA node, using numa_id 0",
> >name);
> >+    free(numa_node_path);
> Should we no do:
> 
> netdev->numa_id = 0”
> netdev->cache_valid |= VALID_NUMA_ID;

Yes, thanks.
I move the two to the beginning of the function.
NUMA node id of a netdev should be pretty stable, so once
we detect it, it's less likely to change.

I will send out v4.

Regards,
William
diff mbox series

Patch

diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst
index 820e9d993d8f..6c00c4ad1356 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -309,7 +309,6 @@  Below is a script using namespaces and veth peer::
 
 Limitations/Known Issues
 ------------------------
-#. Device's numa ID is always 0, need a way to find numa id from a netdev.
 #. No QoS support because AF_XDP netdev by-pass the Linux TC layer. A possible
    work-around is to use OpenFlow meter action.
 #. Most of the tests are done using i40e single port. Multiple ports and
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 6e01803272aa..95bf6dcd5b2b 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -549,17 +549,6 @@  out:
     return err;
 }
 
-int
-netdev_afxdp_get_numa_id(const struct netdev *netdev)
-{
-    /* FIXME: Get netdev's PCIe device ID, then find
-     * its NUMA node id.
-     */
-    VLOG_INFO("FIXME: Device %s always use numa id 0.",
-              netdev_get_name(netdev));
-    return 0;
-}
-
 static void
 xsk_remove_xdp_program(uint32_t ifindex, int xdpmode)
 {
diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
index a350be151147..c8f2be47b10b 100644
--- a/lib/netdev-linux-private.h
+++ b/lib/netdev-linux-private.h
@@ -96,6 +96,8 @@  struct netdev_linux {
     /* LAG information. */
     bool is_lag_master;         /* True if the netdev is a LAG master. */
 
+    int numa_id;                /* NUMA node id. */
+
 #ifdef HAVE_AF_XDP
     /* AF_XDP information. */
     struct xsk_socket_info **xsks;
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index f4819237370a..bdf077ed5dd0 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -236,6 +236,7 @@  enum {
     VALID_VPORT_STAT_ERROR  = 1 << 5,
     VALID_DRVINFO           = 1 << 6,
     VALID_FEATURES          = 1 << 7,
+    VALID_NUMA_ID           = 1 << 8,
 };
 
 struct linux_lag_slave {
@@ -1391,6 +1392,60 @@  netdev_linux_tap_batch_send(struct netdev *netdev_,
     return 0;
 }
 
+static int OVS_UNUSED
+netdev_linux_get_numa_id(const struct netdev *netdev_)
+{
+    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+    char *numa_node_path;
+    const char *name;
+    int node_id;
+    FILE *stream;
+
+    if (netdev->cache_valid & VALID_NUMA_ID) {
+        return netdev->numa_id;
+    }
+
+    name = netdev_get_name(netdev_);
+    if (strchr(name, '/') || strchr(name, '\\')) {
+        VLOG_ERR_RL(&rl, "\"%s\" is not a valid name for a port. "
+                    "A valid name must not include '/' or '\\'."
+                    "Using numa_id 0", name);
+        return 0;
+    }
+
+    numa_node_path = xasprintf("/sys/class/net/%s/device/numa_node", name);
+
+    stream = fopen(numa_node_path, "r");
+    if (!stream) {
+        /* Virtual device does not have this info. */
+        VLOG_INFO_RL(&rl, "%s: Can't open '%s': %s, using numa_id 0",
+                     name, numa_node_path, ovs_strerror(errno));
+        free(numa_node_path);
+        netdev->cache_valid |= VALID_NUMA_ID;
+        return 0;
+    }
+
+    if (fscanf(stream, "%d", &node_id) != 1) {
+        goto error;
+    };
+
+    if (!ovs_numa_numa_id_is_valid(node_id)) {
+        goto error;
+    }
+
+    netdev->numa_id = node_id;
+    netdev->cache_valid |= VALID_NUMA_ID;
+    fclose(stream);
+    free(numa_node_path);
+    return node_id;
+
+error:
+    VLOG_WARN_RL(&rl, "%s: Can't detect NUMA node, using numa_id 0", name);
+    free(numa_node_path);
+    fclose(stream);
+    return 0;
+}
+
 /* Sends 'batch' on 'netdev'.  Returns 0 if successful, otherwise a positive
  * errno value.  Returns EAGAIN without blocking if the packet cannot be queued
  * immediately.  Returns EMSGSIZE if a partial packet was transmitted or if
@@ -3298,7 +3353,7 @@  const struct netdev_class netdev_afxdp_class = {
     .set_config = netdev_afxdp_set_config,
     .get_config = netdev_afxdp_get_config,
     .reconfigure = netdev_afxdp_reconfigure,
-    .get_numa_id = netdev_afxdp_get_numa_id,
+    .get_numa_id = netdev_linux_get_numa_id,
     .send = netdev_afxdp_batch_send,
     .rxq_construct = netdev_afxdp_rxq_construct,
     .rxq_destruct = netdev_afxdp_rxq_destruct,