diff mbox series

[ovs-dev,v2] netdev-dpdk: Trigger port reconfiguration in main thread for resets.

Message ID 20240118144109.4096011-1-david.marchand@redhat.com
State Superseded
Headers show
Series [ovs-dev,v2] netdev-dpdk: Trigger port reconfiguration in main thread for resets. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

David Marchand Jan. 18, 2024, 2:41 p.m. UTC
When OVS (main thread) configures a DPDK netdev, it holds a netdev_dpdk
mutex lock.
As part of this configure operation, the net/iavf driver (used with i40e
VF devices) triggers a queue count change. The PF entity (serviced by a
kernel PF driver for example) handles this change and requests back that
the VF driver resets the VF device. The driver then completes the VF reset
operation on its side and waits for completion of the iavf-event thread
responsible for handling various VF device events.

On the other hand, handling of the VF reset request in this iavf-event
thread results in notifying the application with a port reset request
(RTE_ETH_EVENT_INTR_RESET). The OVS reset callback tries to take a hold
of the same netdev_dpdk mutex and blocks the iavf-event thread.

As a resut, the net/iavf driver (still running on OVS main thread) is
unable to complete as it is waiting for iavf-event to complete.

To break from this situation, the OVS reset callback now won't take a
netdev_dpdk mutex. Instead, the port reset request is stored in a simple
RTE_ETH_MAXPORTS array associated to a seq object.
This is enough to let the VF driver complete this port initialisation.
The OVS main thread later handles the port reset request.

More details in the DPDK upstream bz as this issue appeared following a
change in DPDK.

Link: https://bugs.dpdk.org/show_bug.cgi?id=1337
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v1:
- converted to atomic accesses on netdev_dpdk_pending_reset[],


---
 lib/netdev-dpdk.c | 76 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 61 insertions(+), 15 deletions(-)

Comments

David Marchand Jan. 18, 2024, 3:39 p.m. UTC | #1
On Thu, Jan 18, 2024 at 3:41 PM David Marchand
<david.marchand@redhat.com> wrote:
> Changes since v1:
> - converted to atomic accesses on netdev_dpdk_pending_reset[],

I did not wait for the clang results during my tests... cooking v3.
Ilya Maximets Jan. 18, 2024, 3:43 p.m. UTC | #2
On 1/18/24 15:41, David Marchand wrote:
> When OVS (main thread) configures a DPDK netdev, it holds a netdev_dpdk
> mutex lock.
> As part of this configure operation, the net/iavf driver (used with i40e
> VF devices) triggers a queue count change. The PF entity (serviced by a
> kernel PF driver for example) handles this change and requests back that
> the VF driver resets the VF device. The driver then completes the VF reset
> operation on its side and waits for completion of the iavf-event thread
> responsible for handling various VF device events.
> 
> On the other hand, handling of the VF reset request in this iavf-event
> thread results in notifying the application with a port reset request
> (RTE_ETH_EVENT_INTR_RESET). The OVS reset callback tries to take a hold
> of the same netdev_dpdk mutex and blocks the iavf-event thread.
> 
> As a resut, the net/iavf driver (still running on OVS main thread) is
> unable to complete as it is waiting for iavf-event to complete.
> 
> To break from this situation, the OVS reset callback now won't take a
> netdev_dpdk mutex. Instead, the port reset request is stored in a simple
> RTE_ETH_MAXPORTS array associated to a seq object.
> This is enough to let the VF driver complete this port initialisation.
> The OVS main thread later handles the port reset request.
> 
> More details in the DPDK upstream bz as this issue appeared following a
> change in DPDK.
> 
> Link: https://bugs.dpdk.org/show_bug.cgi?id=1337
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
> Changes since v1:
> - converted to atomic accesses on netdev_dpdk_pending_reset[],
> 
> 
> ---
>  lib/netdev-dpdk.c | 76 +++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 61 insertions(+), 15 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index fb26825ff8..6b15e4c03a 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -58,6 +58,7 @@
>  #include "openvswitch/match.h"
>  #include "openvswitch/ofp-parse.h"
>  #include "openvswitch/ofp-print.h"
> +#include "openvswitch/poll-loop.h"
>  #include "openvswitch/shash.h"
>  #include "openvswitch/vlog.h"
>  #include "ovs-numa.h"
> @@ -2101,32 +2102,73 @@ netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
>      return new_port_id;
>  }
>  
> +static struct seq *netdev_dpdk_reset_seq;
> +static uint64_t netdev_dpdk_last_reset_seq;
> +static bool netdev_dpdk_pending_reset[RTE_MAX_ETHPORTS];

Should be an atomic_bool, I suppose.

> +
> +static void
> +netdev_dpdk_wait(const struct netdev_class *netdev_class OVS_UNUSED)
> +{
> +    uint64_t last_reset_seq = seq_read(netdev_dpdk_reset_seq);
> +
> +    if (netdev_dpdk_last_reset_seq == last_reset_seq) {
> +        seq_wait(netdev_dpdk_reset_seq, netdev_dpdk_last_reset_seq);
> +    } else {
> +        poll_immediate_wake();
> +    }
> +}
> +
> +static void
> +netdev_dpdk_run(const struct netdev_class *netdev_class OVS_UNUSED)
> +{
> +    uint64_t reset_seq = seq_read(netdev_dpdk_reset_seq);
> +
> +    if (reset_seq != netdev_dpdk_last_reset_seq) {
> +        dpdk_port_t port_id;
> +
> +        netdev_dpdk_last_reset_seq = reset_seq;
> +
> +        for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) {
> +            struct netdev_dpdk *dev;
> +            bool pending_reset;
> +
> +            atomic_read_relaxed(&netdev_dpdk_pending_reset[port_id],
> +                                 &pending_reset);

Indentation.

> +            if (!pending_reset) {
> +                continue;
> +            }
> +            atomic_store_relaxed(&netdev_dpdk_pending_reset[port_id], false);
> +
> +            ovs_mutex_lock(&dpdk_mutex);
> +            dev = netdev_dpdk_lookup_by_port_id(port_id);
> +            if (dev) {
> +                ovs_mutex_lock(&dev->mutex);
> +                dev->reset_needed = true;
> +                netdev_request_reconfigure(&dev->up);
> +                VLOG_DBG_RL(&rl, "%s: Device reset requested.",
> +                            netdev_get_name(&dev->up));
> +                ovs_mutex_unlock(&dev->mutex);
> +            }
> +            ovs_mutex_unlock(&dpdk_mutex);
> +        }
> +    }
> +}
> +
>  static int
>  dpdk_eth_event_callback(dpdk_port_t port_id, enum rte_eth_event_type type,
>                          void *param OVS_UNUSED, void *ret_param OVS_UNUSED)
>  {
> -    struct netdev_dpdk *dev;
> -
>      switch ((int) type) {
>      case RTE_ETH_EVENT_INTR_RESET:
> -        ovs_mutex_lock(&dpdk_mutex);
> -        dev = netdev_dpdk_lookup_by_port_id(port_id);
> -        if (dev) {
> -            ovs_mutex_lock(&dev->mutex);
> -            dev->reset_needed = true;
> -            netdev_request_reconfigure(&dev->up);
> -            VLOG_DBG_RL(&rl, "%s: Device reset requested.",
> -                        netdev_get_name(&dev->up));
> -            ovs_mutex_unlock(&dev->mutex);
> -        }
> -        ovs_mutex_unlock(&dpdk_mutex);
> +        atomic_store_relaxed(&netdev_dpdk_pending_reset[port_id], true);
> +        seq_change(netdev_dpdk_reset_seq);
>          break;
>  
>      default:
>          /* Ignore all other types. */
>          break;
> -   }
> -   return 0;
> +    }
> +    return 0;
>  }
>  
>  static void
> @@ -5001,6 +5043,8 @@ netdev_dpdk_class_init(void)
>                                   "[netdev]", 0, 1,
>                                   netdev_dpdk_get_mempool_info, NULL);
>  
> +        netdev_dpdk_reset_seq = seq_create();
> +        netdev_dpdk_last_reset_seq = seq_read(netdev_dpdk_reset_seq);
>          ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
>                                              RTE_ETH_EVENT_INTR_RESET,
>                                              dpdk_eth_event_callback, NULL);
> @@ -6593,6 +6637,8 @@ parse_vhost_config(const struct smap *ovs_other_config)
>  #define NETDEV_DPDK_CLASS_BASE                          \
>      NETDEV_DPDK_CLASS_COMMON,                           \
>      .init = netdev_dpdk_class_init,                     \
> +    .wait = netdev_dpdk_wait,                           \
> +    .run = netdev_dpdk_run,                             \
>      .destruct = netdev_dpdk_destruct,                   \
>      .set_tx_multiq = netdev_dpdk_set_tx_multiq,         \
>      .get_carrier = netdev_dpdk_get_carrier,             \
David Marchand Jan. 18, 2024, 3:53 p.m. UTC | #3
On Thu, Jan 18, 2024 at 4:43 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 1/18/24 15:41, David Marchand wrote:
> > When OVS (main thread) configures a DPDK netdev, it holds a netdev_dpdk
> > mutex lock.
> > As part of this configure operation, the net/iavf driver (used with i40e
> > VF devices) triggers a queue count change. The PF entity (serviced by a
> > kernel PF driver for example) handles this change and requests back that
> > the VF driver resets the VF device. The driver then completes the VF reset
> > operation on its side and waits for completion of the iavf-event thread
> > responsible for handling various VF device events.
> >
> > On the other hand, handling of the VF reset request in this iavf-event
> > thread results in notifying the application with a port reset request
> > (RTE_ETH_EVENT_INTR_RESET). The OVS reset callback tries to take a hold
> > of the same netdev_dpdk mutex and blocks the iavf-event thread.
> >
> > As a resut, the net/iavf driver (still running on OVS main thread) is
> > unable to complete as it is waiting for iavf-event to complete.
> >
> > To break from this situation, the OVS reset callback now won't take a
> > netdev_dpdk mutex. Instead, the port reset request is stored in a simple
> > RTE_ETH_MAXPORTS array associated to a seq object.
> > This is enough to let the VF driver complete this port initialisation.
> > The OVS main thread later handles the port reset request.
> >
> > More details in the DPDK upstream bz as this issue appeared following a
> > change in DPDK.
> >
> > Link: https://bugs.dpdk.org/show_bug.cgi?id=1337
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
> > Changes since v1:
> > - converted to atomic accesses on netdev_dpdk_pending_reset[],
> >
> >
> > ---
> >  lib/netdev-dpdk.c | 76 +++++++++++++++++++++++++++++++++++++----------
> >  1 file changed, 61 insertions(+), 15 deletions(-)
> >
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> > index fb26825ff8..6b15e4c03a 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -58,6 +58,7 @@
> >  #include "openvswitch/match.h"
> >  #include "openvswitch/ofp-parse.h"
> >  #include "openvswitch/ofp-print.h"
> > +#include "openvswitch/poll-loop.h"
> >  #include "openvswitch/shash.h"
> >  #include "openvswitch/vlog.h"
> >  #include "ovs-numa.h"
> > @@ -2101,32 +2102,73 @@ netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
> >      return new_port_id;
> >  }
> >
> > +static struct seq *netdev_dpdk_reset_seq;
> > +static uint64_t netdev_dpdk_last_reset_seq;
> > +static bool netdev_dpdk_pending_reset[RTE_MAX_ETHPORTS];
>
> Should be an atomic_bool, I suppose.

Yes, already fixed.


>
> > +
> > +static void
> > +netdev_dpdk_wait(const struct netdev_class *netdev_class OVS_UNUSED)
> > +{
> > +    uint64_t last_reset_seq = seq_read(netdev_dpdk_reset_seq);
> > +
> > +    if (netdev_dpdk_last_reset_seq == last_reset_seq) {
> > +        seq_wait(netdev_dpdk_reset_seq, netdev_dpdk_last_reset_seq);
> > +    } else {
> > +        poll_immediate_wake();
> > +    }
> > +}
> > +
> > +static void
> > +netdev_dpdk_run(const struct netdev_class *netdev_class OVS_UNUSED)
> > +{
> > +    uint64_t reset_seq = seq_read(netdev_dpdk_reset_seq);
> > +
> > +    if (reset_seq != netdev_dpdk_last_reset_seq) {
> > +        dpdk_port_t port_id;
> > +
> > +        netdev_dpdk_last_reset_seq = reset_seq;
> > +
> > +        for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) {
> > +            struct netdev_dpdk *dev;
> > +            bool pending_reset;
> > +
> > +            atomic_read_relaxed(&netdev_dpdk_pending_reset[port_id],
> > +                                 &pending_reset);
>
> Indentation.

Indeed.


>
> > +            if (!pending_reset) {
> > +                continue;
> > +            }
> > +            atomic_store_relaxed(&netdev_dpdk_pending_reset[port_id], false);
> > +
> > +            ovs_mutex_lock(&dpdk_mutex);
> > +            dev = netdev_dpdk_lookup_by_port_id(port_id);
> > +            if (dev) {
> > +                ovs_mutex_lock(&dev->mutex);
> > +                dev->reset_needed = true;
> > +                netdev_request_reconfigure(&dev->up);
> > +                VLOG_DBG_RL(&rl, "%s: Device reset requested.",
> > +                            netdev_get_name(&dev->up));
> > +                ovs_mutex_unlock(&dev->mutex);
> > +            }
> > +            ovs_mutex_unlock(&dpdk_mutex);
> > +        }
> > +    }
> > +}
> > +
> >  static int
> >  dpdk_eth_event_callback(dpdk_port_t port_id, enum rte_eth_event_type type,
> >                          void *param OVS_UNUSED, void *ret_param OVS_UNUSED)
> >  {
> > -    struct netdev_dpdk *dev;
> > -
> >      switch ((int) type) {
> >      case RTE_ETH_EVENT_INTR_RESET:
> > -        ovs_mutex_lock(&dpdk_mutex);
> > -        dev = netdev_dpdk_lookup_by_port_id(port_id);
> > -        if (dev) {
> > -            ovs_mutex_lock(&dev->mutex);
> > -            dev->reset_needed = true;
> > -            netdev_request_reconfigure(&dev->up);
> > -            VLOG_DBG_RL(&rl, "%s: Device reset requested.",
> > -                        netdev_get_name(&dev->up));
> > -            ovs_mutex_unlock(&dev->mutex);
> > -        }
> > -        ovs_mutex_unlock(&dpdk_mutex);
> > +        atomic_store_relaxed(&netdev_dpdk_pending_reset[port_id], true);
> > +        seq_change(netdev_dpdk_reset_seq);
> >          break;
> >
> >      default:
> >          /* Ignore all other types. */
> >          break;
> > -   }
> > -   return 0;
> > +    }
> > +    return 0;
> >  }
> >
> >  static void
> > @@ -5001,6 +5043,8 @@ netdev_dpdk_class_init(void)
> >                                   "[netdev]", 0, 1,
> >                                   netdev_dpdk_get_mempool_info, NULL);
> >
> > +        netdev_dpdk_reset_seq = seq_create();
> > +        netdev_dpdk_last_reset_seq = seq_read(netdev_dpdk_reset_seq);
> >          ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
> >                                              RTE_ETH_EVENT_INTR_RESET,
> >                                              dpdk_eth_event_callback, NULL);
> > @@ -6593,6 +6637,8 @@ parse_vhost_config(const struct smap *ovs_other_config)
> >  #define NETDEV_DPDK_CLASS_BASE                          \
> >      NETDEV_DPDK_CLASS_COMMON,                           \
> >      .init = netdev_dpdk_class_init,                     \
> > +    .wait = netdev_dpdk_wait,                           \
> > +    .run = netdev_dpdk_run,                             \
> >      .destruct = netdev_dpdk_destruct,                   \
> >      .set_tx_multiq = netdev_dpdk_set_tx_multiq,         \
> >      .get_carrier = netdev_dpdk_get_carrier,             \
>

And I forgot to remove the Known issue entry in NEWS.
For v3.
diff mbox series

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index fb26825ff8..6b15e4c03a 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -58,6 +58,7 @@ 
 #include "openvswitch/match.h"
 #include "openvswitch/ofp-parse.h"
 #include "openvswitch/ofp-print.h"
+#include "openvswitch/poll-loop.h"
 #include "openvswitch/shash.h"
 #include "openvswitch/vlog.h"
 #include "ovs-numa.h"
@@ -2101,32 +2102,73 @@  netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
     return new_port_id;
 }
 
+static struct seq *netdev_dpdk_reset_seq;
+static uint64_t netdev_dpdk_last_reset_seq;
+static bool netdev_dpdk_pending_reset[RTE_MAX_ETHPORTS];
+
+static void
+netdev_dpdk_wait(const struct netdev_class *netdev_class OVS_UNUSED)
+{
+    uint64_t last_reset_seq = seq_read(netdev_dpdk_reset_seq);
+
+    if (netdev_dpdk_last_reset_seq == last_reset_seq) {
+        seq_wait(netdev_dpdk_reset_seq, netdev_dpdk_last_reset_seq);
+    } else {
+        poll_immediate_wake();
+    }
+}
+
+static void
+netdev_dpdk_run(const struct netdev_class *netdev_class OVS_UNUSED)
+{
+    uint64_t reset_seq = seq_read(netdev_dpdk_reset_seq);
+
+    if (reset_seq != netdev_dpdk_last_reset_seq) {
+        dpdk_port_t port_id;
+
+        netdev_dpdk_last_reset_seq = reset_seq;
+
+        for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) {
+            struct netdev_dpdk *dev;
+            bool pending_reset;
+
+            atomic_read_relaxed(&netdev_dpdk_pending_reset[port_id],
+                                 &pending_reset);
+            if (!pending_reset) {
+                continue;
+            }
+            atomic_store_relaxed(&netdev_dpdk_pending_reset[port_id], false);
+
+            ovs_mutex_lock(&dpdk_mutex);
+            dev = netdev_dpdk_lookup_by_port_id(port_id);
+            if (dev) {
+                ovs_mutex_lock(&dev->mutex);
+                dev->reset_needed = true;
+                netdev_request_reconfigure(&dev->up);
+                VLOG_DBG_RL(&rl, "%s: Device reset requested.",
+                            netdev_get_name(&dev->up));
+                ovs_mutex_unlock(&dev->mutex);
+            }
+            ovs_mutex_unlock(&dpdk_mutex);
+        }
+    }
+}
+
 static int
 dpdk_eth_event_callback(dpdk_port_t port_id, enum rte_eth_event_type type,
                         void *param OVS_UNUSED, void *ret_param OVS_UNUSED)
 {
-    struct netdev_dpdk *dev;
-
     switch ((int) type) {
     case RTE_ETH_EVENT_INTR_RESET:
-        ovs_mutex_lock(&dpdk_mutex);
-        dev = netdev_dpdk_lookup_by_port_id(port_id);
-        if (dev) {
-            ovs_mutex_lock(&dev->mutex);
-            dev->reset_needed = true;
-            netdev_request_reconfigure(&dev->up);
-            VLOG_DBG_RL(&rl, "%s: Device reset requested.",
-                        netdev_get_name(&dev->up));
-            ovs_mutex_unlock(&dev->mutex);
-        }
-        ovs_mutex_unlock(&dpdk_mutex);
+        atomic_store_relaxed(&netdev_dpdk_pending_reset[port_id], true);
+        seq_change(netdev_dpdk_reset_seq);
         break;
 
     default:
         /* Ignore all other types. */
         break;
-   }
-   return 0;
+    }
+    return 0;
 }
 
 static void
@@ -5001,6 +5043,8 @@  netdev_dpdk_class_init(void)
                                  "[netdev]", 0, 1,
                                  netdev_dpdk_get_mempool_info, NULL);
 
+        netdev_dpdk_reset_seq = seq_create();
+        netdev_dpdk_last_reset_seq = seq_read(netdev_dpdk_reset_seq);
         ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
                                             RTE_ETH_EVENT_INTR_RESET,
                                             dpdk_eth_event_callback, NULL);
@@ -6593,6 +6637,8 @@  parse_vhost_config(const struct smap *ovs_other_config)
 #define NETDEV_DPDK_CLASS_BASE                          \
     NETDEV_DPDK_CLASS_COMMON,                           \
     .init = netdev_dpdk_class_init,                     \
+    .wait = netdev_dpdk_wait,                           \
+    .run = netdev_dpdk_run,                             \
     .destruct = netdev_dpdk_destruct,                   \
     .set_tx_multiq = netdev_dpdk_set_tx_multiq,         \
     .get_carrier = netdev_dpdk_get_carrier,             \