diff mbox series

[ovs-dev,v2,1/1] netdev-dpdk: Don't use PMD driver if not configured successfully

Message ID c9a282a8d23867387a1405f89aeb8ff72ffc9743.1526479118.git.echaudro@redhat.com
State Accepted
Delegated to: Ian Stokes
Headers show
Series [ovs-dev,v2,1/1] netdev-dpdk: Don't use PMD driver if not configured successfully | expand

Commit Message

Eelco Chaudron May 16, 2018, 2:15 p.m. UTC
When initialization of the DPDK PMD driver fails
(dpdk_eth_dev_init()), the reconfigure_datapath() function will remove
the port from dp_netdev, and the port is not used.

Now when bridge_reconfigure() is called again, no changes to the
previous failing netdev configuration are detected and therefore the
ports gets added to dp_netdev and used uninitialized. This is causing
exceptions...

The fix has two parts to it. First in netdev-dpdk.c we remember if the
DPDK port was started or not, and when calling
netdev_dpdk_reconfigure() we also try re-initialization if the port
was not already active. The second part of the change is in
dpif-netdev.c where it makes sure netdev_reconfigure() is called if
the port needs reconfiguration, as netdev_is_reconf_required() is only
true until netdev_reconfigure() is called (even if it fails).

v1 -> v2:

Comments

Stokes, Ian May 23, 2018, 1:42 p.m. UTC | #1
> When initialization of the DPDK PMD driver fails (dpdk_eth_dev_init()),
> the reconfigure_datapath() function will remove the port from dp_netdev,
> and the port is not used.
> 
> Now when bridge_reconfigure() is called again, no changes to the previous
> failing netdev configuration are detected and therefore the ports gets
> added to dp_netdev and used uninitialized. This is causing exceptions...
> 
> The fix has two parts to it. First in netdev-dpdk.c we remember if the
> DPDK port was started or not, and when calling
> netdev_dpdk_reconfigure() we also try re-initialization if the port was
> not already active. The second part of the change is in dpif-netdev.c
> where it makes sure netdev_reconfigure() is called if the port needs
> reconfiguration, as netdev_is_reconf_required() is only true until
> netdev_reconfigure() is called (even if it fails).

Thanks for this Eelco. The code LGTM, in terms of back porting I was able to re-produce the error it fixes on master, 2.9 and 2.8 so I'll backport it to them. However port errors seem to be handled correctly for 2.7 and 2.6 in my testing.

Have you seen the same behavior for 2.7 and 2.6 in your testing? We can backport to those also if you have but just wanted to confirm.

Thanks
Ian

> 
> v1 -> v2:
> =========
> Rebase and fixing minor style issue
> 
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> Tested-by: Ciara Loftus <ciara.loftus@intel.com>
> ---
>  lib/dpif-netdev.c | 7 ++++---
>  lib/netdev-dpdk.c | 8 +++++++-
>  2 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index
> f86ed2aba..c68970fa6 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -3480,8 +3480,6 @@ port_reconfigure(struct dp_netdev_port *port)
>      struct netdev *netdev = port->netdev;
>      int i, err;
> 
> -    port->need_reconfigure = false;
> -
>      /* Closes the existing 'rxq's. */
>      for (i = 0; i < port->n_rxq; i++) {
>          netdev_rxq_close(port->rxqs[i].rx);
> @@ -3491,7 +3489,7 @@ port_reconfigure(struct dp_netdev_port *port)
>      port->n_rxq = 0;
> 
>      /* Allows 'netdev' to apply the pending configuration changes. */
> -    if (netdev_is_reconf_required(netdev)) {
> +    if (netdev_is_reconf_required(netdev) || port->need_reconfigure) {
>          err = netdev_reconfigure(netdev);
>          if (err && (err != EOPNOTSUPP)) {
>              VLOG_ERR("Failed to set interface %s new configuration", @@ -
> 3525,6 +3523,9 @@ port_reconfigure(struct dp_netdev_port *port)
>      /* Parse affinity list to apply configuration for new queues. */
>      dpif_netdev_port_set_rxq_affinity(port, port->rxq_affinity_list);
> 
> +    /* If reconfiguration was successful mark it as such, so we can use
> it */
> +    port->need_reconfigure = false;
> +
>      return 0;
>  }
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
> 87152a7b9..afddf6d53 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -366,6 +366,8 @@ struct netdev_dpdk {
> 
>          /* If true, device was attached by rte_eth_dev_attach(). */
>          bool attached;
> +        /* If true, rte_eth_dev_start() was successfully called */
> +        bool started;
>          struct eth_addr hwaddr;
>          int mtu;
>          int socket_id;
> @@ -910,6 +912,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>                   rte_strerror(-diag));
>          return -diag;
>      }
> +    dev->started = true;
> 
>      rte_eth_promiscuous_enable(dev->port_id);
>      rte_eth_allmulticast_enable(dev->port_id);
> @@ -1193,6 +1196,7 @@ netdev_dpdk_destruct(struct netdev *netdev)
>      ovs_mutex_lock(&dpdk_mutex);
> 
>      rte_eth_dev_stop(dev->port_id);
> +    dev->started = false;
> 
>      if (dev->attached) {
>          rte_eth_dev_close(dev->port_id); @@ -3690,13 +3694,15 @@
> netdev_dpdk_reconfigure(struct netdev *netdev)
>          && dev->lsc_interrupt_mode == dev->requested_lsc_interrupt_mode
>          && dev->rxq_size == dev->requested_rxq_size
>          && dev->txq_size == dev->requested_txq_size
> -        && dev->socket_id == dev->requested_socket_id) {
> +        && dev->socket_id == dev->requested_socket_id
> +        && dev->started) {
>          /* Reconfiguration is unnecessary */
> 
>          goto out;
>      }
> 
>      rte_eth_dev_stop(dev->port_id);
> +    dev->started = false;
> 
>      err = netdev_dpdk_mempool_configure(dev);
>      if (err && err != EEXIST) {
> --
> 2.14.3
Eelco Chaudron May 23, 2018, 1:52 p.m. UTC | #2
On 23/05/18 15:42, Stokes, Ian wrote:
>> When initialization of the DPDK PMD driver fails (dpdk_eth_dev_init()),
>> the reconfigure_datapath() function will remove the port from dp_netdev,
>> and the port is not used.
>>
>> Now when bridge_reconfigure() is called again, no changes to the previous
>> failing netdev configuration are detected and therefore the ports gets
>> added to dp_netdev and used uninitialized. This is causing exceptions...
>>
>> The fix has two parts to it. First in netdev-dpdk.c we remember if the
>> DPDK port was started or not, and when calling
>> netdev_dpdk_reconfigure() we also try re-initialization if the port was
>> not already active. The second part of the change is in dpif-netdev.c
>> where it makes sure netdev_reconfigure() is called if the port needs
>> reconfiguration, as netdev_is_reconf_required() is only true until
>> netdev_reconfigure() is called (even if it fails).
> Thanks for this Eelco. The code LGTM, in terms of back porting I was able to re-produce the error it fixes on master, 2.9 and 2.8 so I'll backport it to them. However port errors seem to be handled correctly for 2.7 and 2.6 in my testing.
>
> Have you seen the same behavior for 2.7 and 2.6 in your testing? We can backport to those also if you have but just wanted to confirm.
I only tested this on 2.8 and 2.9, I did not try any release earlier. If 
you tested 2.6-7 and it handles the port errors correctly, 2.8-9 seems 
enough to me.

> Thanks
> Ian
Stokes, Ian May 23, 2018, 2:38 p.m. UTC | #3
> On 23/05/18 15:42, Stokes, Ian wrote:
> >> When initialization of the DPDK PMD driver fails
> >> (dpdk_eth_dev_init()), the reconfigure_datapath() function will
> >> remove the port from dp_netdev, and the port is not used.
> >>
> >> Now when bridge_reconfigure() is called again, no changes to the
> >> previous failing netdev configuration are detected and therefore the
> >> ports gets added to dp_netdev and used uninitialized. This is causing
> exceptions...
> >>
> >> The fix has two parts to it. First in netdev-dpdk.c we remember if
> >> the DPDK port was started or not, and when calling
> >> netdev_dpdk_reconfigure() we also try re-initialization if the port
> >> was not already active. The second part of the change is in
> >> dpif-netdev.c where it makes sure netdev_reconfigure() is called if
> >> the port needs reconfiguration, as netdev_is_reconf_required() is
> >> only true until
> >> netdev_reconfigure() is called (even if it fails).
> > Thanks for this Eelco. The code LGTM, in terms of back porting I was
> able to re-produce the error it fixes on master, 2.9 and 2.8 so I'll
> backport it to them. However port errors seem to be handled correctly for
> 2.7 and 2.6 in my testing.
> >
> > Have you seen the same behavior for 2.7 and 2.6 in your testing? We can
> backport to those also if you have but just wanted to confirm.
> I only tested this on 2.8 and 2.9, I did not try any release earlier. If
> you tested 2.6-7 and it handles the port errors correctly, 2.8-9 seems
> enough to me.

Ok I'll backport to master, 2.9 & 2.8 for now. Again thanks for working on this.

Ian
> 
> > Thanks
> > Ian
diff mbox series

Patch

=========
Rebase and fixing minor style issue

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Tested-by: Ciara Loftus <ciara.loftus@intel.com>
---
 lib/dpif-netdev.c | 7 ++++---
 lib/netdev-dpdk.c | 8 +++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f86ed2aba..c68970fa6 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3480,8 +3480,6 @@  port_reconfigure(struct dp_netdev_port *port)
     struct netdev *netdev = port->netdev;
     int i, err;
 
-    port->need_reconfigure = false;
-
     /* Closes the existing 'rxq's. */
     for (i = 0; i < port->n_rxq; i++) {
         netdev_rxq_close(port->rxqs[i].rx);
@@ -3491,7 +3489,7 @@  port_reconfigure(struct dp_netdev_port *port)
     port->n_rxq = 0;
 
     /* Allows 'netdev' to apply the pending configuration changes. */
-    if (netdev_is_reconf_required(netdev)) {
+    if (netdev_is_reconf_required(netdev) || port->need_reconfigure) {
         err = netdev_reconfigure(netdev);
         if (err && (err != EOPNOTSUPP)) {
             VLOG_ERR("Failed to set interface %s new configuration",
@@ -3525,6 +3523,9 @@  port_reconfigure(struct dp_netdev_port *port)
     /* Parse affinity list to apply configuration for new queues. */
     dpif_netdev_port_set_rxq_affinity(port, port->rxq_affinity_list);
 
+    /* If reconfiguration was successful mark it as such, so we can use it */
+    port->need_reconfigure = false;
+
     return 0;
 }
 
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 87152a7b9..afddf6d53 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -366,6 +366,8 @@  struct netdev_dpdk {
 
         /* If true, device was attached by rte_eth_dev_attach(). */
         bool attached;
+        /* If true, rte_eth_dev_start() was successfully called */
+        bool started;
         struct eth_addr hwaddr;
         int mtu;
         int socket_id;
@@ -910,6 +912,7 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
                  rte_strerror(-diag));
         return -diag;
     }
+    dev->started = true;
 
     rte_eth_promiscuous_enable(dev->port_id);
     rte_eth_allmulticast_enable(dev->port_id);
@@ -1193,6 +1196,7 @@  netdev_dpdk_destruct(struct netdev *netdev)
     ovs_mutex_lock(&dpdk_mutex);
 
     rte_eth_dev_stop(dev->port_id);
+    dev->started = false;
 
     if (dev->attached) {
         rte_eth_dev_close(dev->port_id);
@@ -3690,13 +3694,15 @@  netdev_dpdk_reconfigure(struct netdev *netdev)
         && dev->lsc_interrupt_mode == dev->requested_lsc_interrupt_mode
         && dev->rxq_size == dev->requested_rxq_size
         && dev->txq_size == dev->requested_txq_size
-        && dev->socket_id == dev->requested_socket_id) {
+        && dev->socket_id == dev->requested_socket_id
+        && dev->started) {
         /* Reconfiguration is unnecessary */
 
         goto out;
     }
 
     rte_eth_dev_stop(dev->port_id);
+    dev->started = false;
 
     err = netdev_dpdk_mempool_configure(dev);
     if (err && err != EEXIST) {