diff mbox series

[ovs-dev,1/1] netdev-dpdk: Don't use PMD driver if not configured successfully

Message ID 247e9ea15649772de88df36f0fc7ea33d533fe31.1524741115.git.echaudro@redhat.com
State Superseded
Delegated to: Ian Stokes
Headers show
Series [ovs-dev,1/1] netdev-dpdk: Don't use PMD driver if not configured successfully | expand

Commit Message

Eelco Chaudron April 26, 2018, 11:12 a.m. UTC
When initialization of the DPDK PMD driver fails
(dpdk_eth_dev_init()), the reconfigure_datapath() function will remove
the port from dp_netdev, and the port is not used.

Now when bridge_reconfigure() is called again, no changes to the
previous failing netdev configuration are detected and therefore the
ports gets added to dp_netdev and used uninitialized. This is causing
exceptions...

The fix has two parts to it. First in netdev-dpdk.c we remember if the
DPDK port was started or not, and when calling
netdev_dpdk_reconfigure() we also try re-initialization if the port
was not already active. The second part of the change is in
dpif-netdev.c where it makes sure netdev_reconfigure() is called if
the port needs reconfiguration, as netdev_is_reconf_required() is only
true until netdev_reconfigure() is called (even if it fails).

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
 lib/dpif-netdev.c | 7 ++++---
 lib/netdev-dpdk.c | 8 +++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

Comments

Stokes, Ian April 26, 2018, 6:44 p.m. UTC | #1
> When initialization of the DPDK PMD driver fails (dpdk_eth_dev_init()),
> the reconfigure_datapath() function will remove the port from dp_netdev,
> and the port is not used.
> 
> Now when bridge_reconfigure() is called again, no changes to the previous
> failing netdev configuration are detected and therefore the ports gets
> added to dp_netdev and used uninitialized. This is causing exceptions...
> 
> The fix has two parts to it. First in netdev-dpdk.c we remember if the
> DPDK port was started or not, and when calling
> netdev_dpdk_reconfigure() we also try re-initialization if the port was
> not already active. The second part of the change is in dpif-netdev.c
> where it makes sure netdev_reconfigure() is called if the port needs
> reconfiguration, as netdev_is_reconf_required() is only true until
> netdev_reconfigure() is called (even if it fails).
> 

Good catch Eelco!

I've had sightings of this bug once or twice when investigating failures at the queue setup stage around setting mtu and configuration. 

I suspect it resolves the same issue.

I'll have time to test and review tomorrow.

Thanks
Ian

> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>  lib/dpif-netdev.c | 7 ++++---
>  lib/netdev-dpdk.c | 8 +++++++-
>  2 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index
> be31fd092..9b0916f4f 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -3327,8 +3327,6 @@ port_reconfigure(struct dp_netdev_port *port)
>      struct netdev *netdev = port->netdev;
>      int i, err;
> 
> -    port->need_reconfigure = false;
> -
>      /* Closes the existing 'rxq's. */
>      for (i = 0; i < port->n_rxq; i++) {
>          netdev_rxq_close(port->rxqs[i].rx);
> @@ -3338,7 +3336,7 @@ port_reconfigure(struct dp_netdev_port *port)
>      port->n_rxq = 0;
> 
>      /* Allows 'netdev' to apply the pending configuration changes. */
> -    if (netdev_is_reconf_required(netdev)) {
> +    if (netdev_is_reconf_required(netdev) || port->need_reconfigure) {
>          err = netdev_reconfigure(netdev);
>          if (err && (err != EOPNOTSUPP)) {
>              VLOG_ERR("Failed to set interface %s new configuration", @@ -
> 3371,6 +3369,9 @@ port_reconfigure(struct dp_netdev_port *port)
>      /* Parse affinity list to apply configuration for new queues. */
>      dpif_netdev_port_set_rxq_affinity(port, port->rxq_affinity_list);
> 
> +    /* If reconfiguration was successful mark it as such, so we can use
> it */
> +    port->need_reconfigure = false;
> +
>      return 0;
>  }
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
> b14140571..2a9128d68 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -356,6 +356,8 @@ struct netdev_dpdk {
> 
>          /* If true, device was attached by rte_eth_dev_attach(). */
>          bool attached;
> +        /* If true, rte_eth_dev_start() was successfully called */
> +        bool started;
>          struct eth_addr hwaddr;
>          int mtu;
>          int socket_id;
> @@ -816,6 +818,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>                   rte_strerror(-diag));
>          return -diag;
>      }
> +    dev->started = true;
> 
>      rte_eth_promiscuous_enable(dev->port_id);
>      rte_eth_allmulticast_enable(dev->port_id);
> @@ -1098,6 +1101,7 @@ netdev_dpdk_destruct(struct netdev *netdev)
>      ovs_mutex_lock(&dpdk_mutex);
> 
>      rte_eth_dev_stop(dev->port_id);
> +    dev->started = false;
> 
>      if (dev->attached) {
>          rte_eth_dev_close(dev->port_id); @@ -3555,13 +3559,15 @@
> netdev_dpdk_reconfigure(struct netdev *netdev)
>          && dev->mtu == dev->requested_mtu
>          && dev->rxq_size == dev->requested_rxq_size
>          && dev->txq_size == dev->requested_txq_size
> -        && dev->socket_id == dev->requested_socket_id) {
> +        && dev->socket_id == dev->requested_socket_id &&
> +        dev->started) {
>          /* Reconfiguration is unnecessary */
> 
>          goto out;
>      }
> 
>      rte_eth_dev_stop(dev->port_id);
> +    dev->started = false;
> 
>      err = netdev_dpdk_mempool_configure(dev);
>      if (err && err != EEXIST) {
> --
> 2.14.3
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Ciara Loftus May 16, 2018, 10:19 a.m. UTC | #2
> 
> When initialization of the DPDK PMD driver fails
> (dpdk_eth_dev_init()), the reconfigure_datapath() function will remove
> the port from dp_netdev, and the port is not used.
> 
> Now when bridge_reconfigure() is called again, no changes to the
> previous failing netdev configuration are detected and therefore the
> ports gets added to dp_netdev and used uninitialized. This is causing
> exceptions...
> 
> The fix has two parts to it. First in netdev-dpdk.c we remember if the
> DPDK port was started or not, and when calling
> netdev_dpdk_reconfigure() we also try re-initialization if the port
> was not already active. The second part of the change is in
> dpif-netdev.c where it makes sure netdev_reconfigure() is called if
> the port needs reconfiguration, as netdev_is_reconf_required() is only
> true until netdev_reconfigure() is called (even if it fails).
> 
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

Tested-by: Ciara Loftus <ciara.loftus@intel.com>

Thanks for the patch, Eelco. I tested it by simulating an error in dpdk_eth_dev_init causing the dpdk port to be removed from dp_netdev. Before this patch, if a vhost port was added after this error, a reconfigure was triggered and I observed a SEGV. With this patch applied I no longer see a SEGV.

The patch needs a rebase and one minor style fix below.

Thanks,
Ciara

> ---
>  lib/dpif-netdev.c | 7 ++++---
>  lib/netdev-dpdk.c | 8 +++++++-
>  2 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index be31fd092..9b0916f4f 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -3327,8 +3327,6 @@ port_reconfigure(struct dp_netdev_port *port)
>      struct netdev *netdev = port->netdev;
>      int i, err;
> 
> -    port->need_reconfigure = false;
> -
>      /* Closes the existing 'rxq's. */
>      for (i = 0; i < port->n_rxq; i++) {
>          netdev_rxq_close(port->rxqs[i].rx);
> @@ -3338,7 +3336,7 @@ port_reconfigure(struct dp_netdev_port *port)
>      port->n_rxq = 0;
> 
>      /* Allows 'netdev' to apply the pending configuration changes. */
> -    if (netdev_is_reconf_required(netdev)) {
> +    if (netdev_is_reconf_required(netdev) || port->need_reconfigure) {
>          err = netdev_reconfigure(netdev);
>          if (err && (err != EOPNOTSUPP)) {
>              VLOG_ERR("Failed to set interface %s new configuration",
> @@ -3371,6 +3369,9 @@ port_reconfigure(struct dp_netdev_port *port)
>      /* Parse affinity list to apply configuration for new queues. */
>      dpif_netdev_port_set_rxq_affinity(port, port->rxq_affinity_list);
> 
> +    /* If reconfiguration was successful mark it as such, so we can use it */
> +    port->need_reconfigure = false;
> +
>      return 0;
>  }
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index b14140571..2a9128d68 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -356,6 +356,8 @@ struct netdev_dpdk {
> 
>          /* If true, device was attached by rte_eth_dev_attach(). */
>          bool attached;
> +        /* If true, rte_eth_dev_start() was successfully called */
> +        bool started;
>          struct eth_addr hwaddr;
>          int mtu;
>          int socket_id;
> @@ -816,6 +818,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>                   rte_strerror(-diag));
>          return -diag;
>      }
> +    dev->started = true;
> 
>      rte_eth_promiscuous_enable(dev->port_id);
>      rte_eth_allmulticast_enable(dev->port_id);
> @@ -1098,6 +1101,7 @@ netdev_dpdk_destruct(struct netdev *netdev)
>      ovs_mutex_lock(&dpdk_mutex);
> 
>      rte_eth_dev_stop(dev->port_id);
> +    dev->started = false;
> 
>      if (dev->attached) {
>          rte_eth_dev_close(dev->port_id);
> @@ -3555,13 +3559,15 @@ netdev_dpdk_reconfigure(struct netdev
> *netdev)
>          && dev->mtu == dev->requested_mtu
>          && dev->rxq_size == dev->requested_rxq_size
>          && dev->txq_size == dev->requested_txq_size
> -        && dev->socket_id == dev->requested_socket_id) {
> +        && dev->socket_id == dev->requested_socket_id &&
> +        dev->started) {

Move the && to the same line as dev->started to be coherent with the lines above.

>          /* Reconfiguration is unnecessary */
> 
>          goto out;
>      }
> 
>      rte_eth_dev_stop(dev->port_id);
> +    dev->started = false;
> 
>      err = netdev_dpdk_mempool_configure(dev);
>      if (err && err != EEXIST) {
> --
> 2.14.3
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Eelco Chaudron May 16, 2018, 2:21 p.m. UTC | #3
On 16/05/18 12:19, Loftus, Ciara wrote:
>> When initialization of the DPDK PMD driver fails
>> (dpdk_eth_dev_init()), the reconfigure_datapath() function will remove
>> the port from dp_netdev, and the port is not used.
>>
>> Now when bridge_reconfigure() is called again, no changes to the
>> previous failing netdev configuration are detected and therefore the
>> ports gets added to dp_netdev and used uninitialized. This is causing
>> exceptions...
>>
>> The fix has two parts to it. First in netdev-dpdk.c we remember if the
>> DPDK port was started or not, and when calling
>> netdev_dpdk_reconfigure() we also try re-initialization if the port
>> was not already active. The second part of the change is in
>> dpif-netdev.c where it makes sure netdev_reconfigure() is called if
>> the port needs reconfiguration, as netdev_is_reconf_required() is only
>> true until netdev_reconfigure() is called (even if it fails).
>>
>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> Tested-by: Ciara Loftus <ciara.loftus@intel.com>
>
> Thanks for the patch, Eelco. I tested it by simulating an error in dpdk_eth_dev_init causing the dpdk port to be removed from dp_netdev. Before this patch, if a vhost port was added after this error, a reconfigure was triggered and I observed a SEGV. With this patch applied I no longer see a SEGV.
>
> The patch needs a rebase and one minor style fix below.
Thanks for testing it! I've sent out a re-based V2, and fixed the style 
issue.
> Thanks,
> Ciara
diff mbox series

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index be31fd092..9b0916f4f 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3327,8 +3327,6 @@  port_reconfigure(struct dp_netdev_port *port)
     struct netdev *netdev = port->netdev;
     int i, err;
 
-    port->need_reconfigure = false;
-
     /* Closes the existing 'rxq's. */
     for (i = 0; i < port->n_rxq; i++) {
         netdev_rxq_close(port->rxqs[i].rx);
@@ -3338,7 +3336,7 @@  port_reconfigure(struct dp_netdev_port *port)
     port->n_rxq = 0;
 
     /* Allows 'netdev' to apply the pending configuration changes. */
-    if (netdev_is_reconf_required(netdev)) {
+    if (netdev_is_reconf_required(netdev) || port->need_reconfigure) {
         err = netdev_reconfigure(netdev);
         if (err && (err != EOPNOTSUPP)) {
             VLOG_ERR("Failed to set interface %s new configuration",
@@ -3371,6 +3369,9 @@  port_reconfigure(struct dp_netdev_port *port)
     /* Parse affinity list to apply configuration for new queues. */
     dpif_netdev_port_set_rxq_affinity(port, port->rxq_affinity_list);
 
+    /* If reconfiguration was successful mark it as such, so we can use it */
+    port->need_reconfigure = false;
+
     return 0;
 }
 
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index b14140571..2a9128d68 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -356,6 +356,8 @@  struct netdev_dpdk {
 
         /* If true, device was attached by rte_eth_dev_attach(). */
         bool attached;
+        /* If true, rte_eth_dev_start() was successfully called */
+        bool started;
         struct eth_addr hwaddr;
         int mtu;
         int socket_id;
@@ -816,6 +818,7 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
                  rte_strerror(-diag));
         return -diag;
     }
+    dev->started = true;
 
     rte_eth_promiscuous_enable(dev->port_id);
     rte_eth_allmulticast_enable(dev->port_id);
@@ -1098,6 +1101,7 @@  netdev_dpdk_destruct(struct netdev *netdev)
     ovs_mutex_lock(&dpdk_mutex);
 
     rte_eth_dev_stop(dev->port_id);
+    dev->started = false;
 
     if (dev->attached) {
         rte_eth_dev_close(dev->port_id);
@@ -3555,13 +3559,15 @@  netdev_dpdk_reconfigure(struct netdev *netdev)
         && dev->mtu == dev->requested_mtu
         && dev->rxq_size == dev->requested_rxq_size
         && dev->txq_size == dev->requested_txq_size
-        && dev->socket_id == dev->requested_socket_id) {
+        && dev->socket_id == dev->requested_socket_id &&
+        dev->started) {
         /* Reconfiguration is unnecessary */
 
         goto out;
     }
 
     rte_eth_dev_stop(dev->port_id);
+    dev->started = false;
 
     err = netdev_dpdk_mempool_configure(dev);
     if (err && err != EEXIST) {