
[ovs-dev,v2] netdev-dpdk: Fix deadlock due to virtqueue stats retrieval.

Message ID 20230117080425.1994893-1-david.marchand@redhat.com
State Accepted
Commit e24b68fa708c1c31388bee24ebe781dc49b284da
Series [ovs-dev,v2] netdev-dpdk: Fix deadlock due to virtqueue stats retrieval.

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

David Marchand Jan. 17, 2023, 8:04 a.m. UTC
As Ilya reported, we have an ABBA deadlock between the DPDK vq->access_lock
and the OVS dev->mutex when the OVS main thread refreshes statistics while a
vring state change event is being processed for the same vhost port.

To break out of this situation, move the handling of vring state change
notifications from the vhost-events DPDK thread to a dedicated thread
using a lockless queue.

Besides, for the case when a bogus/malicious guest sends continuous
updates, add a counter of pending updates in the queue and warn if a
threshold of 1000 entries is reached.

Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2023-January/401101.html
Fixes: 3b29286db1c5 ("netdev-dpdk: Add per virtqueue statistics.")
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v1:
- moved handling of events to a dedicated thread,
- added a warning if a large number of updates is pending.

---
 lib/netdev-dpdk.c | 122 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 102 insertions(+), 20 deletions(-)
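
For readers unfamiliar with the failure mode: this is a classic ABBA
lock-ordering inversion. The sketch below is purely illustrative (plain
pthreads with simplified names; it is not OVS or DPDK code): thread 1
stands in for the OVS main thread refreshing statistics, thread 2 for
DPDK's vhost-events thread delivering a vring state change. Each thread
can grab its first lock and then block forever on the second.

#include <pthread.h>
#include <stdio.h>

/* Stand-ins for OVS dev->mutex (A) and DPDK vq->access_lock (B). */
static pthread_mutex_t dev_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t vq_lock = PTHREAD_MUTEX_INITIALIZER;

/* OVS main thread: the stats refresh takes dev->mutex, then the DPDK
 * per-virtqueue stats API takes vq->access_lock internally. */
static void *
stats_refresh(void *arg)
{
    pthread_mutex_lock(&dev_mutex);   /* A */
    pthread_mutex_lock(&vq_lock);     /* then B */
    /* ... read per-virtqueue statistics ... */
    pthread_mutex_unlock(&vq_lock);
    pthread_mutex_unlock(&dev_mutex);
    return NULL;
}

/* DPDK vhost-events thread: the vring state change callback runs with
 * vq->access_lock held and then needs dev->mutex. */
static void *
state_change(void *arg)
{
    pthread_mutex_lock(&vq_lock);     /* B */
    pthread_mutex_lock(&dev_mutex);   /* then A: ABBA, may deadlock. */
    /* ... update rxq/txq enablement ... */
    pthread_mutex_unlock(&dev_mutex);
    pthread_mutex_unlock(&vq_lock);
    return NULL;
}

int
main(void)
{
    pthread_t t1, t2;

    pthread_create(&t1, NULL, stats_refresh, NULL);
    pthread_create(&t2, NULL, state_change, NULL);
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);
    printf("no deadlock this time\n"); /* Not guaranteed to be reached. */
    return 0;
}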

Comments

Maxime Coquelin Jan. 17, 2023, 8:47 a.m. UTC | #1
Hi David,

On 1/17/23 09:04, David Marchand wrote:
> As Ilya reported, we have an ABBA deadlock between the DPDK vq->access_lock
> and the OVS dev->mutex when the OVS main thread refreshes statistics while a
> vring state change event is being processed for the same vhost port.
> 
> To break out of this situation, move the handling of vring state change
> notifications from the vhost-events DPDK thread to a dedicated thread
> using a lockless queue.
> 
> Besides, for the case when a bogus/malicious guest sends continuous
> updates, add a counter of pending updates in the queue and warn if a
> threshold of 1000 entries is reached.
> 
> Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2023-January/401101.html
> Fixes: 3b29286db1c5 ("netdev-dpdk: Add per virtqueue statistics.")
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
> Changes since v1:
> - moved handling of events to a dedicated thread,
> - added a warning if a large number of updates is pending.
> 
> ---
>   lib/netdev-dpdk.c | 122 ++++++++++++++++++++++++++++++++++++++--------
>   1 file changed, 102 insertions(+), 20 deletions(-)
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime
Ilya Maximets Jan. 19, 2023, 8:09 p.m. UTC | #2
On 1/17/23 09:47, Maxime Coquelin wrote:
> Hi David,
> 
> On 1/17/23 09:04, David Marchand wrote:
>> As Ilya reported, we have an ABBA deadlock between the DPDK vq->access_lock
>> and the OVS dev->mutex when the OVS main thread refreshes statistics while a
>> vring state change event is being processed for the same vhost port.
>>
>> To break out of this situation, move the handling of vring state change
>> notifications from the vhost-events DPDK thread to a dedicated thread
>> using a lockless queue.
>>
>> Besides, for the case when a bogus/malicious guest sends continuous
>> updates, add a counter of pending updates in the queue and warn if a
>> threshold of 1000 entries is reached.
>>
>> Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2023-January/401101.html
>> Fixes: 3b29286db1c5 ("netdev-dpdk: Add per virtqueue statistics.")
>> Signed-off-by: David Marchand <david.marchand@redhat.com>
>> ---
>> Changes since v1:
>> - moved handling of events to a dedicated thread,
>> - added a warning if a large number of updates is pending.
>>
>> ---
>>   lib/netdev-dpdk.c | 122 ++++++++++++++++++++++++++++++++++++++--------
>>   1 file changed, 102 insertions(+), 20 deletions(-)
>>
> 
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks, David and Maxime!

Applied and backported to 3.1.

Best regards, Ilya Maximets.
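
For reference, here is the shape of the fix, condensed from the patch
below (error handling, the pending-updates counter, and the idle backoff
are elided): the DPDK callback, which may run with vq->access_lock held,
only allocates a record and pushes it onto a lockless MPSC queue; a
dedicated "ovs_vhost" thread pops records and takes dpdk_mutex and
dev->mutex while holding no DPDK lock, so the ABBA cycle can no longer
form.

/* Producer: DPDK's vhost-events thread, possibly under vq->access_lock.
 * It takes no OVS mutex, removing one edge of the lock cycle. */
static int
vring_state_changed(int vid, uint16_t queue_id, int enable)
{
    struct vhost_state_change *sc = xmalloc(sizeof *sc);

    rte_vhost_get_ifname(vid, sc->ifname, sizeof sc->ifname);
    sc->queue_id = queue_id;
    sc->enable = enable;
    mpsc_queue_insert(&vhost_state_change_queue, &sc->node); /* lockless */
    return 0;
}

/* Consumer: the dedicated "ovs_vhost" thread is now the only place these
 * events take dpdk_mutex and dev->mutex, with no DPDK lock held. */
static void *
netdev_dpdk_vhost_events_main(void *arg OVS_UNUSED)
{
    mpsc_queue_acquire(&vhost_state_change_queue);
    for (;;) {
        struct mpsc_queue_node *node;

        MPSC_QUEUE_FOR_EACH_POP (node, &vhost_state_change_queue) {
            struct vhost_state_change *sc
                = CONTAINER_OF(node, struct vhost_state_change, node);

            vring_state_changed__(sc); /* Takes dpdk_mutex, dev->mutex. */
            free(sc);
        }
        /* The actual patch sleeps here with exponential backoff. */
    }
    return NULL;
}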

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 5e2d64651d..916fae39b7 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -49,6 +49,7 @@ 
 #include "dpif-netdev.h"
 #include "fatal-signal.h"
 #include "if-notifier.h"
+#include "mpsc-queue.h"
 #include "netdev-provider.h"
 #include "netdev-vport.h"
 #include "odp-util.h"
@@ -4255,30 +4256,38 @@  destroy_device(int vid)
     }
 }
 
-static int
-vring_state_changed(int vid, uint16_t queue_id, int enable)
+static struct mpsc_queue vhost_state_change_queue
+    = MPSC_QUEUE_INITIALIZER(&vhost_state_change_queue);
+static atomic_uint64_t vhost_state_change_queue_size;
+
+struct vhost_state_change {
+    struct mpsc_queue_node node;
+    char ifname[IF_NAME_SZ];
+    uint16_t queue_id;
+    int enable;
+};
+
+static void
+vring_state_changed__(struct vhost_state_change *sc)
 {
     struct netdev_dpdk *dev;
     bool exists = false;
-    int qid = queue_id / VIRTIO_QNUM;
-    bool is_rx = (queue_id % VIRTIO_QNUM) == VIRTIO_TXQ;
-    char ifname[IF_NAME_SZ];
-
-    rte_vhost_get_ifname(vid, ifname, sizeof ifname);
+    int qid = sc->queue_id / VIRTIO_QNUM;
+    bool is_rx = (sc->queue_id % VIRTIO_QNUM) == VIRTIO_TXQ;
 
     ovs_mutex_lock(&dpdk_mutex);
     LIST_FOR_EACH (dev, list_node, &dpdk_list) {
         ovs_mutex_lock(&dev->mutex);
-        if (nullable_string_is_equal(ifname, dev->vhost_id)) {
+        if (nullable_string_is_equal(sc->ifname, dev->vhost_id)) {
             if (is_rx) {
                 bool old_state = dev->vhost_rxq_enabled[qid];
 
-                dev->vhost_rxq_enabled[qid] = enable != 0;
+                dev->vhost_rxq_enabled[qid] = sc->enable != 0;
                 if (old_state != dev->vhost_rxq_enabled[qid]) {
                     netdev_change_seq_changed(&dev->up);
                 }
             } else {
-                if (enable) {
+                if (sc->enable) {
                     dev->tx_q[qid].map = qid;
                 } else {
                     dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED;
@@ -4295,11 +4304,69 @@  vring_state_changed(int vid, uint16_t queue_id, int enable)
 
     if (exists) {
         VLOG_INFO("State of queue %d ( %s_qid %d ) of vhost device '%s' "
-                  "changed to \'%s\'", queue_id, is_rx == true ? "rx" : "tx",
-                  qid, ifname, (enable == 1) ? "enabled" : "disabled");
+                  "changed to \'%s\'", sc->queue_id, is_rx ? "rx" : "tx",
+                  qid, sc->ifname, sc->enable == 1 ? "enabled" : "disabled");
     } else {
-        VLOG_INFO("vHost Device '%s' not found", ifname);
-        return -1;
+        VLOG_INFO("vHost Device '%s' not found", sc->ifname);
+    }
+}
+
+#define NETDEV_DPDK_VHOST_EVENTS_BACKOFF_MIN 1
+#define NETDEV_DPDK_VHOST_EVENTS_BACKOFF_MAX 64
+static void *
+netdev_dpdk_vhost_events_main(void *arg OVS_UNUSED)
+{
+    mpsc_queue_acquire(&vhost_state_change_queue);
+
+    while (true) {
+        struct mpsc_queue_node *node;
+        uint64_t backoff;
+
+        backoff = NETDEV_DPDK_VHOST_EVENTS_BACKOFF_MIN;
+        while (mpsc_queue_tail(&vhost_state_change_queue) == NULL) {
+            xnanosleep(backoff * 1E6);
+            if (backoff < NETDEV_DPDK_VHOST_EVENTS_BACKOFF_MAX) {
+                backoff <<= 1;
+            }
+        }
+
+        MPSC_QUEUE_FOR_EACH_POP (node, &vhost_state_change_queue) {
+            struct vhost_state_change *sc;
+
+            sc = CONTAINER_OF(node, struct vhost_state_change, node);
+            vring_state_changed__(sc);
+            free(sc);
+            atomic_count_dec64(&vhost_state_change_queue_size);
+        }
+    }
+
+    OVS_NOT_REACHED();
+    mpsc_queue_release(&vhost_state_change_queue);
+
+    return NULL;
+}
+
+static int
+vring_state_changed(int vid, uint16_t queue_id, int enable)
+{
+    static struct vlog_rate_limit vhost_rl = VLOG_RATE_LIMIT_INIT(5, 5);
+    struct vhost_state_change *sc;
+
+    sc = xmalloc(sizeof *sc);
+    if (!rte_vhost_get_ifname(vid, sc->ifname, sizeof sc->ifname)) {
+        uint64_t queue_size;
+
+        sc->queue_id = queue_id;
+        sc->enable = enable;
+        mpsc_queue_insert(&vhost_state_change_queue, &sc->node);
+        queue_size = atomic_count_inc64(&vhost_state_change_queue_size);
+        if (queue_size >= 1000) {
+            VLOG_WARN_RL(&vhost_rl, "vring state change queue has %"PRIu64" "
+                         "entries. Last update was for socket %s.", queue_size,
+                         sc->ifname);
+        }
+    } else {
+        free(sc);
     }
 
     return 0;
@@ -4366,12 +4433,6 @@  netdev_dpdk_get_vid(const struct netdev_dpdk *dev)
     return ovsrcu_index_get(&dev->vid);
 }
 
-struct ingress_policer *
-netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)
-{
-    return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);
-}
-
 static int
 netdev_dpdk_class_init(void)
 {
@@ -4409,8 +4470,27 @@  netdev_dpdk_class_init(void)
     return 0;
 }
 
+static int
+netdev_dpdk_vhost_class_init(void)
+{
+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+    if (ovsthread_once_start(&once)) {
+        ovs_thread_create("ovs_vhost", netdev_dpdk_vhost_events_main, NULL);
+        ovsthread_once_done(&once);
+    }
+
+    return 0;
+}
+
 /* QoS Functions */
 
+struct ingress_policer *
+netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)
+{
+    return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);
+}
+
 /*
  * Initialize QoS configuration operations.
  */
@@ -5751,6 +5831,7 @@  static const struct netdev_class dpdk_class = {
 static const struct netdev_class dpdk_vhost_class = {
     .type = "dpdkvhostuser",
     NETDEV_DPDK_CLASS_COMMON,
+    .init = netdev_dpdk_vhost_class_init,
     .construct = netdev_dpdk_vhost_construct,
     .destruct = netdev_dpdk_vhost_destruct,
     .send = netdev_dpdk_vhost_send,
@@ -5766,6 +5847,7 @@  static const struct netdev_class dpdk_vhost_class = {
 static const struct netdev_class dpdk_vhost_client_class = {
     .type = "dpdkvhostuserclient",
     NETDEV_DPDK_CLASS_COMMON,
+    .init = netdev_dpdk_vhost_class_init,
     .construct = netdev_dpdk_vhost_client_construct,
     .destruct = netdev_dpdk_vhost_destruct,
     .set_config = netdev_dpdk_vhost_client_set_config,