diff mbox series

[ovs-dev,v3,3/5] ovsdb raft: Enable cooperative multitasking.

Message ID 20240116225205.38112-4-frode.nordahl@canonical.com
State Accepted
Delegated to: Ilya Maximets
Headers show
Series Introduce cooperative multitasking to improve OVSDB RAFT cluster operation. | expand

Checks

Context Check Description
ovsrobot/intel-ovs-compilation fail test: fail
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Frode Nordahl Jan. 16, 2024, 10:52 p.m. UTC
The OVSDB server is mostly synchronous and single threaded.  The
OVSDB RAFT storage engine operates under strict deadlines, with
operational impact should a deadline be overrun.

Register the RAFT storage engine for cooperative multitasking so that
long-running processing elsewhere in the program may yield to allow
stable maintenance of the cluster.

Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
---
 ovsdb/raft.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 8effd9ad1..f463afcb3 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -22,6 +22,7 @@ 
 #include <errno.h>
 #include <unistd.h>
 
+#include "cooperative-multitasking.h"
 #include "hash.h"
 #include "jsonrpc.h"
 #include "lockfile.h"
@@ -993,10 +994,13 @@  raft_reset_election_timer(struct raft *raft)
     raft->election_timeout = raft->election_base + duration;
 }
 
+#define RAFT_TIMER_THRESHOLD(t) ((t) / 3) /* One third of timer 't'. */
+
 static void
 raft_reset_ping_timer(struct raft *raft)
 {
-    raft->ping_timeout = time_msec() + raft->election_timer / 3;
+    raft->ping_timeout =
+        time_msec() + RAFT_TIMER_THRESHOLD(raft->election_timer);
 }
 
 static void
@@ -1371,6 +1375,8 @@  raft_take_leadership(struct raft *raft)
     }
 }
 
+static void raft_run_cb(void *arg);
+
 /* Closes everything owned by 'raft' that might be visible outside the process:
  * network connections, commands, etc.  This is part of closing 'raft'; it is
  * also used if 'raft' has failed in an unrecoverable way. */
@@ -1397,6 +1403,8 @@  raft_close__(struct raft *raft)
     LIST_FOR_EACH_SAFE (conn, list_node, &raft->conns) {
         raft_conn_close(conn);
     }
+
+    cooperative_multitasking_remove(&raft_run_cb, raft);
 }
 
 /* Closes and frees 'raft'.
@@ -2114,6 +2122,11 @@  raft_run(struct raft *raft)
         raft_reset_ping_timer(raft);
     }
 
+    cooperative_multitasking_set(
+        &raft_run_cb, (void *) raft, time_msec(),
+        RAFT_TIMER_THRESHOLD(raft->election_timer)
+        + RAFT_TIMER_THRESHOLD(raft->election_timer) / 10, "raft_run");
+
     /* Do this only at the end; if we did it as soon as we set raft->left or
      * raft->failed in handling the RemoveServerReply, then it could easily
      * cause references to freed memory in RPC sessions, etc. */
@@ -2122,6 +2135,14 @@  raft_run(struct raft *raft)
     }
 }
 
+static void
+raft_run_cb(void *arg)
+{
+    struct raft *raft = (struct raft *) arg;
+
+    raft_run(raft);
+}
+
 static void
 raft_wait_session(struct jsonrpc_session *js)
 {