diff mbox series

[ovs-dev,v2,2/5] ovsdb: raft: Fix time intervals for multitasking while joining.

Message ID 20240326172717.1454071-3-i.maximets@ovn.org
State Accepted
Commit bcad733e2ce36437ab503bc53d87dd80b9d7d336
Headers show
Series ovsdb: raft: Fixes for cluster joining state. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Ilya Maximets March 26, 2024, 5:27 p.m. UTC
While joining, ovsdb-server may not wake up for the duration of the join
timer, which is 1 second and is by default 3x larger than the heartbeat
timer.  This causes unnecessary warnings from the cooperative
multitasking module, which thinks that we missed the heartbeat time by
a lot.

Use the join timer (1000 ms) instead while joining.

Fixes: d4a15647b917 ("ovsdb: raft: Enable cooperative multitasking.")
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
---

CC: Frode Nordahl <frode.nordahl@canonical.com>

 ovsdb/raft.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index b171da345..ec3a0ff66 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -280,6 +280,7 @@  struct raft {
     /* Used for joining a cluster. */
     bool joining;                 /* Attempting to join the cluster? */
     struct sset remote_addresses; /* Addresses to try to find other servers. */
+#define RAFT_JOIN_TIMEOUT_MS 1000
     long long int join_timeout;   /* Time to re-send add server request. */
 
     /* Used for leaving a cluster. */
@@ -1083,7 +1084,7 @@  raft_open(struct ovsdb_log *log, struct raft **raftp)
             raft_start_election(raft, false, false);
         }
     } else {
-        raft->join_timeout = time_msec() + 1000;
+        raft->join_timeout = time_msec() + RAFT_JOIN_TIMEOUT_MS;
     }
 
     raft_reset_ping_timer(raft);
@@ -2128,7 +2129,7 @@  raft_run(struct raft *raft)
     }
 
     if (raft->joining && time_msec() >= raft->join_timeout) {
-        raft->join_timeout = time_msec() + 1000;
+        raft->join_timeout = time_msec() + RAFT_JOIN_TIMEOUT_MS;
         LIST_FOR_EACH (conn, list_node, &raft->conns) {
             raft_send_add_server_request(raft, conn);
         }
@@ -2162,10 +2163,12 @@  raft_run(struct raft *raft)
         raft_reset_ping_timer(raft);
     }
 
+    uint64_t interval = raft->joining
+                        ? RAFT_JOIN_TIMEOUT_MS
+                        : RAFT_TIMER_THRESHOLD(raft->election_timer);
     cooperative_multitasking_set(
         &raft_run_cb, (void *) raft, time_msec(),
-        RAFT_TIMER_THRESHOLD(raft->election_timer)
-        + RAFT_TIMER_THRESHOLD(raft->election_timer) / 10, "raft_run");
+        interval + interval / 10, "raft_run");
 
     /* Do this only at the end; if we did it as soon as we set raft->left or
      * raft->failed in handling the RemoveServerReply, then it could easily