diff mbox series

[ovs-dev,3/7] raft: Fix raft_is_connected() when there is no leader yet.

Message ID 1582942030-31096-3-git-send-email-hzhou@ovn.org
State Accepted
Commit 2833885f7ab565ce07f40de2ab8d415dc0390329
Headers show
Series [ovs-dev,1/7] raft-rpc.c: Fix message format. | expand

Commit Message

Han Zhou Feb. 29, 2020, 2:07 a.m. UTC
If the current server has never known a leader, its status
should be "disconnected" from the cluster. Without this patch, when
a server in a cluster is restarted, it appears as connected before it
has successfully reconnected to the cluster, which is wrong.

Signed-off-by: Han Zhou <hzhou@ovn.org>
---
 ovsdb/raft.c           | 10 ++++++++--
 tests/ovsdb-cluster.at | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 4789bc4..6cd7b00 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -298,6 +298,11 @@  struct raft {
     bool had_leader;            /* There has been leader elected since last
                                    election initiated. This is to help setting
                                    candidate_retrying. */
+
+    /* For all. */
+    bool ever_had_leader;       /* There has been leader elected since the raft
+                                   is initialized, meaning it is ever
+                                   connected. */
 };
 
 /* All Raft structures. */
@@ -1024,7 +1029,8 @@  raft_is_connected(const struct raft *raft)
             && !raft->joining
             && !raft->leaving
             && !raft->left
-            && !raft->failed);
+            && !raft->failed
+            && raft->ever_had_leader);
     VLOG_DBG("raft_is_connected: %s\n", ret? "true": "false");
     return ret;
 }
@@ -2519,7 +2525,7 @@  static void
 raft_set_leader(struct raft *raft, const struct uuid *sid)
 {
     raft->leader_sid = *sid;
-    raft->had_leader = true;
+    raft->ever_had_leader = raft->had_leader = true;
     raft->candidate_retrying = false;
 }
 
diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
index 5b6188b..0aa4564 100644
--- a/tests/ovsdb-cluster.at
+++ b/tests/ovsdb-cluster.at
@@ -179,6 +179,41 @@  AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
 ovsdb_test_cluster_disconnect 5 leader yes
 AT_CLEANUP
 
+AT_SETUP([OVSDB cluster - initial status should be disconnected])
+AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
+
+n=3
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+ordinal_schema > schema
+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
+cid=`ovsdb-tool db-cid s1.db`
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+for i in `seq 2 $n`; do
+    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
+done
+
+on_exit 'kill `cat *.pid`'
+for i in `seq $n`; do
+    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
+done
+for i in `seq $n`; do
+    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
+done
+
+# Stop all servers, and start the s1 only, to test initial connection status
+# when there is no leader yet.
+for i in `seq 1 $n`; do
+    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+done
+i=1
+AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
+
+# The initial status should be disconnected. So wait should fail.
+AT_CHECK([ovsdb_client_wait --timeout=1 unix:s$i.ovsdb $schema_name connected], [142], [ignore], [ignore])
+OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+
+AT_CLEANUP
+
 
 
 AT_BANNER([OVSDB cluster election timer change])