[ovs-dev,v2,2/4] ovsdb-idl.c: Allows retry even when using a single remote.
diff mbox series

Message ID 1565822223-90171-2-git-send-email-hzhou8@ebay.com
State New
Headers show
Series
  • [ovs-dev,v2,1/4] raft.c: Move raft_reset_ping_timer() out of the loop.
Related show

Commit Message

Han Zhou Aug. 14, 2019, 10:37 p.m. UTC
From: Han Zhou <hzhou8@ebay.com>

When clustered mode is used, the client needs to retry connecting
to new servers when certain failures happen. Today it is allowed to
retry new connection only if multiple remotes are used, which prevents
using LB VIP with clustered nodes. This patch makes sure the retry
logic works when using LB VIP: although same IP is used for retrying,
the LB can actually redirect the connection to a new node.

Signed-off-by: Han Zhou <hzhou8@ebay.com>
---
 lib/ovsdb-idl.c        | 11 +++-------
 tests/ovsdb-cluster.at | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/test-ovsdb.c     |  1 +
 3 files changed, 61 insertions(+), 8 deletions(-)

Patch
diff mbox series

diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 1a6a4af..190143f 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -657,12 +657,8 @@  ovsdb_idl_state_to_string(enum ovsdb_idl_state state)
 static void
 ovsdb_idl_retry_at(struct ovsdb_idl *idl, const char *where)
 {
-    if (idl->session && jsonrpc_session_get_n_remotes(idl->session) > 1) {
-        ovsdb_idl_force_reconnect(idl);
-        ovsdb_idl_transition_at(idl, IDL_S_RETRY, where);
-    } else {
-        ovsdb_idl_transition_at(idl, IDL_S_ERROR, where);
-    }
+    ovsdb_idl_force_reconnect(idl);
+    ovsdb_idl_transition_at(idl, IDL_S_RETRY, where);
 }
 
 static void
@@ -1895,8 +1891,7 @@  ovsdb_idl_check_server_db(struct ovsdb_idl *idl)
     if (!database) {
         VLOG_INFO_RL(&rl, "%s: server does not have %s database",
                      server_name, idl->data.class_->database);
-    } else if (!strcmp(database->model, "clustered")
-               && jsonrpc_session_get_n_remotes(idl->session) > 1) {
+    } else if (!strcmp(database->model, "clustered")) {
         uint64_t index = database->n_index ? *database->index : 0;
 
         if (!database->schema) {
diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
index 4701272..6a13843 100644
--- a/tests/ovsdb-cluster.at
+++ b/tests/ovsdb-cluster.at
@@ -63,6 +63,63 @@  m4_define([OVSDB_CHECK_EXECUTION],
 EXECUTION_EXAMPLES
 
 
+AT_BANNER([OVSDB - disconnect from cluster])
+
+# When a node is disconnected from the cluster, the IDL should disconnect
+# and retry even if it uses a single remote, because the remote IP can be
+# a VIP on a load-balance.
+AT_SETUP([OVSDB cluster - disconnect from cluster, single remote])
+AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
+
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+ordinal_schema > schema
+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
+cid=`ovsdb-tool db-cid s1.db`
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+for i in `seq 2 3`; do
+    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
+done
+
+on_exit 'kill `cat *.pid`'
+for i in `seq 3`; do
+    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
+done
+for i in `seq 3`; do
+    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
+done
+
+AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
+      {"op": "insert",
+       "table": "simple",
+       "row": {"i": 1}}]]'], [0], [ignore], [ignore])
+
+# Connect to a single remote s3.  Use "wait" to trigger a non-op transaction so
+# that test-ovsdb will not quit.
+
+on_exit 'pkill test-ovsdb'
+test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -v -t10 idl unix:s3.ovsdb '[["idltest",
+      {"op": "wait",
+       "table": "simple",
+       "where": [["i", "==", 1]],
+       "columns": ["i"],
+       "until": "==",
+       "rows": [{"i": 1}]}]]' > test-ovsdb.log 2>&1 &
+
+OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log])
+
+# Shutdown the other 2 servers so that s3 is disconnected from the cluster.
+for i in 2 1; do
+    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+done
+
+# The test-ovsdb should detect the disconnect and retry.
+OVS_WAIT_UNTIL([grep disconnect test-ovsdb.log])
+
+OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s3], [s3.pid])
+
+AT_CLEANUP
+
+
 OVS_START_SHELL_HELPERS
 # ovsdb_cluster_failure_test SCHEMA_FUNC OUTPUT TRANSACTION...
 ovsdb_cluster_failure_test () {
diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
index 187eb28..b1a4be3 100644
--- a/tests/test-ovsdb.c
+++ b/tests/test-ovsdb.c
@@ -2412,6 +2412,7 @@  do_idl(struct ovs_cmdl_context *ctx)
     track = ((struct test_ovsdb_pvt_context *)(ctx->pvt))->track;
 
     idl = ovsdb_idl_create(ctx->argv[1], &idltest_idl_class, true, true);
+    ovsdb_idl_set_leader_only(idl, false);
     if (ctx->argc > 2) {
         struct stream *stream;