[ovs-dev,2/4] ovsdb-idl.c: Allows retry even when using a single remote.
diff mbox series

Message ID 1565713402-5458-2-git-send-email-hzhou8@ebay.com
State New
Headers show
Series
  • [ovs-dev,1/4] raft.c: Move raft_reset_ping_timer() out of the loop.
Related show

Commit Message

Han Zhou Aug. 13, 2019, 4:23 p.m. UTC
From: Han Zhou <hzhou8@ebay.com>

When clustered mode is used, the client needs to retry connecting
to new servers when certain failures happen. Today it is allowed to
retry new connection only if multiple remotes are used, which prevents
using LB VIP with clustered nodes. This patch makes sure the retry
logic works when using LB VIP: although same IP is used for retrying,
the LB can actually redirect the connection to a new node.

Signed-off-by: Han Zhou <hzhou8@ebay.com>
---
 lib/ovsdb-idl.c        | 11 +++-------
 tests/ovsdb-cluster.at | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/test-ovsdb.c     |  1 +
 3 files changed, 61 insertions(+), 8 deletions(-)

Comments

aginwala Aug. 14, 2019, 2:13 a.m. UTC | #1
On Tue, Aug 13, 2019 at 7:07 PM aginwala <aginwala@asu.edu> wrote:

> Thanks for the fixes. Found a bug in current code which breaks both nbctl
> with local socket and daemon mode on follower nodes. Also nbctl daemon mode
> always tries to connect to leader node by default which also failed to
> connect.
> e.g. export OVN_NB_DB=tcp:<lb_vip>:6641; ovn-nbctl  --pidfile --detach.
>  2019-08-14T01:07:06Z|00036|ovsdb_idl|INFO|unix:/var/run/openvswitch/ovnnb_db.sock:
> clustered database server is not cluster leader; trying another server
> 2019-08-14T01:07:06Z|00037|reconnect|DBG|unix:/var/run/openvswitch/ovnnb_db.sock:
> entering RECONNECT
> 2019-08-14T01:07:06Z|00038|ovsdb_idl|DBG|unix:/var/run/openvswitch/ovnnb_db.sock:
> SERVER_MONITOR_COND_REQUESTED -> RETRY at lib/ovsdb-idl.c:1917
> 2019-08-14T01:07:06Z|00039|poll_loop|DBG|wakeup due to 0-ms timeout at
> lib/reconnect.c:643
> 2019-08-14T01:07:06Z|00040|reconnect|INFO|unix:/var/run/openvswitch/ovnnb_db.sock:
> connection attempt timed out
> 2019-08-14T01:07:06Z|00041|reconnect|INFO|unix:/var/run/openvswitch/ovnnb_db.sock:
> waiting 2 seconds before reconnect
> 2019-08-14T01:07:06Z|00042|reconnect|DBG|unix:/var/run/openvswitch/ovnnb_db.sock:
> entering BACKOFF
>
> Need to explicitly specify no-leader-only to work around.  ovn-nbctl
> --pidfile --detach --no-leader-only.
> For LB VIP, since LB sees all nodes are active, connections are
> established to all cluster nodes equally. I am using round robin setting
> for LB VIP for 3 node cluster using raft.
>
> Hence, are we always going to avoid this behavior because this is forcing
> to always connect to leader and not to followers? So if we use this
> approach, idl will show ovn-nbctl: tcp:<lb_vip>:6641: database connection
> failed () if requests reaches followers and only processes success if
> request reaches leader node with this patch.
>
>
> On Tue, Aug 13, 2019 at 9:23 AM Han Zhou <zhouhan@gmail.com> wrote:
>
>> From: Han Zhou <hzhou8@ebay.com>
>>
>> When clustered mode is used, the client needs to retry connecting
>> to new servers when certain failures happen. Today it is allowed to
>> retry new connection only if multiple remotes are used, which prevents
>> using LB VIP with clustered nodes. This patch makes sure the retry
>> logic works when using LB VIP: although same IP is used for retrying,
>> the LB can actually redirect the connection to a new node.
>>
>> Signed-off-by: Han Zhou <hzhou8@ebay.com>
>> ---
>>  lib/ovsdb-idl.c        | 11 +++-------
>>  tests/ovsdb-cluster.at | 57
>> ++++++++++++++++++++++++++++++++++++++++++++++++++
>>  tests/test-ovsdb.c     |  1 +
>>  3 files changed, 61 insertions(+), 8 deletions(-)
>>
>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>> index 1a6a4af..190143f 100644
>> --- a/lib/ovsdb-idl.c
>> +++ b/lib/ovsdb-idl.c
>> @@ -657,12 +657,8 @@ ovsdb_idl_state_to_string(enum ovsdb_idl_state state)
>>  static void
>>  ovsdb_idl_retry_at(struct ovsdb_idl *idl, const char *where)
>>  {
>> -    if (idl->session && jsonrpc_session_get_n_remotes(idl->session) > 1)
>> {
>> -        ovsdb_idl_force_reconnect(idl);
>> -        ovsdb_idl_transition_at(idl, IDL_S_RETRY, where);
>> -    } else {
>> -        ovsdb_idl_transition_at(idl, IDL_S_ERROR, where);
>> -    }
>> +    ovsdb_idl_force_reconnect(idl);
>> +    ovsdb_idl_transition_at(idl, IDL_S_RETRY, where);
>>  }
>>
>>  static void
>> @@ -1895,8 +1891,7 @@ ovsdb_idl_check_server_db(struct ovsdb_idl *idl)
>>      if (!database) {
>>          VLOG_INFO_RL(&rl, "%s: server does not have %s database",
>>                       server_name, idl->data.class_->database);
>> -    } else if (!strcmp(database->model, "clustered")
>> -               && jsonrpc_session_get_n_remotes(idl->session) > 1) {
>> +    } else if (!strcmp(database->model, "clustered")) {
>>
> > I think this check jsonrpc_session_get_n_remotes(idl->session) > 1 is
> still needed for follower nodes for monitor condition to avoid this bug.
> Correct me if I am wrong or missed any case.
>
>          uint64_t index = database->n_index ? *database->index : 0;
>>
>>          if (!database->schema) {
>> diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
>> index 4701272..6a13843 100644
>> --- a/tests/ovsdb-cluster.at
>> +++ b/tests/ovsdb-cluster.at
>> @@ -63,6 +63,63 @@ m4_define([OVSDB_CHECK_EXECUTION],
>>  EXECUTION_EXAMPLES
>>
>>
>> +AT_BANNER([OVSDB - disconnect from cluster])
>> +
>> +# When a node is disconnected from the cluster, the IDL should disconnect
>> +# and retry even if it uses a single remote, because the remote IP can be
>> +# a VIP on a load-balance.
>> +AT_SETUP([OVSDB cluster - disconnect from cluster, single remote])
>> +AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
>> +
>> +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
>> +ordinal_schema > schema
>> +AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db
>> $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
>> +cid=`ovsdb-tool db-cid s1.db`
>> +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
>> +for i in `seq 2 3`; do
>> +    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft
>> unix:s1.raft])
>> +done
>> +
>> +on_exit 'kill `cat *.pid`'
>> +for i in `seq 3`; do
>> +    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach
>> --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i
>> --remote=punix:s$i.ovsdb s$i.db])
>> +done
>> +for i in `seq 3`; do
>> +    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
>> +done
>> +
>> +AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
>> +      {"op": "insert",
>> +       "table": "simple",
>> +       "row": {"i": 1}}]]'], [0], [ignore], [ignore])
>> +
>> +# Connect to a single remote s3.  Use "wait" to trigger a non-op
>> transaction so
>> +# that test-ovsdb will not quit.
>> +
>> +on_exit 'pkill test-ovsdb'
>> +test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -v -t10 idl
>> unix:s3.ovsdb '[["idltest",
>> +      {"op": "wait",
>> +       "table": "simple",
>> +       "where": [["i", "==", 1]],
>> +       "columns": ["i"],
>> +       "until": "==",
>> +       "rows": [{"i": 1}]}]]' > test-ovsdb.log 2>&1 &
>> +
>> +OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log])
>> +
>> +# Shutdown the other 2 servers so that s3 is disconnected from the
>> cluster.
>> +for i in 2 1; do
>> +    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
>> +done
>> +
>> +# The test-ovsdb should detect the disconnect and retry.
>> +OVS_WAIT_UNTIL([grep disconnect test-ovsdb.log])
>> +
>> +OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s3], [s3.pid])
>> +
>> +AT_CLEANUP
>> +
>> +
>>  OVS_START_SHELL_HELPERS
>>  # ovsdb_cluster_failure_test SCHEMA_FUNC OUTPUT TRANSACTION...
>>  ovsdb_cluster_failure_test () {
>> diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
>> index 187eb28..b1a4be3 100644
>> --- a/tests/test-ovsdb.c
>> +++ b/tests/test-ovsdb.c
>> @@ -2412,6 +2412,7 @@ do_idl(struct ovs_cmdl_context *ctx)
>>      track = ((struct test_ovsdb_pvt_context *)(ctx->pvt))->track;
>>
>>      idl = ovsdb_idl_create(ctx->argv[1], &idltest_idl_class, true, true);
>> +    ovsdb_idl_set_leader_only(idl, false);
>>      if (ctx->argc > 2) {
>>          struct stream *stream;
>>
>> --
>> 2.1.0
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
>
Han Zhou Aug. 14, 2019, 10:21 p.m. UTC | #2
Hi Aginwala, thanks for the testing. The change of this patch will cause
the IDL to avoid connecting to a follower if "leader_only" is set to
"true". It is the same behavior as before when multiple remotes are used,
but now it just does the same even when a single remote is used, because
the single remote could be a VIP, so it is desired  behavior. For
ovn-nbctl, "leader_only" is default to true, so it is expected that it
refuses to connect if the remote is a follower. However, if you are using
daemon mode ovn-nbctl, and if you didn't set "--no-leader-only", it should
keep retrying until it connects to a leader (depends on LB redirecting to
different servers). I agree this may cause some confusion when a user of
ovn-nbctl connects to only a single remote of a cluster, he/she could get
failure if --no-leader-only is not specified, but it is better than let
user believe they are connected to a leader while in reality connected to a
follower. Maybe I can improve the ovn-nbctl/sbctl documentation to emphasis
this to avoid confusion.

On Tue, Aug 13, 2019 at 7:13 PM aginwala <aginwala@asu.edu> wrote:

>
>
> On Tue, Aug 13, 2019 at 7:07 PM aginwala <aginwala@asu.edu> wrote:
>
>> Thanks for the fixes. Found a bug in current code which breaks both nbctl
>> with local socket and daemon mode on follower nodes. Also nbctl daemon mode
>> always tries to connect to leader node by default which also failed to
>> connect.
>> e.g. export OVN_NB_DB=tcp:<lb_vip>:6641; ovn-nbctl  --pidfile --detach.
>>  2019-08-14T01:07:06Z|00036|ovsdb_idl|INFO|unix:/var/run/openvswitch/ovnnb_db.sock:
>> clustered database server is not cluster leader; trying another server
>> 2019-08-14T01:07:06Z|00037|reconnect|DBG|unix:/var/run/openvswitch/ovnnb_db.sock:
>> entering RECONNECT
>> 2019-08-14T01:07:06Z|00038|ovsdb_idl|DBG|unix:/var/run/openvswitch/ovnnb_db.sock:
>> SERVER_MONITOR_COND_REQUESTED -> RETRY at lib/ovsdb-idl.c:1917
>> 2019-08-14T01:07:06Z|00039|poll_loop|DBG|wakeup due to 0-ms timeout at
>> lib/reconnect.c:643
>> 2019-08-14T01:07:06Z|00040|reconnect|INFO|unix:/var/run/openvswitch/ovnnb_db.sock:
>> connection attempt timed out
>> 2019-08-14T01:07:06Z|00041|reconnect|INFO|unix:/var/run/openvswitch/ovnnb_db.sock:
>> waiting 2 seconds before reconnect
>> 2019-08-14T01:07:06Z|00042|reconnect|DBG|unix:/var/run/openvswitch/ovnnb_db.sock:
>> entering BACKOFF
>>
>> Need to explicitly specify no-leader-only to work around.  ovn-nbctl
>> --pidfile --detach --no-leader-only.
>> For LB VIP, since LB sees all nodes are active, connections are
>> established to all cluster nodes equally. I am using round robin setting
>> for LB VIP for 3 node cluster using raft.
>>
>> Hence, are we always going to avoid this behavior because this is forcing
>> to always connect to leader and not to followers? So if we use this
>> approach, idl will show ovn-nbctl: tcp:<lb_vip>:6641: database connection
>> failed () if requests reaches followers and only processes success if
>> request reaches leader node with this patch.
>>
>>
>> On Tue, Aug 13, 2019 at 9:23 AM Han Zhou <zhouhan@gmail.com> wrote:
>>
>>> From: Han Zhou <hzhou8@ebay.com>
>>>
>>> When clustered mode is used, the client needs to retry connecting
>>> to new servers when certain failures happen. Today it is allowed to
>>> retry new connection only if multiple remotes are used, which prevents
>>> using LB VIP with clustered nodes. This patch makes sure the retry
>>> logic works when using LB VIP: although same IP is used for retrying,
>>> the LB can actually redirect the connection to a new node.
>>>
>>> Signed-off-by: Han Zhou <hzhou8@ebay.com>
>>> ---
>>>  lib/ovsdb-idl.c        | 11 +++-------
>>>  tests/ovsdb-cluster.at | 57
>>> ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  tests/test-ovsdb.c     |  1 +
>>>  3 files changed, 61 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>>> index 1a6a4af..190143f 100644
>>> --- a/lib/ovsdb-idl.c
>>> +++ b/lib/ovsdb-idl.c
>>> @@ -657,12 +657,8 @@ ovsdb_idl_state_to_string(enum ovsdb_idl_state
>>> state)
>>>  static void
>>>  ovsdb_idl_retry_at(struct ovsdb_idl *idl, const char *where)
>>>  {
>>> -    if (idl->session && jsonrpc_session_get_n_remotes(idl->session) >
>>> 1) {
>>> -        ovsdb_idl_force_reconnect(idl);
>>> -        ovsdb_idl_transition_at(idl, IDL_S_RETRY, where);
>>> -    } else {
>>> -        ovsdb_idl_transition_at(idl, IDL_S_ERROR, where);
>>> -    }
>>> +    ovsdb_idl_force_reconnect(idl);
>>> +    ovsdb_idl_transition_at(idl, IDL_S_RETRY, where);
>>>  }
>>>
>>>  static void
>>> @@ -1895,8 +1891,7 @@ ovsdb_idl_check_server_db(struct ovsdb_idl *idl)
>>>      if (!database) {
>>>          VLOG_INFO_RL(&rl, "%s: server does not have %s database",
>>>                       server_name, idl->data.class_->database);
>>> -    } else if (!strcmp(database->model, "clustered")
>>> -               && jsonrpc_session_get_n_remotes(idl->session) > 1) {
>>> +    } else if (!strcmp(database->model, "clustered")) {
>>>
>> > I think this check jsonrpc_session_get_n_remotes(idl->session) > 1 is
>> still needed for follower nodes for monitor condition to avoid this bug.
>> Correct me if I am wrong or missed any case.
>>
>>          uint64_t index = database->n_index ? *database->index : 0;
>>>
>>>          if (!database->schema) {
>>> diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
>>> index 4701272..6a13843 100644
>>> --- a/tests/ovsdb-cluster.at
>>> +++ b/tests/ovsdb-cluster.at
>>> @@ -63,6 +63,63 @@ m4_define([OVSDB_CHECK_EXECUTION],
>>>  EXECUTION_EXAMPLES
>>>
>>>
>>> +AT_BANNER([OVSDB - disconnect from cluster])
>>> +
>>> +# When a node is disconnected from the cluster, the IDL should
>>> disconnect
>>> +# and retry even if it uses a single remote, because the remote IP can
>>> be
>>> +# a VIP on a load-balance.
>>> +AT_SETUP([OVSDB cluster - disconnect from cluster, single remote])
>>> +AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
>>> +
>>> +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
>>> +ordinal_schema > schema
>>> +AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db
>>> $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
>>> +cid=`ovsdb-tool db-cid s1.db`
>>> +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
>>> +for i in `seq 2 3`; do
>>> +    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft
>>> unix:s1.raft])
>>> +done
>>> +
>>> +on_exit 'kill `cat *.pid`'
>>> +for i in `seq 3`; do
>>> +    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach
>>> --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i
>>> --remote=punix:s$i.ovsdb s$i.db])
>>> +done
>>> +for i in `seq 3`; do
>>> +    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
>>> +done
>>> +
>>> +AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
>>> +      {"op": "insert",
>>> +       "table": "simple",
>>> +       "row": {"i": 1}}]]'], [0], [ignore], [ignore])
>>> +
>>> +# Connect to a single remote s3.  Use "wait" to trigger a non-op
>>> transaction so
>>> +# that test-ovsdb will not quit.
>>> +
>>> +on_exit 'pkill test-ovsdb'
>>> +test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -v -t10 idl
>>> unix:s3.ovsdb '[["idltest",
>>> +      {"op": "wait",
>>> +       "table": "simple",
>>> +       "where": [["i", "==", 1]],
>>> +       "columns": ["i"],
>>> +       "until": "==",
>>> +       "rows": [{"i": 1}]}]]' > test-ovsdb.log 2>&1 &
>>> +
>>> +OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log])
>>> +
>>> +# Shutdown the other 2 servers so that s3 is disconnected from the
>>> cluster.
>>> +for i in 2 1; do
>>> +    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
>>> +done
>>> +
>>> +# The test-ovsdb should detect the disconnect and retry.
>>> +OVS_WAIT_UNTIL([grep disconnect test-ovsdb.log])
>>> +
>>> +OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s3], [s3.pid])
>>> +
>>> +AT_CLEANUP
>>> +
>>> +
>>>  OVS_START_SHELL_HELPERS
>>>  # ovsdb_cluster_failure_test SCHEMA_FUNC OUTPUT TRANSACTION...
>>>  ovsdb_cluster_failure_test () {
>>> diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
>>> index 187eb28..b1a4be3 100644
>>> --- a/tests/test-ovsdb.c
>>> +++ b/tests/test-ovsdb.c
>>> @@ -2412,6 +2412,7 @@ do_idl(struct ovs_cmdl_context *ctx)
>>>      track = ((struct test_ovsdb_pvt_context *)(ctx->pvt))->track;
>>>
>>>      idl = ovsdb_idl_create(ctx->argv[1], &idltest_idl_class, true,
>>> true);
>>> +    ovsdb_idl_set_leader_only(idl, false);
>>>      if (ctx->argc > 2) {
>>>          struct stream *stream;
>>>
>>> --
>>> 2.1.0
>>>
>>> _______________________________________________
>>> dev mailing list
>>> dev@openvswitch.org
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>>
>>
aginwala Aug. 15, 2019, 1:40 a.m. UTC | #3
Sure. Thanks for re-validating different corner cases with me. Yup, we can
update more details in  leader-only section about using single LB VIP while
accessing clustered db via  ovn-nbctl/ovn-sbctl for sure to avoid
confusion.

On Wed, Aug 14, 2019 at 3:21 PM Han Zhou <zhouhan@gmail.com> wrote:

> Hi Aginwala, thanks for the testing. The change of this patch will cause
> the IDL to avoid connecting to a follower if "leader_only" is set to
> "true". It is the same behavior as before when multiple remotes are used,
> but now it just does the same even when a single remote is used, because
> the single remote could be a VIP, so it is desired  behavior. For
> ovn-nbctl, "leader_only" is default to true, so it is expected that it
> refuses to connect if the remote is a follower. However, if you are using
> daemon mode ovn-nbctl, and if you didn't set "--no-leader-only", it should
> keep retrying until it connects to a leader (depends on LB redirecting to
> different servers). I agree this may cause some confusion when a user of
> ovn-nbctl connects to only a single remote of a cluster, he/she could get
> failure if --no-leader-only is not specified, but it is better than let
> user believe they are connected to a leader while in reality connected to a
> follower. Maybe I can improve the ovn-nbctl/sbctl documentation to emphasis
> this to avoid confusion.
>
> On Tue, Aug 13, 2019 at 7:13 PM aginwala <aginwala@asu.edu> wrote:
>
>>
>>
>> On Tue, Aug 13, 2019 at 7:07 PM aginwala <aginwala@asu.edu> wrote:
>>
>>> Thanks for the fixes. Found a bug in current code which breaks both
>>> nbctl with local socket and daemon mode on follower nodes. Also nbctl
>>> daemon mode always tries to connect to leader node by default which also
>>> failed to connect.
>>> e.g. export OVN_NB_DB=tcp:<lb_vip>:6641; ovn-nbctl  --pidfile --detach.
>>>  2019-08-14T01:07:06Z|00036|ovsdb_idl|INFO|unix:/var/run/openvswitch/ovnnb_db.sock:
>>> clustered database server is not cluster leader; trying another server
>>> 2019-08-14T01:07:06Z|00037|reconnect|DBG|unix:/var/run/openvswitch/ovnnb_db.sock:
>>> entering RECONNECT
>>> 2019-08-14T01:07:06Z|00038|ovsdb_idl|DBG|unix:/var/run/openvswitch/ovnnb_db.sock:
>>> SERVER_MONITOR_COND_REQUESTED -> RETRY at lib/ovsdb-idl.c:1917
>>> 2019-08-14T01:07:06Z|00039|poll_loop|DBG|wakeup due to 0-ms timeout at
>>> lib/reconnect.c:643
>>> 2019-08-14T01:07:06Z|00040|reconnect|INFO|unix:/var/run/openvswitch/ovnnb_db.sock:
>>> connection attempt timed out
>>> 2019-08-14T01:07:06Z|00041|reconnect|INFO|unix:/var/run/openvswitch/ovnnb_db.sock:
>>> waiting 2 seconds before reconnect
>>> 2019-08-14T01:07:06Z|00042|reconnect|DBG|unix:/var/run/openvswitch/ovnnb_db.sock:
>>> entering BACKOFF
>>>
>>> Need to explicitly specify no-leader-only to work around.  ovn-nbctl
>>> --pidfile --detach --no-leader-only.
>>> For LB VIP, since LB sees all nodes are active, connections are
>>> established to all cluster nodes equally. I am using round robin setting
>>> for LB VIP for 3 node cluster using raft.
>>>
>>> Hence, are we always going to avoid this behavior because this is
>>> forcing to always connect to leader and not to followers? So if we use this
>>> approach, idl will show ovn-nbctl: tcp:<lb_vip>:6641: database connection
>>> failed () if requests reaches followers and only processes success if
>>> request reaches leader node with this patch.
>>>
>>>
>>> On Tue, Aug 13, 2019 at 9:23 AM Han Zhou <zhouhan@gmail.com> wrote:
>>>
>>>> From: Han Zhou <hzhou8@ebay.com>
>>>>
>>>> When clustered mode is used, the client needs to retry connecting
>>>> to new servers when certain failures happen. Today it is allowed to
>>>> retry new connection only if multiple remotes are used, which prevents
>>>> using LB VIP with clustered nodes. This patch makes sure the retry
>>>> logic works when using LB VIP: although same IP is used for retrying,
>>>> the LB can actually redirect the connection to a new node.
>>>>
>>>> Signed-off-by: Han Zhou <hzhou8@ebay.com>
>>>> ---
>>>>  lib/ovsdb-idl.c        | 11 +++-------
>>>>  tests/ovsdb-cluster.at | 57
>>>> ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  tests/test-ovsdb.c     |  1 +
>>>>  3 files changed, 61 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
>>>> index 1a6a4af..190143f 100644
>>>> --- a/lib/ovsdb-idl.c
>>>> +++ b/lib/ovsdb-idl.c
>>>> @@ -657,12 +657,8 @@ ovsdb_idl_state_to_string(enum ovsdb_idl_state
>>>> state)
>>>>  static void
>>>>  ovsdb_idl_retry_at(struct ovsdb_idl *idl, const char *where)
>>>>  {
>>>> -    if (idl->session && jsonrpc_session_get_n_remotes(idl->session) >
>>>> 1) {
>>>> -        ovsdb_idl_force_reconnect(idl);
>>>> -        ovsdb_idl_transition_at(idl, IDL_S_RETRY, where);
>>>> -    } else {
>>>> -        ovsdb_idl_transition_at(idl, IDL_S_ERROR, where);
>>>> -    }
>>>> +    ovsdb_idl_force_reconnect(idl);
>>>> +    ovsdb_idl_transition_at(idl, IDL_S_RETRY, where);
>>>>  }
>>>>
>>>>  static void
>>>> @@ -1895,8 +1891,7 @@ ovsdb_idl_check_server_db(struct ovsdb_idl *idl)
>>>>      if (!database) {
>>>>          VLOG_INFO_RL(&rl, "%s: server does not have %s database",
>>>>                       server_name, idl->data.class_->database);
>>>> -    } else if (!strcmp(database->model, "clustered")
>>>> -               && jsonrpc_session_get_n_remotes(idl->session) > 1) {
>>>> +    } else if (!strcmp(database->model, "clustered")) {
>>>>
>>> > I think this check jsonrpc_session_get_n_remotes(idl->session) > 1 is
>>> still needed for follower nodes for monitor condition to avoid this bug.
>>> Correct me if I am wrong or missed any case.
>>>
>>>          uint64_t index = database->n_index ? *database->index : 0;
>>>>
>>>>          if (!database->schema) {
>>>> diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
>>>> index 4701272..6a13843 100644
>>>> --- a/tests/ovsdb-cluster.at
>>>> +++ b/tests/ovsdb-cluster.at
>>>> @@ -63,6 +63,63 @@ m4_define([OVSDB_CHECK_EXECUTION],
>>>>  EXECUTION_EXAMPLES
>>>>
>>>>
>>>> +AT_BANNER([OVSDB - disconnect from cluster])
>>>> +
>>>> +# When a node is disconnected from the cluster, the IDL should
>>>> disconnect
>>>> +# and retry even if it uses a single remote, because the remote IP can
>>>> be
>>>> +# a VIP on a load-balance.
>>>> +AT_SETUP([OVSDB cluster - disconnect from cluster, single remote])
>>>> +AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
>>>> +
>>>> +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
>>>> +ordinal_schema > schema
>>>> +AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db
>>>> $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
>>>> +cid=`ovsdb-tool db-cid s1.db`
>>>> +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
>>>> +for i in `seq 2 3`; do
>>>> +    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name
>>>> unix:s$i.raft unix:s1.raft])
>>>> +done
>>>> +
>>>> +on_exit 'kill `cat *.pid`'
>>>> +for i in `seq 3`; do
>>>> +    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach
>>>> --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i
>>>> --remote=punix:s$i.ovsdb s$i.db])
>>>> +done
>>>> +for i in `seq 3`; do
>>>> +    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
>>>> +done
>>>> +
>>>> +AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
>>>> +      {"op": "insert",
>>>> +       "table": "simple",
>>>> +       "row": {"i": 1}}]]'], [0], [ignore], [ignore])
>>>> +
>>>> +# Connect to a single remote s3.  Use "wait" to trigger a non-op
>>>> transaction so
>>>> +# that test-ovsdb will not quit.
>>>> +
>>>> +on_exit 'pkill test-ovsdb'
>>>> +test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -v -t10 idl
>>>> unix:s3.ovsdb '[["idltest",
>>>> +      {"op": "wait",
>>>> +       "table": "simple",
>>>> +       "where": [["i", "==", 1]],
>>>> +       "columns": ["i"],
>>>> +       "until": "==",
>>>> +       "rows": [{"i": 1}]}]]' > test-ovsdb.log 2>&1 &
>>>> +
>>>> +OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log])
>>>> +
>>>> +# Shutdown the other 2 servers so that s3 is disconnected from the
>>>> cluster.
>>>> +for i in 2 1; do
>>>> +    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
>>>> +done
>>>> +
>>>> +# The test-ovsdb should detect the disconnect and retry.
>>>> +OVS_WAIT_UNTIL([grep disconnect test-ovsdb.log])
>>>> +
>>>> +OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s3], [s3.pid])
>>>> +
>>>> +AT_CLEANUP
>>>> +
>>>> +
>>>>  OVS_START_SHELL_HELPERS
>>>>  # ovsdb_cluster_failure_test SCHEMA_FUNC OUTPUT TRANSACTION...
>>>>  ovsdb_cluster_failure_test () {
>>>> diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
>>>> index 187eb28..b1a4be3 100644
>>>> --- a/tests/test-ovsdb.c
>>>> +++ b/tests/test-ovsdb.c
>>>> @@ -2412,6 +2412,7 @@ do_idl(struct ovs_cmdl_context *ctx)
>>>>      track = ((struct test_ovsdb_pvt_context *)(ctx->pvt))->track;
>>>>
>>>>      idl = ovsdb_idl_create(ctx->argv[1], &idltest_idl_class, true,
>>>> true);
>>>> +    ovsdb_idl_set_leader_only(idl, false);
>>>>      if (ctx->argc > 2) {
>>>>          struct stream *stream;
>>>>
>>>> --
>>>> 2.1.0
>>>>
>>>> _______________________________________________
>>>> dev mailing list
>>>> dev@openvswitch.org
>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>>>
>>>

Patch
diff mbox series

diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 1a6a4af..190143f 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -657,12 +657,8 @@  ovsdb_idl_state_to_string(enum ovsdb_idl_state state)
 static void
 ovsdb_idl_retry_at(struct ovsdb_idl *idl, const char *where)
 {
-    if (idl->session && jsonrpc_session_get_n_remotes(idl->session) > 1) {
-        ovsdb_idl_force_reconnect(idl);
-        ovsdb_idl_transition_at(idl, IDL_S_RETRY, where);
-    } else {
-        ovsdb_idl_transition_at(idl, IDL_S_ERROR, where);
-    }
+    ovsdb_idl_force_reconnect(idl);
+    ovsdb_idl_transition_at(idl, IDL_S_RETRY, where);
 }
 
 static void
@@ -1895,8 +1891,7 @@  ovsdb_idl_check_server_db(struct ovsdb_idl *idl)
     if (!database) {
         VLOG_INFO_RL(&rl, "%s: server does not have %s database",
                      server_name, idl->data.class_->database);
-    } else if (!strcmp(database->model, "clustered")
-               && jsonrpc_session_get_n_remotes(idl->session) > 1) {
+    } else if (!strcmp(database->model, "clustered")) {
         uint64_t index = database->n_index ? *database->index : 0;
 
         if (!database->schema) {
diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
index 4701272..6a13843 100644
--- a/tests/ovsdb-cluster.at
+++ b/tests/ovsdb-cluster.at
@@ -63,6 +63,63 @@  m4_define([OVSDB_CHECK_EXECUTION],
 EXECUTION_EXAMPLES
 
 
+AT_BANNER([OVSDB - disconnect from cluster])
+
+# When a node is disconnected from the cluster, the IDL should disconnect
+# and retry even if it uses a single remote, because the remote IP can be
+# a VIP on a load-balance.
+AT_SETUP([OVSDB cluster - disconnect from cluster, single remote])
+AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
+
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+ordinal_schema > schema
+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
+cid=`ovsdb-tool db-cid s1.db`
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+for i in `seq 2 3`; do
+    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
+done
+
+on_exit 'kill `cat *.pid`'
+for i in `seq 3`; do
+    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
+done
+for i in `seq 3`; do
+    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
+done
+
+AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
+      {"op": "insert",
+       "table": "simple",
+       "row": {"i": 1}}]]'], [0], [ignore], [ignore])
+
+# Connect to a single remote s3.  Use "wait" to trigger a non-op transaction so
+# that test-ovsdb will not quit.
+
+on_exit 'pkill test-ovsdb'
+test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -v -t10 idl unix:s3.ovsdb '[["idltest",
+      {"op": "wait",
+       "table": "simple",
+       "where": [["i", "==", 1]],
+       "columns": ["i"],
+       "until": "==",
+       "rows": [{"i": 1}]}]]' > test-ovsdb.log 2>&1 &
+
+OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log])
+
+# Shutdown the other 2 servers so that s3 is disconnected from the cluster.
+for i in 2 1; do
+    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+done
+
+# The test-ovsdb should detect the disconnect and retry.
+OVS_WAIT_UNTIL([grep disconnect test-ovsdb.log])
+
+OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s3], [s3.pid])
+
+AT_CLEANUP
+
+
 OVS_START_SHELL_HELPERS
 # ovsdb_cluster_failure_test SCHEMA_FUNC OUTPUT TRANSACTION...
 ovsdb_cluster_failure_test () {
diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
index 187eb28..b1a4be3 100644
--- a/tests/test-ovsdb.c
+++ b/tests/test-ovsdb.c
@@ -2412,6 +2412,7 @@  do_idl(struct ovs_cmdl_context *ctx)
     track = ((struct test_ovsdb_pvt_context *)(ctx->pvt))->track;
 
     idl = ovsdb_idl_create(ctx->argv[1], &idltest_idl_class, true, true);
+    ovsdb_idl_set_leader_only(idl, false);
     if (ctx->argc > 2) {
         struct stream *stream;